diff --git a/CHANGELOG.md b/CHANGELOG.md
index a67f8fad..afc4e04c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
+ * Fix accuracy and latency issues with `FFmpegFrameGrabber.setVideoFrameNumber()` ([pull #1734](https://github.com/bytedeco/javacv/pull/1734))
  * Add new `Frame.pictType` field set to `I`, `P`, `B`, etc by `FFmpegFrameGrabber` ([pull #1730](https://github.com/bytedeco/javacv/pull/1730))
  * Set metadata for `AVFrame.opaque` in `FFmpegFrameGrabber` with call to `av_frame_copy_props()` ([issue #1729](https://github.com/bytedeco/javacv/issues/1729))
  * Add `charset` property to `FrameGrabber` and `FrameRecorder` to use for metadata from FFmpeg ([pull #1720](https://github.com/bytedeco/javacv/pull/1720))
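The `FFmpegFrameGrabber` hunks below replace `Math.round()` with `Math.floor()` when mapping a frame number to a microsecond timestamp (and back), so the computed timestamp never lands past the start of the requested frame's display interval, and a timestamp anywhere inside a frame's interval maps back to that frame's index. A minimal sketch of the intended use, assuming a placeholder file name and frame number:

```java
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;

public class FrameSeekExample {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.mp4");
        grabber.start();
        // With floor(), frame 250 maps to the start of its display interval,
        // i.e. floor(1000000 * 250 / fps), instead of rounding forward into
        // the next frame when the fraction is >= 0.5.
        grabber.setVideoFrameNumber(250);
        Frame frame = grabber.grabImage();
        if (frame != null) {
            System.out.println("timestamp=" + frame.timestamp + ", pictType=" + frame.pictType);
        }
        grabber.stop();
        grabber.release();
    }
}
```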
diff --git a/src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java b/src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java
index 5cf15f7e..03065294 100644
--- a/src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java
+++ b/src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java
@@ -385,6 +385,7 @@ static class SeekCallback extends Seek_Pointer_long_int {
     private int samples_channels, samples_format, samples_rate;
     private boolean frameGrabbed;
     private Frame frame;
+    private int[] streams;
 
     private volatile boolean started = false;
 
@@ -605,7 +606,7 @@ public double getVideoFrameRate() {
     /** default override of super.setFrameNumber implies setting
      *  of a frame close to a video frame having that number */
     @Override public void setFrameNumber(int frameNumber) throws Exception {
-        if (hasVideo()) setTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
+        if (hasVideo()) setTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
         else super.frameNumber = frameNumber;
     }
 
@@ -613,7 +614,7 @@ public double getVideoFrameRate() {
      *  otherwise sets super.frameNumber only because frameRate==0 if there is no video stream */
     public void setVideoFrameNumber(int frameNumber) throws Exception {
         // best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
-        if (hasVideo()) setVideoTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
+        if (hasVideo()) setVideoTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
         else super.frameNumber = frameNumber;
     }
 
@@ -621,7 +622,7 @@ public void setVideoFrameNumber(int frameNumber) throws Exception {
     /** if there is an audio stream, sets the audio frame number,
      *  ignoring otherwise */
     public void setAudioFrameNumber(int frameNumber) throws Exception {
        // best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
-        if (hasAudio()) setAudioTimestamp(Math.round(1000000L * frameNumber / getAudioFrameRate()));
+        if (hasAudio()) setAudioTimestamp((long)Math.floor(1000000L * frameNumber / getAudioFrameRate()));
     }
 
@@ -755,9 +756,14 @@ else if (frameTypesToSeek.contains(Frame.Type.AUDIO)) {
                 else if (seekFrame.samples != null && samples_frame != null && getSampleRate() > 0) {
                     frameDuration = AV_TIME_BASE * samples_frame.nb_samples() / (double)getSampleRate();
                 }
+//                if(frameDuration>0.0) {
+//                    maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
+//                    if (maxSeekSteps<0) maxSeekSteps = 0;
+//                }
                 if(frameDuration>0.0) {
-                    maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
-                    if (maxSeekSteps<0) maxSeekSteps = 0;
+                    maxSeekSteps = 0; // no more grabbing if the distance to the requested timestamp is smaller than frameDuration
+                    if (timestamp - initialSeekPosition + 1 > frameDuration) // allow for a rounding error
+                        maxSeekSteps = (long)(10*(timestamp - initialSeekPosition)/frameDuration);
                 }
                 else if (initialSeekPosition < timestamp) maxSeekSteps = 1000;
 
@@ -768,7 +774,7 @@ else if (seekFrame.samples != null && samples_frame != null && getSampleRate() >
 
                     if (seekFrame == null) return; //is it better to throw NullPointerException?
                     count++;
-                    double ts=this.timestamp;
+                    double ts=seekFrame.timestamp;
                     frameDuration = 0.0;
                     if (seekFrame.image != null && this.getFrameRate() > 0)
                         frameDuration = AV_TIME_BASE / (double)getFrameRate();
@@ -933,10 +939,12 @@ public synchronized void startUnsafe(boolean findStreamInfo) throws Exception {
         video_st = audio_st = null;
         AVCodecParameters video_par = null, audio_par = null;
         int nb_streams = oc.nb_streams();
+        streams = new int[nb_streams];
         for (int i = 0; i < nb_streams; i++) {
             AVStream st = oc.streams(i);
             // Get a pointer to the codec context for the video or audio stream
             AVCodecParameters par = st.codecpar();
+            streams[i] = par.codec_type();
             if (video_st == null && par.codec_type() == AVMEDIA_TYPE_VIDEO && (videoStream < 0 || videoStream == i)) {
                 video_st = st;
                 video_par = par;
@@ -1294,7 +1302,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
 
         if (oc == null || oc.isNull()) {
             throw new Exception("Could not grab: No AVFormatContext. (Has start() been called?)");
-        } else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null)) {
+        } else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null) && !doData) {
             return null;
         }
         if (!started) {
@@ -1303,19 +1311,8 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
 
         boolean videoFrameGrabbed = frameGrabbed && frame.image != null;
         boolean audioFrameGrabbed = frameGrabbed && frame.samples != null;
+        boolean dataFrameGrabbed = frameGrabbed && frame.data != null;
        frameGrabbed = false;
-        frame.keyFrame = false;
-        frame.imageWidth = 0;
-        frame.imageHeight = 0;
-        frame.imageDepth = 0;
-        frame.imageChannels = 0;
-        frame.imageStride = 0;
-        frame.image = null;
-        frame.sampleRate = 0;
-        frame.audioChannels = 0;
-        frame.samples = null;
-        frame.data = null;
-        frame.opaque = null;
         if (doVideo && videoFrameGrabbed) {
             if (doProcessing) {
                 processImage();
@@ -1328,7 +1325,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
             }
             frame.keyFrame = samples_frame.key_frame() != 0;
             return frame;
+        } else if (doData && dataFrameGrabbed) {
+            return frame;
         }
+
+        frame.keyFrame = false;
+        frame.imageWidth = 0;
+        frame.imageHeight = 0;
+        frame.imageDepth = 0;
+        frame.imageChannels = 0;
+        frame.imageStride = 0;
+        frame.image = null;
+        frame.sampleRate = 0;
+        frame.audioChannels = 0;
+        frame.samples = null;
+        frame.data = null;
+        frame.opaque = null;
+        frame.type = null;
+
         boolean done = false;
         boolean readPacket = pkt.stream_index() == -1;
         while (!done) {
@@ -1355,7 +1369,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
 
             frame.streamIndex = pkt.stream_index();
             // Is this a packet from the video stream?
-            if (doVideo && video_st != null && pkt.stream_index() == video_st.index()
+            if (doVideo && video_st != null && frame.streamIndex == video_st.index()
                     && (!keyFrames || pkt.flags() == AV_PKT_FLAG_KEY)) {
                 // Decode video frame
                 if (readPacket) {
@@ -1393,7 +1407,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
                         AVRational time_base = video_st.time_base();
                         timestamp = 1000000L * pts * time_base.num() / time_base.den();
                         // best guess, AVCodecContext.frame_number = number of decoded frames...
-                        frameNumber = (int)Math.round(timestamp * getFrameRate() / 1000000L);
+                        frameNumber = (int)Math.floor(timestamp * getFrameRate() / 1000000L);
                         frame.image = image_buf;
                         if (doProcessing) {
                             processImage();
@@ -1404,9 +1418,10 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
                         frame.timestamp = timestamp;
                         frame.keyFrame = picture.key_frame() != 0;
                         frame.pictType = (char)av_get_picture_type_char(picture.pict_type());
+                        frame.type = Frame.Type.VIDEO;
                     }
                 }
-            } else if (doAudio && audio_st != null && pkt.stream_index() == audio_st.index()) {
+            } else if (doAudio && audio_st != null && frame.streamIndex == audio_st.index()) {
                 // Decode audio frame
                 if (readPacket) {
                     ret = avcodec_send_packet(audio_c, pkt);
@@ -1440,15 +1455,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
                     done = true;
                     frame.timestamp = timestamp;
                     frame.keyFrame = samples_frame.key_frame() != 0;
+                    frame.type = Frame.Type.AUDIO;
                 }
-            } else if (doData) {
-                if (!readPacket) {
-                    readPacket = true;
-                    continue;
-                }
+            } else if (readPacket && doData
+                    && frame.streamIndex > -1 && frame.streamIndex < streams.length
+                    && streams[frame.streamIndex] != AVMEDIA_TYPE_VIDEO && streams[frame.streamIndex] != AVMEDIA_TYPE_AUDIO) {
                 // Export the stream byte data for non audio / video frames
                 frame.data = pkt.data().position(0).capacity(pkt.size()).asByteBuffer();
+                frame.opaque = pkt;
                 done = true;
+                switch (streams[frame.streamIndex]) {
+                    case AVMEDIA_TYPE_DATA: frame.type = Frame.Type.DATA; break;
+                    case AVMEDIA_TYPE_SUBTITLE: frame.type = Frame.Type.SUBTITLE; break;
+                    case AVMEDIA_TYPE_ATTACHMENT: frame.type = Frame.Type.ATTACHMENT; break;
+                    default: frame.type = null;
+                }
+            } else {
+                // Current packet is not needed (different stream index required)
+                readPacket = true;
             }
         }
         return frame;
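With the `grabFrame()` changes above, packets from streams that are neither video nor audio come back as data frames tagged with the new `Frame.Type` values, and the new `else` branch keeps the loop from stalling on packets from streams the caller did not ask for. A sketch of how a caller might consume them, assuming the five-argument `grabFrame(doAudio, doVideo, doProcessing, keyFrames, doData)` overload visible in the hunk headers and a placeholder input file:

```java
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;

public class DataStreamExample {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.ts");
        grabber.start();
        Frame frame;
        // doAudio=true, doVideo=true, doProcessing=true, keyFrames=false, doData=true
        while ((frame = grabber.grabFrame(true, true, true, false, true)) != null) {
            if (frame.type == Frame.Type.DATA || frame.type == Frame.Type.SUBTITLE
                    || frame.type == Frame.Type.ATTACHMENT) {
                // frame.data wraps the raw packet bytes of the non audio/video stream
                System.out.println("stream " + frame.streamIndex + " (" + frame.type + "): "
                        + frame.data.remaining() + " bytes");
            }
        }
        grabber.stop();
        grabber.release();
    }
}
```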
diff --git a/src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java b/src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java
index 4b71b0fd..656b995f 100644
--- a/src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java
+++ b/src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java
@@ -1302,7 +1302,7 @@ private boolean record(AVFrame frame) throws Exception {
 
     private void writePacket(int mediaType, AVPacket avPacket) throws Exception {
 
-        AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? audio_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? video_st : null;
+        AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? video_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? audio_st : null;
         String mediaTypeStr = (mediaType == AVMEDIA_TYPE_VIDEO) ? "video" : (mediaType == AVMEDIA_TYPE_AUDIO) ? "audio" : "unsupported media stream type";
 
         synchronized (oc) {
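The one-line `writePacket()` fix above corrects a swapped ternary that routed video packets to the audio stream and vice versa, which broke packet-level stream copying. A minimal remux sketch that exercises this path, assuming the `grabPacket()`/`recordPacket()` stream-copy APIs and placeholder file names:

```java
import org.bytedeco.ffmpeg.avcodec.AVPacket;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.FFmpegFrameRecorder;

public class RemuxExample {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.mp4");
        grabber.start();
        FFmpegFrameRecorder recorder = new FFmpegFrameRecorder("output.mkv",
                grabber.getImageWidth(), grabber.getImageHeight(), grabber.getAudioChannels());
        recorder.setFormat("matroska");
        recorder.setFrameRate(grabber.getFrameRate());
        recorder.setSampleRate(grabber.getSampleRate());
        recorder.start(grabber.getFormatContext()); // copy stream parameters, no re-encoding
        AVPacket packet;
        while ((packet = grabber.grabPacket()) != null) {
            recorder.recordPacket(packet); // dispatched to the correct stream by writePacket()
        }
        recorder.stop();
        recorder.release();
        grabber.stop();
        grabber.release();
    }
}
```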
diff --git a/src/main/java/org/bytedeco/javacv/Frame.java b/src/main/java/org/bytedeco/javacv/Frame.java
index 7c5c80c2..ec506037 100644
--- a/src/main/java/org/bytedeco/javacv/Frame.java
+++ b/src/main/java/org/bytedeco/javacv/Frame.java
@@ -75,11 +75,13 @@ public class Frame implements AutoCloseable, Indexable {
             DEPTH_FLOAT = 32,
             DEPTH_DOUBLE = 64;
 
-    /** Constants defining data type in the frame*/
+    /** Constants defining data type in the frame. */
     public static enum Type {
         VIDEO,
         AUDIO,
-        DATA
+        DATA,
+        SUBTITLE,
+        ATTACHMENT
     }
 
     /** Information associated with the {@link #image} field. */
@@ -104,6 +106,9 @@ public static enum Type {
     /** Stream number the audio|video|other data is associated with. */
     public int streamIndex;
 
+    /** The type of the stream. */
+    public Type type;
+
     /** The underlying data object, for example, Pointer, AVFrame, IplImage, or Mat. */
     public Object opaque;
 
@@ -132,6 +137,7 @@ public Frame(int width, int height, int depth, int channels, int imageStride) {
         this.image = new Buffer[1];
         this.data = null;
         this.streamIndex = -1;
+        this.type = null;
 
         Pointer pointer = new BytePointer(imageHeight * imageStride * pixelSize(depth));
         ByteBuffer buffer = pointer.asByteBuffer();
@@ -222,6 +228,7 @@ public Frame clone() {
         newFrame.keyFrame = keyFrame;
         newFrame.pictType = pictType;
         newFrame.streamIndex = streamIndex;
+        newFrame.type = type;
         newFrame.opaque = new Pointer[3];
         if (image != null) {
             newFrame.image = new Buffer[image.length];
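Since `clone()` now carries the `type` field over along with `streamIndex` and `pictType`, consumers can keep relying on it after deep-copying a frame. A small sketch using only the public `Frame` API shown above:

```java
import org.bytedeco.javacv.Frame;

public class FrameTypeCloneExample {
    public static void main(String[] args) {
        Frame frame = new Frame(640, 480, Frame.DEPTH_UBYTE, 3);
        frame.streamIndex = 0;
        frame.type = Frame.Type.VIDEO;

        Frame copy = frame.clone(); // deep copy of buffers and metadata
        System.out.println(copy.type);        // VIDEO, carried over by the new line in clone()
        System.out.println(copy.streamIndex); // 0
        copy.close();
        frame.close();
    }
}
```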