Fix accuracy and latency issues with `FFmpegFrameGrabber.setVideoFrameNumber()` (pull bytedeco#1734)
anotherche authored Jan 13, 2022
1 parent 97aab27 commit 4269832
Showing 4 changed files with 62 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,4 +1,5 @@

+ * Fix accuracy and latency issues with `FFmpegFrameGrabber.setVideoFrameNumber()` ([pull #1734](https://github.com/bytedeco/javacv/pull/1734))
* Add new `Frame.pictType` field set to `I`, `P`, `B`, etc by `FFmpegFrameGrabber` ([pull #1730](https://github.com/bytedeco/javacv/pull/1730))
* Set metadata for `AVFrame.opaque` in `FFmpegFrameGrabber` with call to `av_frame_copy_props()` ([issue #1729](https://github.com/bytedeco/javacv/issues/1729))
* Add `charset` property to `FrameGrabber` and `FrameRecorder` to use for metadata from FFmpeg ([pull #1720](https://github.com/bytedeco/javacv/pull/1720))
78 changes: 51 additions & 27 deletions src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java
@@ -385,6 +385,7 @@ static class SeekCallback extends Seek_Pointer_long_int {
private int samples_channels, samples_format, samples_rate;
private boolean frameGrabbed;
private Frame frame;
+ private int[] streams;

private volatile boolean started = false;

@@ -605,23 +606,23 @@ public double getVideoFrameRate() {
/** default override of super.setFrameNumber implies setting
* of a frame close to a video frame having that number */
@Override public void setFrameNumber(int frameNumber) throws Exception {
- if (hasVideo()) setTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
+ if (hasVideo()) setTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
else super.frameNumber = frameNumber;
}

/** if there is video stream tries to seek to video frame with corresponding timestamp
* otherwise sets super.frameNumber only because frameRate==0 if there is no video stream */
public void setVideoFrameNumber(int frameNumber) throws Exception {
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
- if (hasVideo()) setVideoTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
+ if (hasVideo()) setVideoTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
else super.frameNumber = frameNumber;
}

/** if there is audio stream tries to seek to audio frame with corresponding timestamp
* ignoring otherwise */
public void setAudioFrameNumber(int frameNumber) throws Exception {
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
- if (hasAudio()) setAudioTimestamp(Math.round(1000000L * frameNumber / getAudioFrameRate()));
+ if (hasAudio()) setAudioTimestamp((long)Math.floor(1000000L * frameNumber / getAudioFrameRate()));

}
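For context, each setter above converts a frame number to a microsecond timestamp before delegating to a timestamp-based seek; replacing `Math.round()` with `Math.floor()` keeps that timestamp at or before the exact frame boundary instead of occasionally rounding past it. A minimal usage sketch of the fixed path (the file name and frame number are assumptions, and the input is assumed to have more than 250 video frames):

```java
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;

public class SeekByFrameNumber {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.mp4");
        grabber.start();
        // Internally: setVideoTimestamp((long) Math.floor(1000000L * 250 / getFrameRate()))
        grabber.setVideoFrameNumber(250);
        Frame frame = grabber.grabImage(); // the video frame the seek landed on
        System.out.println("frame " + grabber.getFrameNumber()
                + " at " + grabber.getTimestamp() + " us, pictType=" + frame.pictType);
        grabber.stop();
        grabber.release();
    }
}
```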

@@ -755,9 +756,14 @@ else if (frameTypesToSeek.contains(Frame.Type.AUDIO)) {
else if (seekFrame.samples != null && samples_frame != null && getSampleRate() > 0) {
frameDuration = AV_TIME_BASE * samples_frame.nb_samples() / (double)getSampleRate();
}
+ // if(frameDuration>0.0) {
+ //     maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
+ //     if (maxSeekSteps<0) maxSeekSteps = 0;
+ // }
if(frameDuration>0.0) {
-     maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
-     if (maxSeekSteps<0) maxSeekSteps = 0;
+     maxSeekSteps = 0; //no more grab if the distance to the requested timestamp is smaller than frameDuration
+     if (timestamp - initialSeekPosition + 1 > frameDuration) //allow for a rounding error
+         maxSeekSteps = (long)(10*(timestamp - initialSeekPosition)/frameDuration);
}
else if (initialSeekPosition < timestamp) maxSeekSteps = 1000;
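The reworked bound above appears to be the latency-relevant change: when the container-level seek already landed within one frame of the requested timestamp (with a 1 us allowance for rounding), no further frames are grabbed at all, and otherwise the budget scales with the full remaining distance rather than the distance minus one frame. A small arithmetic sketch of the new bound, under an assumed 25 fps stream (frameDuration = 40000 us):

```java
public class SeekStepBound {
    static long maxSeekSteps(long timestamp, long initialSeekPosition, double frameDuration) {
        long steps = 0; // no more grabs if the target is less than one frame away
        if (timestamp - initialSeekPosition + 1 > frameDuration) // allow for a rounding error
            steps = (long) (10 * (timestamp - initialSeekPosition) / frameDuration);
        return steps;
    }

    public static void main(String[] args) {
        double frameDuration = 1_000_000.0 / 25.0; // 40000 us per frame at 25 fps
        // Seek landed 10 us short of the target: nothing more to grab.
        System.out.println(maxSeekSteps(1_000_000, 999_990, frameDuration)); // prints 0
        // Seek landed 200000 us (5 frames) early: allow up to 50 forward grabs.
        System.out.println(maxSeekSteps(1_000_000, 800_000, frameDuration)); // prints 50
    }
}
```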

@@ -768,7 +774,7 @@ else if (seekFrame.samples != null && samples_frame != null && getSampleRate() >
if (seekFrame == null) return; //is it better to throw NullPointerException?

count++;
- double ts=this.timestamp;
+ double ts=seekFrame.timestamp;
frameDuration = 0.0;
if (seekFrame.image != null && this.getFrameRate() > 0)
frameDuration = AV_TIME_BASE / (double)getFrameRate();
@@ -933,10 +939,12 @@ public synchronized void startUnsafe(boolean findStreamInfo) throws Exception {
video_st = audio_st = null;
AVCodecParameters video_par = null, audio_par = null;
int nb_streams = oc.nb_streams();
+ streams = new int[nb_streams];
for (int i = 0; i < nb_streams; i++) {
AVStream st = oc.streams(i);
// Get a pointer to the codec context for the video or audio stream
AVCodecParameters par = st.codecpar();
+ streams[i] = par.codec_type();
if (video_st == null && par.codec_type() == AVMEDIA_TYPE_VIDEO && (videoStream < 0 || videoStream == i)) {
video_st = st;
video_par = par;
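The new `streams` array snapshots each stream's codec type at startup so that `grabFrame()` can later classify packets from non-audio/video streams without consulting codec contexts. The same information can be listed with the FFmpeg presets the grabber uses internally; a sketch assuming an `input.ts` file with mixed streams:

```java
import org.bytedeco.ffmpeg.avformat.AVFormatContext;
import org.bytedeco.ffmpeg.avutil.AVDictionary;

import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avutil.*;

public class ListStreamTypes {
    public static void main(String[] args) {
        AVFormatContext oc = new AVFormatContext(null);
        if (avformat_open_input(oc, "input.ts", null, null) < 0)
            throw new RuntimeException("Could not open input");
        avformat_find_stream_info(oc, (AVDictionary) null);

        int[] streams = new int[oc.nb_streams()];
        for (int i = 0; i < streams.length; i++) {
            streams[i] = oc.streams(i).codecpar().codec_type();
            String kind = streams[i] == AVMEDIA_TYPE_VIDEO ? "video"
                        : streams[i] == AVMEDIA_TYPE_AUDIO ? "audio"
                        : streams[i] == AVMEDIA_TYPE_SUBTITLE ? "subtitle"
                        : streams[i] == AVMEDIA_TYPE_ATTACHMENT ? "attachment" : "data/other";
            System.out.println("stream " + i + ": " + kind);
        }
        avformat_close_input(oc);
    }
}
```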
@@ -1294,7 +1302,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do

if (oc == null || oc.isNull()) {
throw new Exception("Could not grab: No AVFormatContext. (Has start() been called?)");
- } else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null)) {
+ } else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null) && !doData) {
return null;
}
if (!started) {
@@ -1303,19 +1311,8 @@

boolean videoFrameGrabbed = frameGrabbed && frame.image != null;
boolean audioFrameGrabbed = frameGrabbed && frame.samples != null;
+ boolean dataFrameGrabbed = frameGrabbed && frame.data != null;
frameGrabbed = false;
- frame.keyFrame = false;
- frame.imageWidth = 0;
- frame.imageHeight = 0;
- frame.imageDepth = 0;
- frame.imageChannels = 0;
- frame.imageStride = 0;
- frame.image = null;
- frame.sampleRate = 0;
- frame.audioChannels = 0;
- frame.samples = null;
- frame.data = null;
- frame.opaque = null;
if (doVideo && videoFrameGrabbed) {
if (doProcessing) {
processImage();
@@ -1328,7 +1325,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
}
frame.keyFrame = samples_frame.key_frame() != 0;
return frame;
+ } else if (doData && dataFrameGrabbed) {
+     return frame;
+ }
+
+ frame.keyFrame = false;
+ frame.imageWidth = 0;
+ frame.imageHeight = 0;
+ frame.imageDepth = 0;
+ frame.imageChannels = 0;
+ frame.imageStride = 0;
+ frame.image = null;
+ frame.sampleRate = 0;
+ frame.audioChannels = 0;
+ frame.samples = null;
+ frame.data = null;
+ frame.opaque = null;
+ frame.type = null;

boolean done = false;
boolean readPacket = pkt.stream_index() == -1;
while (!done) {
@@ -1355,7 +1369,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
frame.streamIndex = pkt.stream_index();

// Is this a packet from the video stream?
- if (doVideo && video_st != null && pkt.stream_index() == video_st.index()
+ if (doVideo && video_st != null && frame.streamIndex == video_st.index()
&& (!keyFrames || pkt.flags() == AV_PKT_FLAG_KEY)) {
// Decode video frame
if (readPacket) {
@@ -1393,7 +1407,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
AVRational time_base = video_st.time_base();
timestamp = 1000000L * pts * time_base.num() / time_base.den();
// best guess, AVCodecContext.frame_number = number of decoded frames...
- frameNumber = (int)Math.round(timestamp * getFrameRate() / 1000000L);
+ frameNumber = (int)Math.floor(timestamp * getFrameRate() / 1000000L);
frame.image = image_buf;
if (doProcessing) {
processImage();
@@ -1404,9 +1418,10 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
frame.timestamp = timestamp;
frame.keyFrame = picture.key_frame() != 0;
frame.pictType = (char)av_get_picture_type_char(picture.pict_type());
+ frame.type = Frame.Type.VIDEO;
}
}
- } else if (doAudio && audio_st != null && pkt.stream_index() == audio_st.index()) {
+ } else if (doAudio && audio_st != null && frame.streamIndex == audio_st.index()) {
// Decode audio frame
if (readPacket) {
ret = avcodec_send_packet(audio_c, pkt);
@@ -1440,15 +1455,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
done = true;
frame.timestamp = timestamp;
frame.keyFrame = samples_frame.key_frame() != 0;
+ frame.type = Frame.Type.AUDIO;
}
- } else if (doData) {
-     if (!readPacket) {
-         readPacket = true;
-         continue;
-     }
+ } else if (readPacket && doData
+         && frame.streamIndex > -1 && frame.streamIndex < streams.length
+         && streams[frame.streamIndex] != AVMEDIA_TYPE_VIDEO && streams[frame.streamIndex] != AVMEDIA_TYPE_AUDIO) {
+     // Export the stream byte data for non audio / video frames
+     frame.data = pkt.data().position(0).capacity(pkt.size()).asByteBuffer();
+     frame.opaque = pkt;
+     done = true;
+     switch (streams[frame.streamIndex]) {
+         case AVMEDIA_TYPE_DATA: frame.type = Frame.Type.DATA; break;
+         case AVMEDIA_TYPE_SUBTITLE: frame.type = Frame.Type.SUBTITLE; break;
+         case AVMEDIA_TYPE_ATTACHMENT: frame.type = Frame.Type.ATTACHMENT; break;
+         default: frame.type = null;
+     }
} else {
// Current packet is not needed (different stream index required)
readPacket = true;
}
}
return frame;
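Together these changes let callers pull subtitle, data, and attachment packets through the normal grab loop and dispatch on the new `frame.type` field. A usage sketch (the file name is an assumption; the five-argument `grabFrame` overload is the one this diff modifies, with `doData` as the last parameter):

```java
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;

public class DispatchOnFrameType {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.mkv");
        grabber.start();
        Frame frame;
        // Arguments: doAudio, doVideo, doProcessing, keyFrames, doData
        while ((frame = grabber.grabFrame(true, true, true, false, true)) != null) {
            if (frame.type == Frame.Type.VIDEO) {
                System.out.println("video " + grabber.getFrameNumber() + " pict=" + frame.pictType);
            } else if (frame.type == Frame.Type.AUDIO) {
                System.out.println("audio @ " + frame.timestamp + " us");
            } else if (frame.type != null) { // DATA, SUBTITLE, or ATTACHMENT
                System.out.println(frame.type + ": " + frame.data.remaining()
                        + " bytes from stream " + frame.streamIndex);
            }
        }
        grabber.stop();
        grabber.release();
    }
}
```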
2 changes: 1 addition & 1 deletion src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java
@@ -1302,7 +1302,7 @@ private boolean record(AVFrame frame) throws Exception {

private void writePacket(int mediaType, AVPacket avPacket) throws Exception {

- AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? audio_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? video_st : null;
+ AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? video_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? audio_st : null;
String mediaTypeStr = (mediaType == AVMEDIA_TYPE_VIDEO) ? "video" : (mediaType == AVMEDIA_TYPE_AUDIO) ? "audio" : "unsupported media stream type";

synchronized (oc) {
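The one-line fix above corrects a swapped ternary that routed video packets to the audio stream and vice versa, which broke packet-level stream copying. A remux sketch exercising that path (file names are assumptions; it assumes the stream-copy overload `start(AVFormatContext)` and the `grabPacket()`/`recordPacket()` pair, the latter of which lands in `writePacket()`):

```java
import org.bytedeco.ffmpeg.avcodec.AVPacket;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.FFmpegFrameRecorder;

public class Remux {
    public static void main(String[] args) throws Exception {
        FFmpegFrameGrabber grabber = new FFmpegFrameGrabber("input.mp4");
        grabber.start();

        FFmpegFrameRecorder recorder = new FFmpegFrameRecorder("output.mp4",
                grabber.getImageWidth(), grabber.getImageHeight(), grabber.getAudioChannels());
        recorder.setFormat("mp4");
        recorder.setFrameRate(grabber.getFrameRate());
        recorder.setSampleRate(grabber.getSampleRate());
        recorder.start(grabber.getFormatContext()); // copy codec parameters, no transcoding

        AVPacket packet;
        while ((packet = grabber.grabPacket()) != null) {
            recorder.recordPacket(packet); // video packets now reach video_st, audio packets audio_st
        }

        recorder.stop();
        recorder.release();
        grabber.stop();
        grabber.release();
    }
}
```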
11 changes: 9 additions & 2 deletions src/main/java/org/bytedeco/javacv/Frame.java
@@ -75,11 +75,13 @@ public class Frame implements AutoCloseable, Indexable {
DEPTH_FLOAT = 32,
DEPTH_DOUBLE = 64;

- /** Constants defining data type in the frame*/
+ /** Constants defining data type in the frame. */
public static enum Type {
VIDEO,
AUDIO,
- DATA
+ DATA,
+ SUBTITLE,
+ ATTACHMENT
}

/** Information associated with the {@link #image} field. */
@@ -104,6 +106,9 @@ public static enum Type {
/** Stream number the audio|video|other data is associated with. */
public int streamIndex;

+ /** The type of the stream. */
+ public Type type;
+
/** The underlying data object, for example, Pointer, AVFrame, IplImage, or Mat. */
public Object opaque;

@@ -132,6 +137,7 @@ public Frame(int width, int height, int depth, int channels, int imageStride) {
this.image = new Buffer[1];
this.data = null;
this.streamIndex = -1;
+ this.type = null;

Pointer pointer = new BytePointer(imageHeight * imageStride * pixelSize(depth));
ByteBuffer buffer = pointer.asByteBuffer();
@@ -222,6 +228,7 @@ public Frame clone() {
newFrame.keyFrame = keyFrame;
newFrame.pictType = pictType;
newFrame.streamIndex = streamIndex;
+ newFrame.type = type;
newFrame.opaque = new Pointer[3];
if (image != null) {
newFrame.image = new Buffer[image.length];
