Skip to content

Commit

Permalink
Use ffmpeg parser for H.264 (#431)
Browse files Browse the repository at this point in the history
Fixes a number of things, including an LPMS crash, choppy video
quality, green screens during rotation, inconsistent frame counts
vs software decoding, etc. We also apparently gained GPU
support for MPEG2 decoding.

This is a massive change: we can no longer add outputs up front
due to the ffmpeg hwaccel API, so we have to wait until we receive
a decoded video frame in order to add outputs. This also means
properly queuing up audio and draining things in the same order.
  • Loading branch information
j0sh authored Jan 18, 2025
1 parent 25cbb36 commit 79e6dcf
Show file tree
Hide file tree
Showing 12 changed files with 256 additions and 149 deletions.
Binary file added data/bad-cuvid.ts
Binary file not shown.
Binary file added data/broken-h264-parser.ts
Binary file not shown.
87 changes: 16 additions & 71 deletions ffmpeg/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,38 +188,17 @@ enum AVPixelFormat hw2pixfmt(AVCodecContext *ctx)
return AV_PIX_FMT_NONE;
}

/**
* Callback to negotiate the pixel format for AVCodecContext.
*/
static enum AVPixelFormat get_hw_pixfmt(AVCodecContext *vc, const enum AVPixelFormat *pix_fmts)
static enum AVPixelFormat get_hw_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
{
AVHWFramesContext *frames;
int ret = 0;
const enum AVPixelFormat *p;
const enum AVPixelFormat hw_pix_fmt = hw2pixfmt(ctx);

for (p = pix_fmts; *p != -1; p++) {
if (*p == hw_pix_fmt) return *p;
}

// XXX Ideally this would be auto initialized by the HW device ctx
// However the initialization doesn't occur in time to set up filters
// So we do it here. Also see avcodec_get_hw_frames_parameters
av_buffer_unref(&vc->hw_frames_ctx);
vc->hw_frames_ctx = av_hwframe_ctx_alloc(vc->hw_device_ctx);
if (!vc->hw_frames_ctx) LPMS_ERR(pixfmt_cleanup, "Unable to allocate hwframe context for decoding");

frames = (AVHWFramesContext*)vc->hw_frames_ctx->data;
frames->format = hw2pixfmt(vc);
frames->sw_format = vc->sw_pix_fmt;
frames->width = vc->width;
frames->height = vc->height;

// May want to allocate extra HW frames if we encounter samples where
// the defaults are insufficient. Raising this increases GPU memory usage
// For now, the defaults seems OK.
//vc->extra_hw_frames = 16 + 1; // H.264 max refs

ret = av_hwframe_ctx_init(vc->hw_frames_ctx);
if (AVERROR(ENOSYS) == ret) ret = lpms_ERR_INPUT_PIXFMT; // most likely
if (ret < 0) LPMS_ERR(pixfmt_cleanup, "Unable to initialize a hardware frame pool");
return frames->format;

pixfmt_cleanup:
fprintf(stderr, "Failed to get HW surface format.\n");
return AV_PIX_FMT_NONE;
}

Expand Down Expand Up @@ -253,38 +232,6 @@ int open_audio_decoder(input_params *params, struct input_ctx *ctx)
return ret;
}

/**
 * Look up the name of the hardware decoder for a codec / device-type pair.
 *
 * @param ff_codec_id  codec identifier, compared against AV_CODEC_ID_* values
 * @param hw_type      hardware device type, compared against
 *                     AV_HWDEVICE_TYPE_* values
 * @return a string literal naming the decoder, or the empty string "" when
 *         no decoder name is known for the combination. Never NULL. The
 *         result points at static storage: do not modify or free it.
 */
char* get_hw_decoder(int ff_codec_id, int hw_type)
{
    switch (hw_type) {
    case AV_HWDEVICE_TYPE_CUDA:
        switch (ff_codec_id) {
        case AV_CODEC_ID_H264:
            return "h264_cuvid";
        case AV_CODEC_ID_HEVC:
            return "hevc_cuvid";
        case AV_CODEC_ID_VP8:
            return "vp8_cuvid";
        case AV_CODEC_ID_VP9:
            return "vp9_cuvid";
        default:
            return "";
        }
    case AV_HWDEVICE_TYPE_MEDIACODEC:
        switch (ff_codec_id) {
        case AV_CODEC_ID_H264:
            return "h264_ni_dec";
        case AV_CODEC_ID_HEVC:
            return "h265_ni_dec";
        case AV_CODEC_ID_VP8:
        case AV_CODEC_ID_VP9:
            // No decoder name is defined for VP8/VP9 on this device type.
            return "";
        default:
            return "";
        }
    default:
        // Any other device type previously fell off the end of this
        // non-void function, leaving the return value undefined. Report
        // "no decoder" explicitly so the result is always safe to inspect.
        return "";
    }
}

int open_video_decoder(input_params *params, struct input_ctx *ctx)
{
int ret = 0;
Expand All @@ -298,14 +245,6 @@ int open_video_decoder(input_params *params, struct input_ctx *ctx)
LPMS_WARN("No video stream found in input");
} else {
if (params->hw_type > AV_HWDEVICE_TYPE_NONE) {
char* decoder_name = get_hw_decoder(codec->id, params->hw_type);
if (!*decoder_name) {
ret = lpms_ERR_INPUT_CODEC;
LPMS_ERR(open_decoder_err, "Input codec does not support hardware acceleration");
}
const AVCodec *c = avcodec_find_decoder_by_name(decoder_name);
if (c) codec = c;
else LPMS_WARN("Nvidia decoder not found; defaulting to software");
if (AV_PIX_FMT_YUV420P != ic->streams[ctx->vi]->codecpar->format &&
AV_PIX_FMT_YUVJ420P != ic->streams[ctx->vi]->codecpar->format) {
// TODO check whether the color range is truncated if yuvj420p is used
Expand All @@ -330,13 +269,19 @@ int open_video_decoder(input_params *params, struct input_ctx *ctx)
ret = av_hwdevice_ctx_create(&ctx->hw_device_ctx, params->hw_type, params->device, NULL, 0);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open hardware context for decoding")
vc->hw_device_ctx = av_buffer_ref(ctx->hw_device_ctx);
vc->get_format = get_hw_pixfmt;
vc->get_format = get_hw_format;
}
ctx->hw_type = params->hw_type;
vc->pkt_timebase = ic->streams[ctx->vi]->time_base;
av_opt_set(vc->priv_data, "xcoder-params", ctx->xcoderParams, 0);
ret = avcodec_open2(vc, codec, opts);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open video decoder");
if (params->hw_type > AV_HWDEVICE_TYPE_NONE) {
if (AV_PIX_FMT_NONE == hw2pixfmt(vc)) {
ret = lpms_ERR_INPUT_CODEC;
LPMS_ERR(open_decoder_err, "Input codec does not support hardware acceleration");
}
}
}

return 0;
Expand Down
1 change: 0 additions & 1 deletion ffmpeg/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ enum AVPixelFormat hw2pixfmt(AVCodecContext *ctx);
int open_input(input_params *params, struct input_ctx *ctx);
int open_video_decoder(input_params *params, struct input_ctx *ctx);
int open_audio_decoder(input_params *params, struct input_ctx *ctx);
char* get_hw_decoder(int ff_codec_id, int hw_type);
void free_input(struct input_ctx *inctx);

// Utility functions
Expand Down
6 changes: 4 additions & 2 deletions ffmpeg/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)

// add video encoder if a decoder exists and this output requires one
if (ictx->vc && needs_decoder(octx->video->name)) {
ret = init_video_filters(ictx, octx);
ret = init_video_filters(ictx, octx, NULL);
if (ret < 0) LPMS_ERR(open_output_err, "Unable to open video filter");

codec = avcodec_find_encoder_by_name(octx->video->name);
Expand Down Expand Up @@ -296,6 +296,8 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
if (ret < 0) LPMS_ERR(open_output_err, "Unable to open signature filter");
}

octx->initialized = 1;

return 0;

open_output_err:
Expand Down Expand Up @@ -521,7 +523,7 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost)
static int calc_signature(AVFrame *inf, struct output_ctx *octx)
{
int ret = 0;
if (inf->hw_frames_ctx && octx->sf.hwframes && inf->hw_frames_ctx->data != octx->sf.hwframes) {
if (inf->hw_frames_ctx && octx->sf.hw_frames_ctx && inf->hw_frames_ctx->data != octx->sf.hw_frames_ctx->data) {
free_filter(&octx->sf);
ret = init_signature_filters(octx, inf);
if (ret < 0) return lpms_ERR_FILTERS;
Expand Down
47 changes: 4 additions & 43 deletions ffmpeg/ffmpeg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2218,23 +2218,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
`
}

// TODO figure out why cpu/gpu are different
if accel == Nvidia {
cmd = cmd + `
cat <<-EOF1 > expected.dims
115 256,144
120 146,260
125 256,144
EOF1
cat <<-EOF2 > expected-30fps.dims
58 256,144
60 146,260
63 256,144
EOF2
`
} else {
cmd = cmd + `
cmd = cmd + `
cat <<-EOF1 > expected.dims
120 256,144
120 146,260
Expand All @@ -2246,10 +2230,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
60 146,260
61 256,144
EOF2
`
}
cmd = cmd + `
diff -u expected.dims out.dims
diff -u expected-30fps.dims out-30fps.dims
`
Expand Down Expand Up @@ -2299,9 +2280,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
}})
require.NoError(t, err)

// TODO figure out why nvidia is different; green screen?
if accel == Software {
cmd = `
cmd = `
cat out-test-0.ts out-transposed.ts out-test-2.ts > out-test-concat.ts
ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height -of csv out-test-concat.ts > out-test-concat.framedata
Expand All @@ -2317,8 +2296,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
# this does not line up
#diff -u out-test-concat-30fps.framedata out-double-rotated-30fps.framedata
`
run(cmd)
}
run(cmd)

// check single rotations
res, err = Transcode3(
Expand All @@ -2344,21 +2322,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
ffprobe -show_entries frame=height,width -of csv=p=0 out-single-rotated-30fps.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > single-out-30fps.dims
`

// TODO figure out why cpu/gpu are different
if accel == Nvidia {
cmd = cmd + `
cat <<-EOF1 > single-expected.dims
115 256,144
125 146,260
EOF1
cat <<-EOF2 > single-expected-30fps.dims
58 256,144
63 146,260
EOF2
`
} else {
cmd = cmd + `
cmd = cmd + `
cat <<-EOF1 > single-expected.dims
120 256,144
120 146,260
Expand All @@ -2368,10 +2332,7 @@ func runRotationTests(t *testing.T, accel Acceleration) {
60 256,144
61 146,260
EOF2
`
}
cmd = cmd + `
diff -u single-expected.dims single-out.dims
diff -u single-expected-30fps.dims single-out-30fps.dims
`
Expand Down
18 changes: 10 additions & 8 deletions ffmpeg/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ int filtergraph_parser(struct filter_ctx *fctx, char* filters_descr, AVFilterInO
return ret;
}

int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame *inf)
{
char args[512];
int ret = 0;
Expand Down Expand Up @@ -92,8 +92,9 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
if (ictx->vc && ictx->vc->hw_frames_ctx) {
// XXX a bit problematic in that it's set before decoder is fully ready
AVBufferSrcParameters *srcpar = av_buffersrc_parameters_alloc();
srcpar->hw_frames_ctx = ictx->vc->hw_frames_ctx;
vf->hwframes = ictx->vc->hw_frames_ctx->data;
AVBufferRef *hw_frames_ctx = inf && inf->hw_frames_ctx ? inf->hw_frames_ctx : ictx->vc->hw_frames_ctx;
srcpar->hw_frames_ctx = hw_frames_ctx;
av_buffer_replace(&vf->hw_frames_ctx, hw_frames_ctx);
av_buffersrc_parameters_set(vf->src_ctx, srcpar);
av_freep(&srcpar);
}
Expand Down Expand Up @@ -243,13 +244,13 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf)
if (octx->vc && inf && inf->hw_frames_ctx) {
AVBufferSrcParameters *srcpar = av_buffersrc_parameters_alloc();
srcpar->hw_frames_ctx = inf->hw_frames_ctx;
sf->hwframes = inf->hw_frames_ctx->data;
av_buffer_replace(&sf->hw_frames_ctx, inf->hw_frames_ctx);
av_buffersrc_parameters_set(sf->src_ctx, srcpar);
av_freep(&srcpar);
} else if (octx->vc && octx->vc->hw_frames_ctx) {
AVBufferSrcParameters *srcpar = av_buffersrc_parameters_alloc();
srcpar->hw_frames_ctx = octx->vc->hw_frames_ctx;
sf->hwframes = octx->vc->hw_frames_ctx->data;
av_buffer_replace(&sf->hw_frames_ctx, octx->vc->hw_frames_ctx);
av_buffersrc_parameters_set(sf->src_ctx, srcpar);
av_freep(&srcpar);
}
Expand Down Expand Up @@ -288,8 +289,8 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
// before the decoder is fully ready, and the decoder may change HW params
// XXX: Unclear if this path is hit on all devices
if (is_video && inf && (
(inf->hw_frames_ctx && filter->hwframes &&
inf->hw_frames_ctx->data != filter->hwframes) ||
(inf->hw_frames_ctx && filter->hw_frames_ctx &&
inf->hw_frames_ctx->data != filter->hw_frames_ctx->data) ||
(filter->src_ctx->nb_outputs > 0 &&
filter->src_ctx->outputs[0]->w != inf->width &&
filter->src_ctx->outputs[0]->h != inf->height))) {
Expand Down Expand Up @@ -326,7 +327,7 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
ret = 0;

free_filter(&octx->vf);
ret = init_video_filters(ictx, octx);
ret = init_video_filters(ictx, octx, inf);
if (ret < 0) return lpms_ERR_FILTERS;
}

Expand Down Expand Up @@ -411,5 +412,6 @@ void free_filter(struct filter_ctx *filter)
{
if (filter->frame) av_frame_free(&filter->frame);
if (filter->graph) avfilter_graph_free(&filter->graph);
if (filter->hw_frames_ctx) av_buffer_unref(&filter->hw_frames_ctx);
memset(filter, 0, sizeof(struct filter_ctx));
}
5 changes: 3 additions & 2 deletions ffmpeg/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ struct filter_ctx {
AVFilterContext *sink_ctx;
AVFilterContext *src_ctx;

uint8_t *hwframes; // GPU frame pool data
AVBufferRef *hw_frames_ctx; // GPU frame pool data

// Input timebase for this filter
AVRational time_base;
Expand Down Expand Up @@ -46,6 +46,7 @@ struct filter_ctx {
};

struct output_ctx {
int initialized; // whether this output is ready
char *fname; // required output file name
char *vfilters; // required output video filters
char *sfilters; // required output signature filters
Expand Down Expand Up @@ -82,7 +83,7 @@ struct output_ctx {
char *xcoderParams;
};

int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx);
int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame *inf);
int init_audio_filters(struct input_ctx *ictx, struct output_ctx *octx);
int init_signature_filters(struct output_ctx *octx, AVFrame *inf);
int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *octx, struct filter_ctx *filter, int is_video);
Expand Down
Loading

0 comments on commit 79e6dcf

Please sign in to comment.