Skip to content

Commit

Permalink
Support VP8 and VP9
Browse files Browse the repository at this point in the history
Signed-off-by: Joaquin Anton Guirao <[email protected]>
  • Loading branch information
jantonguirao committed Feb 17, 2025
1 parent db5cf7f commit 390d8ab
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 57 deletions.
4 changes: 2 additions & 2 deletions dali/operators/decoder/video/video_decoder_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ Not relevant when using ``frames`` argument.)code",
"constant", true)
.AddOptionalArg(
"fill_value",
R"code(Value(s) used to pad missing frames when ``pad_mode='constant``'.
R"code(Value(s) used to pad missing frames when ``pad_mode='constant'``'.
Each value must be in range [0, 255].
If a single value is provided, it will be used for all channels.
Expand All @@ -136,7 +136,7 @@ Otherwise, the number of values must match the number of channels in the video.)
.AddOptionalArg("build_index",
R"code(Controls whether to build a frame index during initialization.
Building an index allows faster seeking to specific frames, but requires additional memory
Building an index allows faster seeking to specific frames, but requires additional CPU memory
to store frame metadata and longer initialization time to scan the entire video file. The index
stores metadata, such as whether it is a key frame and the presentation timestamp (PTS).
Expand Down
113 changes: 76 additions & 37 deletions dali/operators/reader/loader/video/frames_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,18 @@ int64_t seek_memory_video_file(void *data_ptr, int64_t new_position, int origin)

using AVPacketScope = std::unique_ptr<AVPacket, decltype(&av_packet_unref)>;

const std::vector<AVCodecID> FramesDecoder::SupportedCodecs = {
span<const AVCodecID> FramesDecoder::SupportedCodecs() const {
static constexpr std::array<AVCodecID, 7> codecs = {
AVCodecID::AV_CODEC_ID_H264,
AVCodecID::AV_CODEC_ID_HEVC,
AVCodecID::AV_CODEC_ID_MPEG4
};
// TODO(janton): AVCodecID::AV_CODEC_ID_MPEG4, -> not supported by current FFmpeg build
AVCodecID::AV_CODEC_ID_VP8,
AVCodecID::AV_CODEC_ID_VP9,
// TODO(janton): AVCodecID::AV_CODEC_ID_AV1, -> not supported by most platforms
AVCodecID::AV_CODEC_ID_MJPEG,
};
return make_cspan(codecs);
}

int64_t FramesDecoder::NumFrames() const {
if (num_frames_.has_value()) {
Expand Down Expand Up @@ -115,57 +122,93 @@ void FramesDecoder::InitAvState(bool init_codecs) {
}
}

// Reports whether the codec id of the currently selected stream
// (av_state_->codec_params_) appears in the SupportedCodecs list.
bool FramesDecoder::CheckCodecSupport() {
  const auto stream_codec = av_state_->codec_params_->codec_id;
  for (const auto &candidate : SupportedCodecs) {
    if (candidate == stream_codec) {
      return true;
    }
  }
  return false;
}

bool FramesDecoder::FindVideoStream(bool init_codecs) {
// First try to find stream info if not already present
if (av_state_->ctx_->nb_streams == 0) {
if (avformat_find_stream_info(av_state_->ctx_, nullptr) < 0) {
LOG_LINE << "Could not find any streams in \"" << Filename() << "\"" << std::endl;
return false;
}
}

av_state_->stream_id_ = -1;
if (init_codecs) {
// Search through all streams for a valid video stream with a supported codec
size_t i = 0;

for (i = 0; i < av_state_->ctx_->nb_streams; ++i) {
av_state_->codec_params_ = av_state_->ctx_->streams[i]->codecpar;
av_state_->codec_ = avcodec_find_decoder(av_state_->codec_params_->codec_id);

if (av_state_->codec_ == nullptr) {
// Skip if not a video stream
if (av_state_->codec_params_->codec_type != AVMEDIA_TYPE_VIDEO) {
LOG_LINE << "Stream " << i << " is not a video stream" << std::endl;
continue;
}

if (av_state_->codec_->type == AVMEDIA_TYPE_VIDEO) {
// Try to find decoder for this codec
av_state_->codec_ = avcodec_find_decoder(av_state_->codec_params_->codec_id);
if (av_state_->codec_ != nullptr) {
av_state_->stream_id_ = i;
LOG_LINE << "Found video stream " << i << " with codec "
<< avcodec_get_name(av_state_->codec_params_->codec_id) << std::endl;
break;
}
}

if (i >= av_state_->ctx_->nb_streams) {
DALI_WARN(make_string("Could not find a valid video stream in a file \"", Filename(), "\""));
return false;
LOG_LINE << "Found video stream but no decoder available for codec "
<< avcodec_get_name(av_state_->codec_params_->codec_id) << std::endl;
}
} else {
// Use FFmpeg's stream finding function
av_state_->stream_id_ =
av_find_best_stream(av_state_->ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);

if (av_state_->stream_id_ < 0) {
DALI_WARN(make_string("Could not find a valid video stream in a file \"", Filename(), "\""));
return false;
if (av_state_->stream_id_ != -1) {
LOG_LINE << "Found video stream " << av_state_->stream_id_ << " with codec "
<< avcodec_get_name(
av_state_->ctx_->streams[av_state_->stream_id_]->codecpar->codec_id)
<< std::endl;
av_state_->codec_params_ = av_state_->ctx_->streams[av_state_->stream_id_]->codecpar;
}
}

av_state_->codec_params_ = av_state_->ctx_->streams[av_state_->stream_id_]->codecpar;
if (av_state_->stream_id_ == -1) {
LOG_LINE << "Could not find a video stream in \"" << Filename()
<< "\". File may be corrupted or not contain video." << std::endl;
return false;
}

// Verify we can get valid dimensions
if (Height() == 0 || Width() == 0) {
if (avformat_find_stream_info(av_state_->ctx_, nullptr) < 0) {
DALI_WARN(make_string("Could not find stream information in \"", Filename(), "\""));
LOG_LINE << "Could not find stream information in \"" << Filename() << "\"" << std::endl;
return false;
}
if (Height() == 0 || Width() == 0) {
DALI_WARN("Couldn't load video size info.");
LOG_LINE << "Found video stream but couldn't determine dimensions in \"" << Filename() << "\""
<< std::endl;
return false;
}
}
return true;
}

// Validates that `codec_id` can be handled, throwing otherwise.
//
// Two checks are made, in order:
//   1. the codec id must be present in the list returned by SupportedCodecs();
//   2. av_codec_iterate() must yield at least one entry with this id for which
//      av_codec_is_decoder() is true.
//
// Throws std::runtime_error with a message naming the codec when either check fails.
void FramesDecoder::CheckCodecSupport(AVCodecID codec_id) const {
  // Query the (virtual) list once, so that begin() and end() are guaranteed to
  // belong to the same span instead of three separately returned temporaries.
  const auto supported = SupportedCodecs();
  if (std::find(supported.begin(), supported.end(), codec_id) == supported.end()) {
    throw std::runtime_error(make_string("Codec ", avcodec_get_name(codec_id),
                                         " is not supported by this DALI operator."));
  }

  // `iter` is the opaque cursor av_codec_iterate() advances on every call;
  // the iteration ends when it returns a null codec.
  void *iter = nullptr;
  const AVCodec *codec = nullptr;
  while ((codec = av_codec_iterate(&iter)) != nullptr) {
    if (codec->id == codec_id && av_codec_is_decoder(codec)) {
      return;  // a matching decoder exists
    }
  }
  throw std::runtime_error(
      make_string("Codec ", avcodec_get_name(codec_id),
                  " is not supported by the FFMPEG version provided by DALI."));
}

FramesDecoder::FramesDecoder(const std::string &filename)
: av_state_(std::make_unique<AvState>()), filename_(filename) {
av_log_set_level(AV_LOG_ERROR);
Expand All @@ -181,19 +224,15 @@ FramesDecoder::FramesDecoder(const std::string &filename)
}

if (!FindVideoStream()) {
DALI_WARN(make_string("Could not find a video stream in the file."));
return;
}
if (!CheckCodecSupport()) {
DALI_WARN(
make_string("Unsupported video codec: ", CodecName(), ". Supported codecs: h264, HEVC."));
return;
}
CheckCodecSupport(av_state_->codec_params_->codec_id);
InitAvState();
BuildIndex();
is_valid_ = true;
}


FramesDecoder::FramesDecoder(const char *memory_file, int memory_file_size, bool build_index,
bool init_codecs, int num_frames, std::string_view source_info)
: av_state_(std::make_unique<AvState>()),
Expand Down Expand Up @@ -235,13 +274,15 @@ FramesDecoder::FramesDecoder(const char *memory_file, int memory_file_size, bool
}

if (!FindVideoStream(init_codecs || build_index)) {
DALI_WARN(make_string("Could not find a video stream in the memory file."));
return;
}
if (!CheckCodecSupport()) {
DALI_WARN(make_string("Unsupported video codec: \"", CodecName(),
"\". Supported codecs: h264, HEVC."));
return;

// If init_codecs is true, verify that the codec of the selected video stream is supported
if (init_codecs) {
CheckCodecSupport(av_state_->codec_params_->codec_id);
}

InitAvState(init_codecs || build_index);

// Number of frames is unknown and we do not plan to build the index
Expand Down Expand Up @@ -561,7 +602,6 @@ bool FramesDecoder::ReadRegularFrame(uint8_t *data, bool copy_to_output) {
make_string("Failed to send packet to decoder: ", detail::av_error_string(ret)));

ret = avcodec_receive_frame(av_state_->codec_ctx_, av_state_->frame_);

if (ret == AVERROR(EAGAIN)) {
continue;
}
Expand Down Expand Up @@ -662,9 +702,8 @@ void FramesDecoder::SeekFrame(int frame_id) {
if (IsFormatSeekable()) {
Reset();
} else {
memory_video_file_->Seek(0, SEEK_SET);
av_state_ = std::make_unique<AvState>();
next_frame_idx_ = 0;
DALI_FAIL(make_string("Video file \"", Filename(), "\" is not seekable"));
// TODO(janton): Implement seeking by closing and reopening the handle
}
}
}
Expand Down
12 changes: 4 additions & 8 deletions dali/operators/reader/loader/video/frames_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extern "C" {
#include <optional>

#include "dali/core/common.h"
#include "dali/core/span.h"

namespace dali {
struct IndexEntry {
Expand Down Expand Up @@ -102,8 +103,6 @@ struct MemoryVideoFile {
*/
class DLL_PUBLIC FramesDecoder {
public:
static const std::vector<AVCodecID> SupportedCodecs;

/**
* @brief Construct a new FramesDecoder object.
*
Expand Down Expand Up @@ -270,7 +269,9 @@ class DLL_PUBLIC FramesDecoder {

void LazyInitSwContext();

bool CheckCodecSupport();
virtual span<const AVCodecID> SupportedCodecs() const;

virtual void CheckCodecSupport(AVCodecID codec_id) const;

void ParseNumFrames();

Expand All @@ -280,11 +281,6 @@ class DLL_PUBLIC FramesDecoder {

void CountFrames(AvState *av_state);

// Returns the name of the resolved codec when av_state_->codec_ is set;
// otherwise falls back to the numeric codec id from codec_params_, as a string.
std::string CodecName() {
  if (av_state_->codec_) {
    return av_state_->codec_->name;
  }
  const int raw_codec_id = static_cast<int>(av_state_->codec_params_->codec_id);
  return to_string(raw_codec_id);
}

int channels_ = 3;
bool flush_state_ = false;
bool is_vfr_ = false;
Expand Down
37 changes: 29 additions & 8 deletions dali/operators/reader/loader/video/frames_decoder_gpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,6 @@ using AVPacketScope = std::unique_ptr<AVPacket, decltype(&av_packet_unref)>;

void FramesDecoderGpu::InitBitStreamFilter() {
const AVBitStreamFilter *bsf = nullptr;

const char* filtername = nullptr;
switch (av_state_->codec_params_->codec_id) {
case AVCodecID::AV_CODEC_ID_H264:
Expand All @@ -316,6 +315,11 @@ void FramesDecoderGpu::InitBitStreamFilter() {
filtername = "mpeg4_unpack_bframes";
}
break;
case AVCodecID::AV_CODEC_ID_MJPEG:
case AVCodecID::AV_CODEC_ID_VP8:
case AVCodecID::AV_CODEC_ID_VP9:
case AVCodecID::AV_CODEC_ID_AV1:
break; // No bit stream filter needed for MJPEG, VP8, VP9, AV1
default:
DALI_FAIL(make_string(
"Could not find suitable bit stream filter for codec: ",
Expand All @@ -341,14 +345,17 @@ void FramesDecoderGpu::InitBitStreamFilter() {
"Unable to initialize bit stream filter");
}

cudaVideoCodec FramesDecoderGpu::GetCodecType() {
// Code assumes av_state_->codec_->id in FramesDecoder::SupportedCodecs
switch (av_state_->codec_params_->codec_id) {
cudaVideoCodec FramesDecoderGpu::GetCodecType(AVCodecID codec_id) const {
switch (codec_id) {
case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_VP8: return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1: return cudaVideoCodec_AV1;
default: {
DALI_FAIL(make_string("Unsupported codec type ", av_state_->codec_->id));
DALI_FAIL(make_string("Unsupported codec type ", avcodec_get_name(codec_id)));
return {};
}
}
Expand All @@ -373,7 +380,7 @@ void FramesDecoderGpu::InitGpuParser() {
return;
}

auto codec_type = GetCodecType();
auto codec_type = GetCodecType(av_state_->codec_params_->codec_id);

// Create nv parser
CUVIDPARSERPARAMS parser_info;
Expand Down Expand Up @@ -409,6 +416,18 @@ void FramesDecoderGpu::InitGpuParser() {
}
}

// Asks cuvidGetDecoderCaps() whether `codec_id` is supported, using a query for
// 4:2:0 chroma with nBitDepthMinus8 == 0 (i.e. 8-bit).
// Throws std::runtime_error naming the codec when the capability query reports
// it as unsupported.
void FramesDecoderGpu::CheckCodecSupport(AVCodecID codec_id) const {
  CUVIDDECODECAPS caps = {};
  caps.nBitDepthMinus8 = 0;
  caps.eChromaFormat = cudaVideoChromaFormat_420;
  caps.eCodecType = GetCodecType(codec_id);
  CUDA_CALL(cuvidGetDecoderCaps(&caps));

  if (caps.bIsSupported) {
    return;
  }
  throw std::runtime_error(
      make_string("Codec ", avcodec_get_name(codec_id), " is not supported by this platform."));
}

FramesDecoderGpu::FramesDecoderGpu(const std::string &filename, cudaStream_t stream) :
FramesDecoder(filename),
frame_buffer_(num_decode_surfaces_),
Expand All @@ -417,6 +436,7 @@ FramesDecoderGpu::FramesDecoderGpu(const std::string &filename, cudaStream_t str
return;
}
InitGpuParser();
CheckCodecSupport(av_state_->codec_params_->codec_id);
}

FramesDecoderGpu::FramesDecoderGpu(
Expand All @@ -429,9 +449,11 @@ FramesDecoderGpu::FramesDecoderGpu(
FramesDecoder(memory_file, memory_file_size, build_index, build_index, num_frames, source_info),
frame_buffer_(num_decode_surfaces_), stream_(stream) {
if (!IsValid()) {
DALI_WARN(make_string("Could not initialize FramesDecoderGpu from memory file."));
return;
}
InitGpuParser();
CheckCodecSupport(av_state_->codec_params_->codec_id);
}

int FramesDecoderGpu::ProcessPictureDecode(CUVIDPICPARAMS *picture_params) {
Expand Down Expand Up @@ -817,9 +839,8 @@ bool FramesDecoderGpu::SupportsHevc() {
CUVIDDECODECAPS decoder_caps = {};
decoder_caps.eCodecType = cudaVideoCodec_HEVC;
decoder_caps.eChromaFormat = cudaVideoChromaFormat_420;
decoder_caps.nBitDepthMinus8 = 2;
decoder_caps.nBitDepthMinus8 = 10 - 8; // 10-bit HEVC
CUDA_CALL(cuvidGetDecoderCaps(&decoder_caps));

return decoder_caps.bIsSupported;
}

Expand Down
6 changes: 5 additions & 1 deletion dali/operators/reader/loader/video/frames_decoder_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {

static bool SupportsHevc();

static bool SupportsCodec(AVCodecID codec_id, uint8_t bit_depth = 8);

void InitGpuDecoder(CUVIDEOFORMAT *video_format);

private:
Expand Down Expand Up @@ -205,7 +207,7 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {

void InitBitStreamFilter();

cudaVideoCodec GetCodecType();
cudaVideoCodec GetCodecType(AVCodecID codec_id) const;

void InitGpuParser();

Expand All @@ -216,6 +218,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
bool SendFrameToParser();

unsigned int NumEmptySpots() const;

void CheckCodecSupport(AVCodecID codec_id) const override;
};

} // namespace dali
Expand Down
Loading

0 comments on commit 390d8ab

Please sign in to comment.