From 972e04ac77bcd86999501fd05e132ffc352c16b8 Mon Sep 17 00:00:00 2001 From: squishyhuman Date: Sat, 7 Oct 2023 13:17:27 -0700 Subject: [PATCH] Add C++ streaming transcoder --- .gitignore | 2 + lib/CMakeLists.txt | 141 +++++ lib/stream_decoder/stream_decoder.cpp | 442 +++++++++++++ lib/stream_decoder/stream_decoder.h | 112 ++++ .../stream_decoder_embinder.cpp | 40 ++ lib/stream_encoder/stream_encoder.cpp | 584 ++++++++++++++++++ lib/stream_encoder/stream_encoder.hpp | 134 ++++ .../stream_encoder_embinder.cpp | 36 ++ lib/video/video_frame.cpp | 77 +++ lib/video/video_frame.hpp | 53 ++ 10 files changed, 1621 insertions(+) create mode 100644 lib/stream_decoder/stream_decoder.cpp create mode 100644 lib/stream_decoder/stream_decoder.h create mode 100644 lib/stream_decoder/stream_decoder_embinder.cpp create mode 100644 lib/stream_encoder/stream_encoder.cpp create mode 100644 lib/stream_encoder/stream_encoder.hpp create mode 100644 lib/stream_encoder/stream_encoder_embinder.cpp create mode 100644 lib/video/video_frame.cpp create mode 100644 lib/video/video_frame.hpp diff --git a/.gitignore b/.gitignore index 749a7d56a..28342c6b8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ # Generated libraries /frontend/public/motion_tracker +/frontend/public/stream_decoder.* +/frontend/public/stream_encoder.* # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index b3771e584..e62d0f220 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -39,6 +39,26 @@ set(CMAKE_CXX_STANDARD 17) find_package(OpenCV REQUIRED) +# +# FFmpeg +# + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h PATHS "${CMAKE_PREFIX_PATH}/include") +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h PATHS "${CMAKE_PREFIX_PATH}/include") +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h PATHS "${CMAKE_PREFIX_PATH}/include") + +find_library(AVCODEC_LIBRARY avcodec PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(AVFILTER_LIBRARY avfilter PATHS 
"${CMAKE_PREFIX_PATH}/lib") +find_library(AVFORMAT_LIBRARY avformat PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(AVUTIL_LIBRARY avutil PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(LAME_LIBRARY mp3lame PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(OPENH264_LIBRARY openh264 PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(OPUS_LIBRARY opus PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(SWRESAMPLE_LIBRARY swresample PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(SWSCALE_LIBRARY swscale PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(VPX_LIBRARY vpx PATHS "${CMAKE_PREFIX_PATH}/lib") +find_library(X264_LIBRARY x264 PATHS "${CMAKE_PREFIX_PATH}/lib") + ################################################################################ # # Define sources @@ -65,6 +85,27 @@ set(MOTION_TRACKER_SOURCES utils/math_utils.cpp ) +# +# Stream decoder +# + +set(STREAM_DECODER_SOURCES + stream_decoder/stream_decoder.cpp + stream_decoder/stream_decoder_embinder.cpp + utils/emscripten_utils.cpp +) + +# +# Stream encoder +# + +set(STREAM_ENCODER_SOURCES + stream_encoder/stream_encoder.cpp + stream_encoder/stream_encoder_embinder.cpp + utils/emscripten_utils.cpp + video/video_frame.cpp +) + ################################################################################ # # Build libraries @@ -91,10 +132,13 @@ string(APPEND EMSCRIPTEN_LINK_FLAGS # " --pre-js pre-module.j " # " --post-js post-module.j " "-s ALLOW_MEMORY_GROWTH=1 " + "-s ALLOW_TABLE_GROWTH=1 " "-s ASSERTIONS=1 " # " -s DEMANGLE_SUPPORT=1 " # " -s DISABLE_EXCEPTION_CATCHING=0 " "-s ERROR_ON_UNDEFINED_SYMBOLS=0 " + "-s EXPORTED_RUNTIME_METHODS='[\"addFunction\"]' " + "-s EXTRA_EXPORTED_RUNTIME_METHODS='[\"addFunction\"]' " # " -s FULL_ES3=1 " # " -s GL_ASSERTIONS=1 " # " -s GL_UNSAFE_OPTS=0 " @@ -102,6 +146,7 @@ string(APPEND EMSCRIPTEN_LINK_FLAGS # " -s LEGACY_GL_EMULATION=0 " #"-s LLD_REPORT_UNDEFINED " # " -s OFFSCREENCANVAS_SUPPORT=1 " + "-s RESERVED_FUNCTION_POINTERS=10 " # " -s SAFE_HEAP=1 " #"-s 
TOTAL_MEMORY=67108864 " # " -s USE_FREETYPE=1 " @@ -147,6 +192,78 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") ) endif () +# +# Stream decoder +# + +add_executable(stream_decoder + ${STREAM_DECODER_SOURCES} +) + +target_include_directories(stream_decoder PRIVATE + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +target_link_libraries(stream_decoder PRIVATE + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + ${LAME_LIBRARY} + ${OPENH264_LIBRARY} + ${OPUS_LIBRARY} + ${SWRESAMPLE_LIBRARY} + ${SWSCALE_LIBRARY} + ${VPX_LIBRARY} + ${X264_LIBRARY} +) + +if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") + set_target_properties(stream_decoder PROPERTIES + COMPILE_FLAGS " \ + -Wno-deprecated \ + -s ASSERTIONS=1 \ + " + LINK_FLAGS ${EMSCRIPTEN_LINK_FLAGS}) +endif () + +# +# Stream encoder +# + +add_executable(stream_encoder + ${STREAM_ENCODER_SOURCES} +) + +target_include_directories(stream_encoder PRIVATE + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +target_link_libraries(stream_encoder PRIVATE + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + ${LAME_LIBRARY} + ${OPENH264_LIBRARY} + ${OPUS_LIBRARY} + ${SWRESAMPLE_LIBRARY} + ${SWSCALE_LIBRARY} + ${VPX_LIBRARY} + ${X264_LIBRARY} +) + +if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") + set_target_properties(stream_encoder PROPERTIES + COMPILE_FLAGS " \ + -Wno-deprecated \ + -s ASSERTIONS=1 \ + " + LINK_FLAGS ${EMSCRIPTEN_LINK_FLAGS}) +endif () + ################################################################################ # # Install libraries @@ -165,3 +282,27 @@ INSTALL( DESTINATION motion_tracker ) + +# +# Stream decoder +# + +INSTALL( + FILES + "${CMAKE_BINARY_DIR}/stream_decoder.js" + "${CMAKE_BINARY_DIR}/stream_decoder.wasm" + DESTINATION + "${CMAKE_INSTALL_PREFIX}" +) + +# +# Stream encoder +# + +INSTALL( + FILES + "${CMAKE_BINARY_DIR}/stream_encoder.js" + "${CMAKE_BINARY_DIR}/stream_encoder.wasm" + DESTINATION + 
"${CMAKE_INSTALL_PREFIX}" +) diff --git a/lib/stream_decoder/stream_decoder.cpp b/lib/stream_decoder/stream_decoder.cpp new file mode 100644 index 000000000..da69969de --- /dev/null +++ b/lib/stream_decoder/stream_decoder.cpp @@ -0,0 +1,442 @@ +/* + * Copyright (C) 2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "stream_decoder.h" + +#include "utils/emscripten_utils.hpp" + +#include +#include +#include + +#include + +extern "C" +{ +#include +#include +#include +#include +#include +#include +} + +StreamDecoder::StreamDecoder(const std::string& fileName, int blockSize, int maxDecodedSize) + : m_fileName(fileName), + m_blockSize(std::max(blockSize, 0)), + m_maxDecodedSize(std::max(maxDecodedSize, 0)) +{ +} + +StreamDecoder::~StreamDecoder() +{ + Deinitialize(); +} + +bool StreamDecoder::Initialize() +{ + // Preallocate an AVFormatContext to use our custom read function instead of + // the avformat internal I/O layer + m_formatContext = avformat_alloc_context(); + if (m_formatContext == nullptr) + { + std::cerr << "Failed to allocate AVFormatContext" << std::endl; + return false; + } + + // Use nonblocking reads if possible + m_formatContext->flags |= AVFMT_FLAG_NONBLOCK; + + // The buffer size is very important for performance. For protocols with + // fixed blocksize it should be set to this blocksize. 
+ unsigned int bufferSize = 32768; // From IO_BUFFER_SIZE in aviobuf.c + if (m_blockSize > 1) + bufferSize = m_blockSize; + + uint8_t* avioContextBuffer = static_cast(av_malloc(bufferSize)); + if (avioContextBuffer == nullptr) + { + std::cerr << "Failed to allocate buffer for I/O" << std::endl; + return false; + } + + // Initialize the I/O context + m_ioContext = avio_alloc_context(avioContextBuffer, bufferSize, 0, this, ReadPacketInternal, + nullptr, SeekInternal); + if (m_ioContext == nullptr) + { + std::cerr << "Failed to allocate AVIOContext" << std::endl; + return false; + } + + if (m_blockSize > 1) + m_ioContext->max_packet_size = bufferSize; + + // Set the pb field of the AVFormatContext to the newly created AVIOContext + m_formatContext->pb = m_ioContext; + + int result = avformat_open_input(&m_formatContext, m_fileName.c_str(), nullptr, nullptr); + if (result < 0) + { + std::cerr << "Failed to open input: " << av_err2str(result) << std::endl; + return false; + } + + const int streamInfoResult = avformat_find_stream_info(m_formatContext, nullptr); + + // Uncomment to dump video metadata + std::cout << "=== Detecting input video" << std::endl; + av_dump_format(m_formatContext, 0, m_fileName.c_str(), 0); + + for (int i = 0; i < m_formatContext->nb_streams; i++) + { + auto type = m_formatContext->streams[i]->codec->codec_type; + if (m_videoStreamId < 0 && type == AVMEDIA_TYPE_VIDEO) + { + m_videoStreamId = i; + break; + } + } + + if (m_videoStreamId < 0) + { + std::cerr << "No audio/video stream found" << std::endl; + return false; + } + + // Get stream parameters + AVStream* videoStream = m_formatContext->streams[m_videoStreamId]; + m_timeBaseNum = videoStream->time_base.num; + m_timeBaseDen = videoStream->time_base.den; + m_realFrameRateNum = videoStream->r_frame_rate.num; + m_realFrameRateDen = videoStream->r_frame_rate.den; + + // Get codec parameters + AVCodecParameters* codecParams = videoStream->codecpar; + m_videoBitRate = 
videoStream->codecpar->bit_rate; + + auto codec = avcodec_find_decoder(codecParams->codec_id); + if (codec == nullptr) + { + std::cerr << "Failed avcodec_find_decoder(" << codecParams->codec_id << ")" << std::endl; + return false; + } + + m_videoCodecContext = avcodec_alloc_context3(codec); + if (m_videoCodecContext == nullptr) + { + std::cerr << "Failed avcodec_alloc_context3()" << std::endl; + return false; + } + + result = avcodec_parameters_to_context(m_videoCodecContext, codecParams); + if (result != 0) + { + std::cerr << "Failed avcodec_parameters_to_context(): " << av_err2str(result) << std::endl; + return false; + } + + result = avcodec_open2(m_videoCodecContext, codec, nullptr); + if (result < 0) + { + std::cerr << "Failed avcodec_open2(): " << av_err2str(result) << std::endl; + return false; + } + + if (m_videoCodecContext->width <= 0 || m_videoCodecContext->height <= 0) + { + std::cerr << "Video has invalid dimensions: " << m_videoCodecContext->width << " x " + << m_videoCodecContext->height << std::endl; + return false; + } + + m_width = static_cast(m_videoCodecContext->width); + m_height = static_cast(m_videoCodecContext->height); + + // Reduce destination size to fit max constraint + if (m_maxDecodedSize > 0) + { + while (std::max(m_width, m_height) > m_maxDecodedSize) + { + m_width /= 2; + m_height /= 2; + } + } + + if (m_width == 0 || m_height == 0) + { + std::cerr << "Invalid dimensions (width = " << m_width << ", height = " << m_height + << ", max size = " << m_maxDecodedSize << ")" << std::endl; + return false; + } + + m_frameSize = av_image_get_buffer_size(m_targetFormat, m_width, m_height, 1); + + m_decodedFrame = av_frame_alloc(); + if (m_decodedFrame == nullptr) + { + std::cerr << "Failed to alloc m_decodedFrame" << std::endl; + return false; + } + + m_videoBuffer = static_cast(av_malloc(m_frameSize)); + if (m_videoBuffer == nullptr) + { + std::cerr << "Failed to allocate video buffer" << std::endl; + return false; + } + + return true; +} + 
+/*!
+ * \brief Release every libav resource owned by the decoder
+ *
+ * Each pointer is reset to nullptr once released, so calling this on a
+ * partially-initialized or already-deinitialized object is harmless.
+ */
+void StreamDecoder::Deinitialize()
+{
+  // av_freep() frees the buffer and nulls the pointer in one step
+  av_freep(&m_videoBuffer);
+
+  // An AVFrame must be released with av_frame_free() rather than av_free(),
+  // otherwise the buffers the frame still references are never unreferenced
+  av_frame_free(&m_decodedFrame);
+
+  sws_freeContext(m_scaler);
+  m_scaler = nullptr;
+
+  // Frees the codec context and nulls the pointer
+  avcodec_free_context(&m_videoCodecContext);
+
+  // Closes the demuxer and nulls the pointer
+  avformat_close_input(&m_formatContext);
+
+  if (m_ioContext != nullptr)
+  {
+    // libav may have replaced the I/O buffer internally, so free the one that
+    // is currently attached to the context
+    av_freep(&m_ioContext->buffer);
+    avio_context_free(&m_ioContext);
+  }
+}
+
+/*!
+ * \brief Initialize the decoder on first use
+ *
+ * Initialization is only attempted while the state is Init. Later calls just
+ * report whether the decoder is in a non-failed state.
+ *
+ * \return False if the decoder has failed, true otherwise
+ */
+bool StreamDecoder::OpenVideo()
+{
+  if (m_state == StreamDecoderState::Init)
+    m_state = Initialize() ? StreamDecoderState::Running : StreamDecoderState::Failed;
+
+  return m_state != StreamDecoderState::Failed;
+}
+
+/*!
+ * \brief Append a chunk of input data to the in-memory stream
+ *
+ * The bytes are copied out of the JavaScript value, so the caller may reuse
+ * its array afterwards.
+ *
+ * \param packet Array-like JavaScript value holding the bytes (read through
+ *        EmscriptenUtils::ArrayLength() / GetArrayData())
+ */
+void StreamDecoder::AddPacket(const emscripten::val& packet)
+{
+  const unsigned int dataSize = EmscriptenUtils::ArrayLength(packet);
+
+  std::vector<uint8_t> data(dataSize);
+
+  EmscriptenUtils::GetArrayData(packet, data.data(), dataSize);
+
+  m_packets.emplace_back(std::move(data));
+  m_totalSize += dataSize;
+}
+
+/*!
+ * \brief Read one packet from the input and try to decode a video frame
+ *
+ * On return the state is:
+ *   - HasFrame if a frame was decoded and scaled into the frame buffer
+ *   - Ended if the demuxer reported end of file
+ *   - Failed if any libav call reported an error
+ *   - unchanged otherwise (non-video packet, or the decoder needs more input)
+ */
+void StreamDecoder::Decode()
+{
+  if (m_state == StreamDecoderState::Failed)
+    return;
+
+  // Decoding before a successful OpenVideo() would dereference null contexts
+  if (m_formatContext == nullptr || m_videoCodecContext == nullptr)
+  {
+    std::cerr << "Failed to decode: Video is not open" << std::endl;
+    return;
+  }
+
+  AVPacket packet{};
+
+  int result = av_read_frame(m_formatContext, &packet);
+  if (result == AVERROR_EOF)
+  {
+    m_state = StreamDecoderState::Ended;
+    return;
+  }
+  else if (result < 0)
+  {
+    std::cerr << "Error reading frame: " << av_err2str(result) << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  const bool isVideoPacket = (packet.stream_index == m_videoStreamId);
+
+  if (isVideoPacket)
+    result = avcodec_send_packet(m_videoCodecContext, &packet);
+
+  // The decoder takes its own reference to (or copy of) the packet data, so
+  // our reference is dropped right away. Doing it here, before any error
+  // handling, means no early return below can leak the packet.
+  av_packet_unref(&packet);
+
+  if (!isVideoPacket)
+    return;
+
+  if (result < 0)
+  {
+    std::cerr << "Error sending packet: " << av_err2str(result) << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  // The scaler is created lazily on the first video packet
+  if (m_scaler == nullptr)
+  {
+    m_scaler = sws_getContext(m_videoCodecContext->width, m_videoCodecContext->height,
+                              m_videoCodecContext->pix_fmt, m_width, m_height, m_targetFormat,
+                              SWS_BILINEAR, nullptr, nullptr, nullptr);
+  }
+
+  if (m_scaler == nullptr)
+  {
+    std::cerr << "Failed to create software scaler" << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  result = avcodec_receive_frame(m_videoCodecContext, m_decodedFrame);
+  if (result == AVERROR(EAGAIN))
+  {
+    // The decoder needs more input before it can output a frame
+    return;
+  }
+  else if (result < 0)
+  {
+    std::cerr << "Error receiving frame: " << av_err2str(result) << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  if (m_decodedFrame->pts < 0)
+  {
+    std::cerr << "Invalid pts: " << m_decodedFrame->pts << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  // Size in bytes of one row of the destination image
+  const int videoLineSize = av_image_get_buffer_size(m_targetFormat, m_width, 1, 1);
+
+  uint8_t* const dest[] = {m_videoBuffer};
+  const int destLineSize[] = {videoLineSize};
+
+  // Convert the decoded frame to the target format and size
+  result = sws_scale(m_scaler, m_decodedFrame->data, m_decodedFrame->linesize, 0,
+                     m_videoCodecContext->height, dest, destLineSize);
+
+  if (result < 0)
+  {
+    std::cerr << "Error scaling frame: " << av_err2str(result) << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  m_videoPts = static_cast<uint64_t>(m_decodedFrame->pts);
+
+  m_state = StreamDecoderState::HasFrame;
+}
+
+/*!
+ * \brief Get the address of the decoded frame buffer
+ *
+ * Consuming a frame moves the decoder from HasFrame back to Running. Any
+ * other state (Init, Ended, Failed) is left untouched so that it is not
+ * masked by a stray call.
+ *
+ * \return The buffer address as an integer, or 0 if no buffer is allocated
+ */
+uintptr_t StreamDecoder::GetFrameBuffer()
+{
+  if (m_state == StreamDecoderState::HasFrame)
+    m_state = StreamDecoderState::Running;
+
+  return reinterpret_cast<uintptr_t>(m_videoBuffer);
+}
+
+/*!
+ * \brief Copy input data at the current read position into a buffer
+ *
+ * A single call never crosses a packet boundary: it returns at most the
+ * remainder of the stored packet that contains the read position.
+ *
+ * \param buffer Destination buffer
+ * \param bufferSize Capacity of the destination buffer, in bytes
+ *
+ * \return Number of bytes copied, or 0 if no data is available
+ */
+unsigned int StreamDecoder::ReadPacket(uint8_t* buffer, unsigned int bufferSize)
+{
+  if (m_packets.empty())
+  {
+    std::cerr << "No data to read" << std::endl;
+    return 0;
+  }
+
+  if (buffer == nullptr)
+    return 0;
+
+  // Offset of the read position relative to the packet being examined. Kept
+  // 64-bit like m_readPosition so it is not truncated.
+  uint64_t position = m_readPosition;
+
+  for (const auto& packet : m_packets)
+  {
+    if (position < packet.size())
+    {
+      const uint64_t remainingPacketSize = packet.size() - position;
+      const unsigned int copySize =
+          static_cast<unsigned int>(std::min<uint64_t>(bufferSize, remainingPacketSize));
+
+      if (copySize > 0)
+      {
+        std::memcpy(buffer, packet.data() + position, copySize);
+        m_readPosition += copySize;
+      }
+
+      return copySize;
+    }
+
+    position -= packet.size();
+  }
+
+  return 0;
+}
+
+/*!
+ * \brief Move the read position to an absolute offset
+ *
+ * \param offset Absolute position in bytes; may equal the total size
+ *
+ * \return The new read position, or uint64_t(-1) if the offset is past the
+ *         end of the data received so far
+ */
+uint64_t StreamDecoder::Seek(uint64_t offset)
+{
+  if (offset > m_totalSize)
+    return static_cast<uint64_t>(-1);
+
+  m_readPosition = offset;
+
+  return m_readPosition;
+}
+
+/*!
+ * \brief Read callback handed to avio_alloc_context()
+ *
+ * \param context The StreamDecoder registered as the opaque pointer
+ * \param buffer Destination buffer
+ * \param bufferSize Capacity of the destination buffer, in bytes
+ *
+ * \return Number of bytes read, AVERROR(EAGAIN) if no data is available, or
+ *         AVERROR_EXIT on invalid arguments
+ */
+int StreamDecoder::ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize)
+{
+  StreamDecoder* decoder = static_cast<StreamDecoder*>(context);
+  if (decoder == nullptr || bufferSize <= 0)
+    return AVERROR_EXIT;
+
+  const unsigned int length = decoder->ReadPacket(buffer, static_cast<unsigned int>(bufferSize));
+  if (length == 0)
+    return AVERROR(EAGAIN);
+
+  return static_cast<int>(length);
+}
+
+/*!
+ * \brief Seek callback handed to avio_alloc_context()
+ *
+ * \param context The StreamDecoder registered as the opaque pointer
+ * \param offset Offset relative to the origin selected by whence; may be
+ *        negative for SEEK_CUR and SEEK_END
+ * \param whence SEEK_SET, SEEK_CUR or SEEK_END, optionally combined with
+ *        AVSEEK_FORCE, or AVSEEK_SIZE to query the stream size
+ *
+ * \return The new position (or the total size for AVSEEK_SIZE), or a negative
+ *         value on failure
+ */
+int64_t StreamDecoder::SeekInternal(void* context, int64_t offset, int whence)
+{
+  StreamDecoder* decoder = static_cast<StreamDecoder*>(context);
+  if (decoder == nullptr)
+    return AVERROR_EXIT;
+
+  // A size query ignores the offset, so it is handled before any validation
+  if ((whence & AVSEEK_SIZE) != 0)
+    return static_cast<int64_t>(decoder->m_totalSize);
+
+  // AVSEEK_FORCE is only a hint and may be OR'ed into whence
+  whence &= ~AVSEEK_FORCE;
+
+  int64_t origin = 0;
+
+  switch (whence)
+  {
+    // Seek from beginning of file
+    case SEEK_SET:
+      origin = 0;
+      break;
+
+    // Seek from current position
+    case SEEK_CUR:
+      origin = static_cast<int64_t>(decoder->m_readPosition);
+      break;
+
+    // Seek from end of file
+    case SEEK_END:
+      origin = static_cast<int64_t>(decoder->m_totalSize);
+      break;
+
+    default:
+      return -1;
+  }
+
+  // Only the resulting position has to be non-negative and within the data
+  // received so far. The offset itself may legitimately be negative.
+  const int64_t target = origin + offset;
+  if (target < 0 || static_cast<uint64_t>(target) > decoder->m_totalSize)
+    return -1;
+
+  return static_cast<int64_t>(decoder->Seek(static_cast<uint64_t>(target)));
+}
diff --git a/lib/stream_decoder/stream_decoder.h b/lib/stream_decoder/stream_decoder.h
new file mode 100644
index 000000000..b812175c0
--- /dev/null
+++ b/lib/stream_decoder/stream_decoder.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+extern "C"
+{
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswscale/swscale.h>
+}
+
+namespace emscripten
+{
+class val;
+}
+
+/*!
+ * \brief Lifecycle state of a StreamDecoder
+ */
+enum class StreamDecoderState
+{
+  Init, // Constructed, OpenVideo() has not run yet
+  Running, // Opened, no undelivered frame
+  HasFrame, // A decoded frame is waiting in the frame buffer
+  Ended, // The demuxer reported end of file
+  Failed, // Initialization or decoding reported an error
+};
+
+/*!
+ * \brief Decodes video frames from data that is pushed in chunk by chunk
+ *
+ * Input bytes are supplied with AddPacket() and read by libav through custom
+ * I/O callbacks. Decoded frames are converted to RGBA and exposed through
+ * GetFrameBuffer().
+ */
+class StreamDecoder
+{
+public:
+  /*!
+   * \brief Create a decoder; libav resources are allocated by OpenVideo()
+   *
+   * Negative sizes are treated as 0.
+   *
+   * \param fileName Name handed to libav when the input is opened
+   * \param blockSize I/O buffer size in bytes; values <= 1 select the default
+   * \param maxDecodedSize Upper bound, in pixels, for the larger dimension of
+   *        the output frame (dimensions are halved until they fit), or 0 for
+   *        no limit
+   */
+  StreamDecoder(const std::string& fileName, int blockSize, int maxDecodedSize);
+  ~StreamDecoder();
+
+  // The decoder owns raw libav resources that are released in the destructor,
+  // so a copy would free them twice
+  StreamDecoder(const StreamDecoder&) = delete;
+  StreamDecoder& operator=(const StreamDecoder&) = delete;
+
+  // Public accessors
+  StreamDecoderState GetState() const { return m_state; }
+  unsigned int GetFrameSize() const { return m_frameSize; }
+  unsigned int GetFrameWidth() const { return m_width; }
+  unsigned int GetFrameHeight() const { return m_height; }
+  unsigned int GetTimeBaseNum() const { return m_timeBaseNum; }
+  unsigned int GetTimeBaseDen() const { return m_timeBaseDen; }
+  unsigned int GetRealFrameRateNum() const { return m_realFrameRateNum; }
+  unsigned int GetRealFrameRateDen() const { return m_realFrameRateDen; }
+  unsigned int GetVideoBitRate() const { return m_videoBitRate; }
+
+  // Video API
+
+  /*!
+   * \brief Initialize the decoder if that has not been attempted yet
+   *
+   * \return False if the decoder is in the Failed state, true otherwise
+   */
+  bool OpenVideo();
+
+  /*!
+   * \brief Append a chunk of input data; the bytes are copied
+   */
+  void AddPacket(const emscripten::val& packet);
+
+  /*!
+   * \brief Read one packet and try to decode a frame; check GetState() for
+   *        the outcome
+   */
+  void Decode();
+
+  /*!
+   * \brief Get the address of the RGBA frame buffer as an integer
+   *
+   * Meant to be called once the state is HasFrame; the state is Running
+   * afterwards.
+   */
+  // TODO: "Can't use void* because embind has problems with a getter that returns
+  // a raw pointer"
+  uintptr_t GetFrameBuffer();
+
+  /*!
+   * \brief Get the pts of the most recently decoded frame
+   *
+   * The value is stored as 64 bits and truncated to unsigned int here.
+   */
+  unsigned int GetFramePts() const { return static_cast<unsigned int>(m_videoPts); }
+
+private:
+  // Lifecycle functions
+  bool Initialize();
+  void Deinitialize();
+
+  // Private accessors (64-bit, matching the members they expose)
+  uint64_t GetReadPosition() const { return m_readPosition; }
+  uint64_t GetTotalSize() const { return m_totalSize; }
+
+  // IO functions
+  unsigned int ReadPacket(uint8_t* buffer, unsigned int bufferSize);
+  uint64_t Seek(uint64_t offset);
+
+  // libav internal IO functions
+  static int ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int64_t SeekInternal(void* context, int64_t offset, int whence);
+
+  // Construction parameters
+  const std::string m_fileName;
+  const unsigned int m_blockSize;
+  const unsigned int m_maxDecodedSize;
+
+  // State parameters
+  StreamDecoderState m_state = StreamDecoderState::Init;
+
+  // Video parameters
+  int m_videoStreamId = -1;
+  unsigned int m_width = 0;
+  unsigned int m_height = 0;
+  unsigned int m_frameSize = 0;
+  // constexpr (implicitly inline in C++17) so no out-of-class definition is needed
+  static constexpr AVPixelFormat m_targetFormat = AV_PIX_FMT_RGBA;
+  unsigned int m_timeBaseNum = 0;
+  unsigned int m_timeBaseDen = 0;
+  unsigned int m_realFrameRateNum = 0;
+  unsigned int m_realFrameRateDen = 0;
+  unsigned int m_videoBitRate = 0; // bits/sec
+
+  // Video resources
+  AVIOContext* m_ioContext = nullptr;
+  AVFormatContext* m_formatContext = nullptr;
+  AVCodecContext* m_videoCodecContext = nullptr;
+  SwsContext* m_scaler = nullptr;
+  AVFrame* m_decodedFrame = nullptr;
+  uint8_t* m_videoBuffer = nullptr;
+  uint64_t m_videoPts = 0;
+
+  // IO state (seeking is virtualized)
+  std::vector<std::vector<uint8_t>> m_packets; // Input chunks in arrival order
+  uint64_t m_totalSize = 0; // Sum of all chunk sizes, in bytes
+  uint64_t m_readPosition = 0; // Absolute read offset into the chunks
+};
diff --git a/lib/stream_decoder/stream_decoder_embinder.cpp b/lib/stream_decoder/stream_decoder_embinder.cpp
new file mode 100644
index 000000000..82462c0cf
--- /dev/null
+++ b/lib/stream_decoder/stream_decoder_embinder.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_decoder.h"
+
+#include <emscripten/bind.h>
+
+using namespace emscripten;
+
+// JavaScript bindings for the stream decoder. Getters are exposed as
+// read-only properties, the video API as methods.
+EMSCRIPTEN_BINDINGS(stream_decoder)
+{
+  enum_<StreamDecoderState>("StreamDecoderState")
+      .value("Init", StreamDecoderState::Init)
+      .value("Running", StreamDecoderState::Running)
+      .value("HasFrame", StreamDecoderState::HasFrame)
+      .value("Ended", StreamDecoderState::Ended)
+      .value("Failed", StreamDecoderState::Failed);
+
+  class_<StreamDecoder>("StreamDecoder")
+      // Arguments: fileName, blockSize, maxDecodedSize
+      .constructor<std::string, int, int>()
+      .property("state", &StreamDecoder::GetState)
+      .property("frameSize", &StreamDecoder::GetFrameSize)
+      .property("frameWidth", &StreamDecoder::GetFrameWidth)
+      .property("frameHeight", &StreamDecoder::GetFrameHeight)
+      .property("timeBaseNum", &StreamDecoder::GetTimeBaseNum)
+      .property("timeBaseDen", &StreamDecoder::GetTimeBaseDen)
+      .property("realFrameRateNum", &StreamDecoder::GetRealFrameRateNum)
+      .property("realFrameRateDen", &StreamDecoder::GetRealFrameRateDen)
+      .property("videoBitRate", &StreamDecoder::GetVideoBitRate)
+      .function("openVideo", &StreamDecoder::OpenVideo)
+      .function("addPacket", &StreamDecoder::AddPacket)
+      .function("decode", &StreamDecoder::Decode)
+      .function("getFrameBuffer", &StreamDecoder::GetFrameBuffer)
+      .function("getFramePts", &StreamDecoder::GetFramePts);
+}
diff --git a/lib/stream_encoder/stream_encoder.cpp b/lib/stream_encoder/stream_encoder.cpp
new file mode 100644
index 000000000..63356d5ad
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder.cpp
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */ + +#include "stream_encoder.hpp" + +#include "utils/emscripten_utils.hpp" +#include "video/video_frame.hpp" + +#include +#include +#include + +#include + +extern "C" +{ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +} + +const AVPixelFormat StreamEncoder::m_inputFormat = AV_PIX_FMT_RGBA; +const AVPixelFormat StreamEncoder::m_targetFormat = AV_PIX_FMT_YUV420P; +const unsigned int StreamEncoder::m_cachePageSize = 32768; // From IO_BUFFER_SIZE in aviobuf.c + +StreamEncoder::StreamEncoder(const std::string& fileName, + int width, + int height, + int timeBaseNum, + int timeBaseDen, + int realFrameRateNum, + int realFrameRateDen, + int videoBitRate, + int writeBufferSize, + int writePacketFnPtr, + int seekFnPtr) + : m_fileName(fileName), + m_width(std::max(width, 0)), + m_height(std::max(height, 0)), + m_timeBaseNum(std::max(timeBaseNum, 0)), + m_timeBaseDen(std::max(timeBaseDen, 0)), + m_realFrameRateNum(std::max(realFrameRateNum, 0)), + m_realFrameRateDen(std::max(realFrameRateDen, 0)), + m_videoBitRate(std::max(videoBitRate, 0)), + m_writeBufferSize(writeBufferSize > 0 ? 
writeBufferSize : m_cachePageSize), + m_targetFrame(std::make_unique(width, height, m_targetFormat)), + m_writePacketFnPtr(reinterpret_cast(writePacketFnPtr)), + m_seekFnPtr(reinterpret_cast(seekFnPtr)) +{ + // Enable libav debugging + av_log_set_level(AV_LOG_DEBUG); +} + +StreamEncoder::~StreamEncoder() +{ + Deinitialize(); +} + +bool StreamEncoder::Initialize() +{ + // Validate state + if (m_width == 0 || m_height == 0) + { + std::cerr << "Invalid dimensions (width = " << m_width << ", height = " << m_height << ")" + << std::endl; + return false; + } + + if (m_timeBaseNum == 0 || m_timeBaseDen == 0) + { + std::cerr << "Invalid time base (numerator = " << m_timeBaseNum + << ", denominator = " << m_timeBaseDen << ")" << std::endl; + return false; + } + + if (m_realFrameRateNum == 0 || m_realFrameRateDen == 0) + { + std::cerr << "Invalid frame rate (numerator = " << m_realFrameRateNum + << ", denominator = " << m_realFrameRateDen << ")" << std::endl; + return false; + } + + if (m_writePacketFnPtr == nullptr || m_seekFnPtr == nullptr) + { + std::cerr << "Invalid function pointers passed to constructor" << std::endl; + return false; + } + + if (!CreateContext()) + return false; + + if (!PrepareVideoEncoder()) + return false; + + if (!InitializeVideoStream()) + return false; + + EnableGlobalHeaders(); + + // Dump av format information + PrintInfo(); + + if (!WriteHeader()) + return false; + + // Initialize buffer for YUV420p frame data + if (!m_targetFrame->Initialize()) + return false; + + return true; +} + +bool StreamEncoder::CreateContext() +{ + // Guess encoding format + AVOutputFormat* outputFormat = av_guess_format(nullptr, m_fileName.c_str(), nullptr); + if (outputFormat == nullptr) + { + std::cerr << "Unknown file extension: " << m_fileName << std::endl; + return false; + } + + // Allocate an AVFormatContext to use our custom write function instead of + // the avformat internal I/O layer + int result = + avformat_alloc_output_context2(&m_formatContext, 
outputFormat, nullptr, m_fileName.c_str()); + if (result < 0) + { + std::cerr << "Failed to allocate AVFormatContext: " << av_err2str(result) << std::endl; + return false; + } + + // Create a write buffer for our custom I/O context + uint8_t* avioContextBuffer = static_cast(av_malloc(m_writeBufferSize)); + if (avioContextBuffer == nullptr) + { + std::cerr << "Failed to allocate buffer for I/O" << std::endl; + return false; + } + + // Initialize the I/O context + m_ioContext = avio_alloc_context(avioContextBuffer, m_writeBufferSize, 1, this, nullptr, + WritePacketInternal, SeekInternal); + if (m_ioContext == nullptr) + { + std::cerr << "Failed to allocate AVIOContext" << std::endl; + return false; + } + + // Set the pb field of the AVFormatContext to the newly created AVIOContext + m_formatContext->pb = m_ioContext; + + return true; +} + +bool StreamEncoder::PrepareVideoEncoder() +{ + // It's necessary to open stream codec to link it to "codec" (the encoder) + const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264); + if (codec == nullptr) + { + std::cerr << "Error: Can't find H.264 encoder" << std::endl; + return false; + } + + m_codecContext = avcodec_alloc_context3(codec); + if (m_codecContext == nullptr) + { + std::cerr << "Error: Can't allocate codec context" << std::endl; + return false; + } + + // Fill in codec context + m_codecContext->bit_rate = m_videoBitRate; + m_codecContext->pix_fmt = m_targetFormat; + m_codecContext->width = m_width; + m_codecContext->height = m_height; + m_codecContext->time_base = + AVRational{static_cast(m_realFrameRateDen), static_cast(m_realFrameRateNum)}; + + // Prefer reasonable speed over quality + av_opt_set(m_codecContext->priv_data, "preset", "fast", 0); + + // TODO: Set Constant Rate Factor (CRF) quality if bit_rate is zero. 
In this + // case, expose CRF as an initialization parameter to the user + //av_opt_set_int(m_codecContext->priv_data, "crf", 23, AV_OPT_SEARCH_CHILDREN); + + // H.264 specific options + /* TODO + m_codecContext->gop_size = 25; // Emit one intraframe every 25 frames + m_codecContext->level = 31; + */ + + int result = avcodec_open2(m_codecContext, codec, nullptr); + if (result < 0) + { + std::cerr << "Error opening codec: " << av_err2str(result) << std::endl; + return false; + } + + return true; +} + +bool StreamEncoder::InitializeVideoStream() +{ + m_videoStream = avformat_new_stream(m_formatContext, nullptr); + if (m_videoStream == nullptr) + { + std::cerr << "Error: Can't create new stream" << std::endl; + return false; + } + + m_videoStream->id = m_formatContext->nb_streams - 1; + + // Copy the stream parameters to the muxer + int result = avcodec_parameters_from_context(m_videoStream->codecpar, m_codecContext); + if (result < 0) + { + std::cerr << "Error setting parameters: " << av_err2str(result) << std::endl; + return false; + } + + // Set stream time base as a hint to the muxer before writing the header + m_videoStream->time_base = + AVRational{static_cast(m_timeBaseNum), static_cast(m_timeBaseDen)}; + m_videoStream->r_frame_rate = + AVRational{static_cast(m_realFrameRateNum), static_cast(m_realFrameRateDen)}; + + return true; +} + +void StreamEncoder::EnableGlobalHeaders() +{ + // Set the flag AV_CODEC_FLAG_GLOBAL_HEADER which tells the encoder that it + // can use the global headers + if (m_formatContext->oformat->flags & AVFMT_GLOBALHEADER) + m_formatContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; +} + +void StreamEncoder::PrintInfo() +{ + std::cout << "=== Detecting output video" << std::endl; + av_dump_format(m_formatContext, 0, m_fileName.c_str(), 1); +} + +bool StreamEncoder::WriteHeader() +{ + // Set format's private options, to be passed to avformat_write_header() + AVDictionary* formatOptions = nullptr; + + // Set faststart (requires a seekable 
write stream) + // TODO: This requires re-opening the file for reading + /* + int result = av_dict_set(&formatOptions, "movflags", "faststart", 0); + if (result < 0) + { + std::cerr << "Error setting option: " << av_err2str(result) << std::endl; + return false; + } + */ + + // Write file header if necessary + int result = avformat_write_header(m_formatContext, &formatOptions); + + if (formatOptions != nullptr) + av_dict_free(&formatOptions); + + if (result < 0) + { + std::cerr << "Error writing header: " << av_err2str(result) << std::endl; + return false; + } + + return true; +} + +void StreamEncoder::Deinitialize() +{ + if (m_formatContext != nullptr) + { + avformat_free_context(m_formatContext); + m_formatContext = nullptr; + } + + if (m_ioContext != nullptr) + { + av_free(m_ioContext->buffer); + av_free(m_ioContext); + m_ioContext = nullptr; + } + + if (m_inputFrameBuffer != nullptr) + { + av_free(m_inputFrameBuffer); + m_inputFrameBuffer = nullptr; + } +} + +bool StreamEncoder::OpenVideo() +{ + Deinitialize(); + + if (Initialize()) + { + m_state = StreamEncoderState::Running; + return true; + } + else + { + m_state = StreamEncoderState::Failed; + return false; + } +} + +bool StreamEncoder::AddFrame(const emscripten::val& frameData, int framePts) +{ + // Validate state + if (m_state != StreamEncoderState::Running) + { + std::cerr << "Failed to add frame: Not running" << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + const size_t dataSize = EmscriptenUtils::ArrayLength(frameData); + + // Allocate buffer for input frame, if necessary + if (m_inputFrameSize == 0) + { + m_inputFrameSize = static_cast(dataSize); + m_inputFrameBuffer = static_cast(av_malloc(dataSize)); + } + else + { + // Verify size hasn't changed + if (m_inputFrameSize != dataSize) + { + std::cerr << "Invalid data size: " << dataSize << " (expected " << m_inputFrameSize << ")" + << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + } + + // Copy data 
+ EmscriptenUtils::GetArrayData(frameData, m_inputFrameBuffer, m_inputFrameSize); + + // Create scaler, if necessary + if (m_scaler == nullptr) + { + m_scaler = sws_getContext(m_width, m_height, m_inputFormat, m_width, m_height, m_targetFormat, + SWS_BILINEAR, nullptr, nullptr, nullptr); + } + + if (m_scaler == nullptr) + { + std::cerr << "Failed to create software scaler" << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + uint8_t* const sourceData[] = {m_inputFrameBuffer}; + const int sourceLineSize[] = {static_cast(m_inputFrameSize / m_height)}; + + // Scale pixels from RGBA to YUV420p + int result = + sws_scale(m_scaler, sourceData, sourceLineSize, 0, m_height, + m_targetFrame->FrameHandle()->data, m_targetFrame->FrameHandle()->linesize); + + if (result < 0) + { + std::cerr << "Error scaling frame: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + // + // Rescale the presentation timestamp to fix bit rate estimation. We need to + // rescale the pts from the stream's time_base to the codec time_base to get + // a simple frame number (e.g. 1, 2, 3). 
+ // + // See https://stackoverflow.com/questions/11466184/setting-video-bit-rate-through-ffmpeg-api-is-ignored-for-libx264-codec + // + m_targetFrame->FrameHandle()->pts = av_rescale_q( + static_cast(framePts), m_videoStream->time_base, m_codecContext->time_base); + + // Encode the frame + result = avcodec_send_frame(m_codecContext, m_targetFrame->FrameHandle()); + if (result < 0) + { + std::cerr << "Failed to encode frame: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + // Handle encoded packets + return ReceivePackets(); +} + +bool StreamEncoder::ReceivePackets() +{ + int result = 0; + + AVPacket videoPacket; + av_init_packet(&videoPacket); + videoPacket.stream_index = m_videoStream->index; + + // Process the encoded packets + while (result >= 0) + { + result = avcodec_receive_packet(m_codecContext, &videoPacket); + + if (result == AVERROR(EAGAIN)) + { + // We'll read again when more frames have been added + break; + } + else if (result == AVERROR_EOF) + { + // No more packets to read + break; + } + else if (result < 0) + { + std::cerr << "Error receiving packet: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + // Rescale the pts, dts and duration of the encoded video packet to the + // stream time base + av_packet_rescale_ts(&videoPacket, m_codecContext->time_base, m_videoStream->time_base); + + // Write the frame. Packet is freed by the write. 
+ result = av_interleaved_write_frame(m_formatContext, &videoPacket); + if (result < 0) + { + std::cerr << "Error writing packet: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + } + + return true; +} + +bool StreamEncoder::Finalize() +{ + // Validate state + if (m_state == StreamEncoderState::Ended) + { + // Ignore double calls to finalize + return true; + } + + if (m_state != StreamEncoderState::Running) + { + std::cerr << "Failed to finalize: Not running" << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + m_state = StreamEncoderState::Ended; + + // Send NULL frame, in which case it is considered a flush packet. This + // signals the end of the stream. + int result = avcodec_send_frame(m_codecContext, nullptr); + if (result < 0) + { + std::cerr << "Error ending stream: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + // Handle remaining encoded packets + if (!ReceivePackets()) + return false; + + // Write file trailer before exit + result = av_write_trailer(m_formatContext); + if (result < 0) + { + std::cerr << "Error writing trailer: " << av_err2str(result) << std::endl; + m_state = StreamEncoderState::Failed; + return false; + } + + Deinitialize(); + + return true; +} + +/* +unsigned int StreamEncoder::ReadPacket(uint8_t* buffer, unsigned int bufferSize) +{ + // TODO + + return -1; +} +*/ + +void StreamEncoder::WritePacket(uint8_t* buffer, unsigned int bufferSize) +{ + const int bufPtr = reinterpret_cast(buffer); + const int bufSize = static_cast(bufferSize); + m_writePacketFnPtr(bufPtr, bufSize); +} + +void StreamEncoder::Seek(int64_t offset, int whence) +{ + m_seekFnPtr(static_cast(offset), whence); +} + +/* +int StreamEncoder::ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize) +{ + StreamEncoder* encoder = static_cast(context); + if (encoder == nullptr || bufferSize <= 0) + return AVERROR_EXIT; + + const unsigned 
int length = encoder->ReadPacket(buffer, bufferSize);
+  if (length == 0)
+    return AVERROR(EAGAIN);
+
+  return static_cast(length);
+}
+*/
+
+// libav write callback: passes the buffer to the encoder given as context, reports it all written
+int StreamEncoder::WritePacketInternal(void* context, uint8_t* buffer, int bufferSize)
+{
+  StreamEncoder* encoder = static_cast(context);
+  if (encoder == nullptr || bufferSize <= 0)
+    return AVERROR_EXIT;
+
+  encoder->WritePacket(buffer, static_cast(bufferSize));
+  return bufferSize;
+}
+
+// libav seek callback; returns 0 once the seek was forwarded, a negative AVERROR otherwise
+int64_t StreamEncoder::SeekInternal(void* context, int64_t offset, int whence)
+{
+  StreamEncoder* encoder = static_cast(context);
+  if (encoder == nullptr)
+    return AVERROR_EXIT;
+
+  // AVSEEK_SIZE asks for the stream size instead of seeking; that optional query is declined
+  if ((whence & AVSEEK_SIZE) != 0)
+    return AVERROR(ENOSYS);
+
+  const int origin = TranslateWhence(whence & ~AVSEEK_FORCE); // AVSEEK_FORCE is only a hint
+  if (origin < 0)
+    return AVERROR(EINVAL);
+  encoder->Seek(offset, origin);
+  return 0;
+}
+
+// Maps SEEK_SET/SEEK_CUR/SEEK_END to 0/1/2; returns -1 (never throws) for anything else
+int StreamEncoder::TranslateWhence(int whence)
+{
+  switch (whence)
+  {
+    case SEEK_SET: // Seek from beginning of file
+      return 0;
+    case SEEK_CUR: // Seek from current position
+      return 1;
+    case SEEK_END: // Seek from end of file
+      return 2;
+    default: // Not a plain seek origin; the caller turns this into an error code
+      return -1;
+  }
+}
diff --git a/lib/stream_encoder/stream_encoder.hpp b/lib/stream_encoder/stream_encoder.hpp
new file mode 100644
index 000000000..dac2489cb
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder.hpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include // for std::add_pointer
+#include
+
+extern "C"
+{
+#include
+#include
+#include
+}
+
+namespace emscripten
+{
+class val;
+}
+
+class VideoFrame;
+
+enum class StreamEncoderState
+{
+  Init, // State of a newly constructed encoder
+  Running, // Required by Finalize()
+  Ended, // Set by Finalize()
+  Failed, // Set when an operation reports an error
+};
+// Encodes video frames and passes the output to the write/seek callbacks given at construction
+class StreamEncoder
+{
+public:
+  /*! \brief Create a stream encoder instance
+   *  \param writePacketFnPtr,seekFnPtr IO callbacks, given as integer function pointers
+   */
+  StreamEncoder(const std::string& fileName,
+                int width,
+                int height,
+                int timeBaseNum,
+                int timeBaseDen,
+                int realFrameRateNum,
+                int realFrameRateDen,
+                int videoBitRate,
+                int writeBufferSize,
+                int writePacketFnPtr,
+                int seekFnPtr);
+  ~StreamEncoder();
+
+  // Accessors (the construction parameters are stored as unsigned values)
+  StreamEncoderState GetState() const { return m_state; }
+  unsigned int GetVideoWidth() const { return m_width; }
+  unsigned int GetVideoHeight() const { return m_height; }
+  unsigned int GetTimeBaseNumerator() const { return m_timeBaseNum; }
+  unsigned int GetTimeBaseDenominator() const { return m_timeBaseDen; }
+  unsigned int GetRealFrameRateNumerator() const { return m_realFrameRateNum; }
+  unsigned int GetRealFrameRateDenominator() const { return m_realFrameRateDen; }
+  unsigned int GetWriteBufferSize() const { return m_writeBufferSize; }
+
+  // Video API (Finalize() only succeeds on a Running encoder or one that has already Ended)
+  bool OpenVideo();
+  bool AddFrame(const emscripten::val& frameData, int framePts);
+  bool Finalize();
+
+private:
+  // Lifecycle functions
+  bool Initialize();
+  void Deinitialize();
+
+  // Video functions
+  bool ReceivePackets();
+
+  // IO functions: pass output buffers and seek requests to the construction-time callbacks
+  //unsigned int ReadPacket(uint8_t* buffer, unsigned int bufferSize);
+  void WritePacket(uint8_t* buffer, unsigned int bufferSize);
+  void Seek(int64_t offset, int whence);
+
+  // libav internal IO functions: static callbacks whose context is the StreamEncoder instance
+  //static int ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int WritePacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int64_t SeekInternal(void* context, int64_t offset, int whence);
+
+  // Utility functions
+  bool CreateContext();
+  bool PrepareVideoEncoder();
+  bool InitializeVideoStream();
+  void EnableGlobalHeaders();
+  void PrintInfo();
+  bool WriteHeader();
+  static int TranslateWhence(int whence); // SEEK_SET/CUR/END -> 0/1/2, -1 otherwise
+
+  // Types: function pointer types of the write and seek callbacks
+  using WritePacketFnPtr = std::add_pointer::type;
+  using SeekFnPtr = std::add_pointer::type;
+
+  // Constants
+  static const AVPixelFormat m_inputFormat;
+  static const AVPixelFormat m_targetFormat;
+  static const unsigned int m_cachePageSize;
+
+  // Construction parameters
+  const std::string m_fileName;
+  const unsigned int m_width;
+  const unsigned int m_height;
+  const unsigned int m_timeBaseNum;
+  const unsigned int m_timeBaseDen;
+  const unsigned int m_realFrameRateNum;
+  const unsigned int m_realFrameRateDen;
+  const unsigned int m_videoBitRate;
+  const unsigned int m_writeBufferSize;
+  const WritePacketFnPtr m_writePacketFnPtr;
+  const SeekFnPtr m_seekFnPtr;
+
+  // State parameters
+  StreamEncoderState m_state = StreamEncoderState::Init;
+
+  // Video resources (raw libav handles, initially null)
+  AVIOContext* m_ioContext = nullptr;
+  AVFormatContext* m_formatContext = nullptr;
+  AVStream* m_videoStream = nullptr;
+  AVCodecContext* m_codecContext = nullptr;
+  SwsContext* m_scaler = nullptr;
+  uint8_t* m_inputFrameBuffer = nullptr; // RGBA
+  size_t m_inputFrameSize = 0;
+  std::unique_ptr m_targetFrame; // YUV420p
+};
diff --git a/lib/stream_encoder/stream_encoder_embinder.cpp b/lib/stream_encoder/stream_encoder_embinder.cpp
new file mode 100644
index 000000000..364427a67
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder_embinder.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_encoder.hpp"
+
+#include
+
+using namespace emscripten;
+// Embind registration of StreamEncoderState and StreamEncoder under the names given below
+EMSCRIPTEN_BINDINGS(stream_encoder)
+{
+  enum_("StreamEncoderState")
+      .value("Init", StreamEncoderState::Init)
+      .value("Running", StreamEncoderState::Running)
+      .value("Ended", StreamEncoderState::Ended)
+      .value("Failed", StreamEncoderState::Failed);
+  // Properties are bound to the getters only; the functions are the public video API
+  class_("StreamEncoder")
+      .constructor()
+      .property("state", &StreamEncoder::GetState)
+      .property("videoWidth", &StreamEncoder::GetVideoWidth)
+      .property("videoHeight", &StreamEncoder::GetVideoHeight)
+      .property("timeBaseNum", &StreamEncoder::GetTimeBaseNumerator)
+      .property("timeBaseDen", &StreamEncoder::GetTimeBaseDenominator)
+      .property("realFrameRateNum", &StreamEncoder::GetRealFrameRateNumerator)
+      .property("realFrameRateDen", &StreamEncoder::GetRealFrameRateDenominator)
+      .property("writeBufferSize", &StreamEncoder::GetWriteBufferSize)
+      .function("openVideo", &StreamEncoder::OpenVideo)
+      .function("addFrame", &StreamEncoder::AddFrame)
+      .function("finalize", &StreamEncoder::Finalize);
+}
diff --git a/lib/video/video_frame.cpp b/lib/video/video_frame.cpp
new file mode 100644
index 000000000..0a50753d1
--- /dev/null
+++ b/lib/video/video_frame.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "video_frame.hpp"
+
+#include
+
+extern "C"
+{
+#include
+#include
+}
+
+// HTML uses RGBA pixels (hence 32 bits per pixel); TODO: what to use for YUV420p?
+const unsigned int VideoFrame::m_bitsPerPixel = 32;
+
+// Deleter for the AVFrame held by VideoFrame; does nothing for a null pointer
+void VideoFrame::FrameDeleter::operator()(AVFrame* frame)
+{
+  // The buffers allocated by av_frame_get_buffer() are reference counted and released by
+  // av_frame_free(); freeing frame->data[0] by hand as well would free them twice
+  if (frame != nullptr)
+    av_frame_free(&frame);
+}
+
+VideoFrame::VideoFrame(unsigned int width, unsigned int height, AVPixelFormat pixelFormat)
+  : m_width(width), m_height(height), m_pixelFormat(pixelFormat)
+{
+}
+
+VideoFrame::~VideoFrame() = default;
+
+// Allocates the AVFrame and its buffers for the configured size and format; false on failure
+bool VideoFrame::Initialize()
+{
+  // Validate state
+  if (m_width == 0 || m_height == 0)
+  {
+    std::cerr << "Invalid dimensions, width = " << m_width << ", height = " << m_height
+              << std::endl;
+    return false;
+  }
+
+  // Allocate frame (replaces, and thereby releases, any frame held from an earlier call)
+  m_frame.reset(av_frame_alloc());
+  if (!m_frame)
+  {
+    std::cerr << "Failed to allocate frame" << std::endl;
+    return false;
+  }
+
+  // Set frame details
+  m_frame->width = m_width;
+  m_frame->height = m_height;
+  m_frame->format = m_pixelFormat;
+  // Allocate the data buffers; an alignment of 0 lets libav pick one for the current CPU
+  int result = av_frame_get_buffer(m_frame.get(), 0);
+  if (result < 0)
+  {
+    std::cerr << "Can't allocate buffer for output frame: " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  // Make sure the encoder doesn't keep ref to this frame as we'll modify it
+  result = av_frame_make_writable(m_frame.get());
+  if (result < 0)
+  {
+    std::cerr << "Failed to make frame writable: " << av_err2str(result) << std::endl;
+    return false;
+  }
+  return true;
+}
diff --git a/lib/video/video_frame.hpp b/lib/video/video_frame.hpp
new file mode 100644
index 000000000..e60297531
--- /dev/null
+++ b/lib/video/video_frame.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include
+#include
+
+extern "C"
+{
+#include
+}
+
+struct AVFrame; // forward declaration; this header only uses pointers to it
+// Holds an AVFrame together with the width, height and pixel format given at construction
+class VideoFrame
+{
+public:
+  VideoFrame(unsigned int width, unsigned int height, AVPixelFormat pixelFormat);
+  ~VideoFrame();
+
+  // Lifecycle functions (Initialize() returns false on failure)
+  bool Initialize();
+
+  // Accessors
+  unsigned int Width() const { return m_width; }
+  unsigned int Height() const { return m_height; }
+  AVPixelFormat PixelFormat() const { return m_pixelFormat; }
+  AVFrame* FrameHandle() { return m_frame.get(); } // non-owning; null while no frame is held
+
+private:
+  // Utilities: functor that releases an AVFrame (presumably the deleter type of m_frame)
+  struct FrameDeleter
+  {
+    void operator()(AVFrame* frame);
+  };
+
+  // Constants
+  static const unsigned int m_bitsPerPixel; // TODO: Different value needed for YUV420p?
+
+  // Construction parameters
+  const unsigned int m_width;
+  const unsigned int m_height;
+  const AVPixelFormat m_pixelFormat;
+
+  // Video resources
+  std::unique_ptr m_frame;
+};