From 972e04ac77bcd86999501fd05e132ffc352c16b8 Mon Sep 17 00:00:00 2001
From: squishyhuman <retro.gaming.ai@gmail.com>
Date: Sat, 7 Oct 2023 13:17:27 -0700
Subject: [PATCH] Add C++ streaming transcoder

---
 .gitignore                                    |   2 +
 lib/CMakeLists.txt                            | 141 +++++
 lib/stream_decoder/stream_decoder.cpp         | 442 +++++++++++++
 lib/stream_decoder/stream_decoder.h           | 112 ++++
 .../stream_decoder_embinder.cpp               |  40 ++
 lib/stream_encoder/stream_encoder.cpp         | 584 ++++++++++++++++++
 lib/stream_encoder/stream_encoder.hpp         | 134 ++++
 .../stream_encoder_embinder.cpp               |  36 ++
 lib/video/video_frame.cpp                     |  77 +++
 lib/video/video_frame.hpp                     |  53 ++
 10 files changed, 1621 insertions(+)
 create mode 100644 lib/stream_decoder/stream_decoder.cpp
 create mode 100644 lib/stream_decoder/stream_decoder.h
 create mode 100644 lib/stream_decoder/stream_decoder_embinder.cpp
 create mode 100644 lib/stream_encoder/stream_encoder.cpp
 create mode 100644 lib/stream_encoder/stream_encoder.hpp
 create mode 100644 lib/stream_encoder/stream_encoder_embinder.cpp
 create mode 100644 lib/video/video_frame.cpp
 create mode 100644 lib/video/video_frame.hpp

diff --git a/.gitignore b/.gitignore
index 749a7d56a..28342c6b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@
 
 # Generated libraries
 /frontend/public/motion_tracker
+/frontend/public/stream_decoder.*
+/frontend/public/stream_encoder.*
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index b3771e584..e62d0f220 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -39,6 +39,26 @@ set(CMAKE_CXX_STANDARD 17)
 
 find_package(OpenCV REQUIRED)
 
+#
+# FFmpeg
+#
+
+find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h PATHS "${CMAKE_PREFIX_PATH}/include")
+find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h PATHS "${CMAKE_PREFIX_PATH}/include")
+find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h PATHS "${CMAKE_PREFIX_PATH}/include")
+
+find_library(AVCODEC_LIBRARY avcodec PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(AVFILTER_LIBRARY avfilter PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(AVFORMAT_LIBRARY avformat PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(AVUTIL_LIBRARY avutil PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(LAME_LIBRARY mp3lame PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(OPENH264_LIBRARY openh264 PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(OPUS_LIBRARY opus PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(SWRESAMPLE_LIBRARY swresample PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(SWSCALE_LIBRARY swscale PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(VPX_LIBRARY vpx PATHS "${CMAKE_PREFIX_PATH}/lib")
+find_library(X264_LIBRARY x264 PATHS "${CMAKE_PREFIX_PATH}/lib")
+
 ################################################################################
 #
 # Define sources
@@ -65,6 +85,27 @@ set(MOTION_TRACKER_SOURCES
   utils/math_utils.cpp
 )
 
+#
+# Stream decoder
+#
+
+set(STREAM_DECODER_SOURCES
+  stream_decoder/stream_decoder.cpp
+  stream_decoder/stream_decoder_embinder.cpp
+  utils/emscripten_utils.cpp
+)
+
+#
+# Stream encoder
+#
+
+set(STREAM_ENCODER_SOURCES
+  stream_encoder/stream_encoder.cpp
+  stream_encoder/stream_encoder_embinder.cpp
+  utils/emscripten_utils.cpp
+  video/video_frame.cpp
+)
+
 ################################################################################
 #
 # Build libraries
@@ -91,10 +132,13 @@ string(APPEND EMSCRIPTEN_LINK_FLAGS
   # " --pre-js pre-module.j "
   # " --post-js post-module.j "
   "-s ALLOW_MEMORY_GROWTH=1 "
+  "-s ALLOW_TABLE_GROWTH=1 "
   "-s ASSERTIONS=1 "
   # " -s DEMANGLE_SUPPORT=1 "
   # " -s DISABLE_EXCEPTION_CATCHING=0 "
   "-s ERROR_ON_UNDEFINED_SYMBOLS=0 "
+  "-s EXPORTED_RUNTIME_METHODS='[\"addFunction\"]' "
+  "-s EXTRA_EXPORTED_RUNTIME_METHODS='[\"addFunction\"]' "
   # " -s FULL_ES3=1 "
   # " -s GL_ASSERTIONS=1 "
   # " -s GL_UNSAFE_OPTS=0 "
@@ -102,6 +146,7 @@ string(APPEND EMSCRIPTEN_LINK_FLAGS
   # " -s LEGACY_GL_EMULATION=0 "
   #"-s LLD_REPORT_UNDEFINED "
   # " -s OFFSCREENCANVAS_SUPPORT=1 "
+  "-s RESERVED_FUNCTION_POINTERS=10 "
   # " -s SAFE_HEAP=1 "
   #"-s TOTAL_MEMORY=67108864 "
   # " -s USE_FREETYPE=1 "
@@ -147,6 +192,78 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
   )
 endif ()
 
+#
+# Stream decoder
+#
+
+add_executable(stream_decoder
+  ${STREAM_DECODER_SOURCES}
+)
+
+target_include_directories(stream_decoder PRIVATE
+  ${AVCODEC_INCLUDE_DIR}
+  ${AVFORMAT_INCLUDE_DIR}
+  ${AVUTIL_INCLUDE_DIR}
+)
+
+target_link_libraries(stream_decoder PRIVATE
+  ${AVCODEC_LIBRARY}
+  ${AVFORMAT_LIBRARY}
+  ${AVUTIL_LIBRARY}
+  ${LAME_LIBRARY}
+  ${OPENH264_LIBRARY}
+  ${OPUS_LIBRARY}
+  ${SWRESAMPLE_LIBRARY}
+  ${SWSCALE_LIBRARY}
+  ${VPX_LIBRARY}
+  ${X264_LIBRARY}
+)
+
+if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
+  set_target_properties(stream_decoder PROPERTIES
+    COMPILE_FLAGS " \
+      -Wno-deprecated \
+      -s ASSERTIONS=1 \
+    "
+    LINK_FLAGS ${EMSCRIPTEN_LINK_FLAGS})
+endif ()
+
+#
+# Stream encoder
+#
+
+add_executable(stream_encoder
+  ${STREAM_ENCODER_SOURCES}
+)
+
+target_include_directories(stream_encoder PRIVATE
+  ${AVCODEC_INCLUDE_DIR}
+  ${AVFORMAT_INCLUDE_DIR}
+  ${AVUTIL_INCLUDE_DIR}
+)
+
+target_link_libraries(stream_encoder PRIVATE
+  ${AVCODEC_LIBRARY}
+  ${AVFORMAT_LIBRARY}
+  ${AVUTIL_LIBRARY}
+  ${LAME_LIBRARY}
+  ${OPENH264_LIBRARY}
+  ${OPUS_LIBRARY}
+  ${SWRESAMPLE_LIBRARY}
+  ${SWSCALE_LIBRARY}
+  ${VPX_LIBRARY}
+  ${X264_LIBRARY}
+)
+
+if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
+  set_target_properties(stream_encoder PROPERTIES
+    COMPILE_FLAGS " \
+      -Wno-deprecated \
+      -s ASSERTIONS=1 \
+    "
+    LINK_FLAGS ${EMSCRIPTEN_LINK_FLAGS})
+endif ()
+
 ################################################################################
 #
 # Install libraries
@@ -165,3 +282,27 @@ INSTALL(
   DESTINATION
     motion_tracker
 )
+
+#
+# Stream decoder
+#
+
+INSTALL(
+  FILES
+    "${CMAKE_BINARY_DIR}/stream_decoder.js"
+    "${CMAKE_BINARY_DIR}/stream_decoder.wasm"
+  DESTINATION
+    "${CMAKE_INSTALL_PREFIX}"
+)
+
+#
+# Stream encoder
+#
+
+INSTALL(
+  FILES
+    "${CMAKE_BINARY_DIR}/stream_encoder.js"
+    "${CMAKE_BINARY_DIR}/stream_encoder.wasm"
+  DESTINATION
+    "${CMAKE_INSTALL_PREFIX}"
+)
diff --git a/lib/stream_decoder/stream_decoder.cpp b/lib/stream_decoder/stream_decoder.cpp
new file mode 100644
index 000000000..da69969de
--- /dev/null
+++ b/lib/stream_decoder/stream_decoder.cpp
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_decoder.h"
+
+#include "utils/emscripten_utils.hpp"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+
+#include <emscripten/val.h>
+
+extern "C"
+{
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/error.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/mem.h>
+#include <libswscale/swscale.h>
+}
+
+StreamDecoder::StreamDecoder(const std::string& fileName, int blockSize, int maxDecodedSize)
+  : m_fileName(fileName),
+    m_blockSize(std::max(blockSize, 0)),
+    m_maxDecodedSize(std::max(maxDecodedSize, 0))
+{
+}
+
+StreamDecoder::~StreamDecoder()
+{
+  Deinitialize();
+}
+
+// Sets up the custom-I/O demuxer, selects the first video stream, opens its decoder
+// and allocates the output buffer. Returns false on failure; whatever was acquired
+// up to that point stays in the members and is released by Deinitialize().
+bool StreamDecoder::Initialize()
+{
+  // Preallocate an AVFormatContext to use our custom read function instead of
+  // the avformat internal I/O layer
+  m_formatContext = avformat_alloc_context();
+  if (m_formatContext == nullptr)
+  {
+    std::cerr << "Failed to allocate AVFormatContext" << std::endl;
+    return false;
+  }
+
+  // Use nonblocking reads if possible
+  m_formatContext->flags |= AVFMT_FLAG_NONBLOCK;
+
+  // The buffer size is very important for performance. For protocols with
+  // fixed blocksize it should be set to this blocksize. The fallback value is
+  // IO_BUFFER_SIZE from aviobuf.c.
+  const unsigned int bufferSize = (m_blockSize > 1) ? m_blockSize : 32768u;
+
+  uint8_t* avioContextBuffer = static_cast<uint8_t*>(av_malloc(bufferSize));
+  if (avioContextBuffer == nullptr)
+  {
+    std::cerr << "Failed to allocate buffer for I/O" << std::endl;
+    return false;
+  }
+
+  // Initialize the I/O context
+  m_ioContext = avio_alloc_context(avioContextBuffer, bufferSize, 0, this, ReadPacketInternal,
+                                   nullptr, SeekInternal);
+  if (m_ioContext == nullptr)
+  {
+    av_free(avioContextBuffer); // No context took ownership of the buffer
+    std::cerr << "Failed to allocate AVIOContext" << std::endl;
+    return false;
+  }
+
+  if (m_blockSize > 1)
+    m_ioContext->max_packet_size = bufferSize;
+
+  // Set the pb field of the AVFormatContext to the newly created AVIOContext
+  m_formatContext->pb = m_ioContext;
+
+  int result = avformat_open_input(&m_formatContext, m_fileName.c_str(), nullptr, nullptr);
+  if (result < 0)
+  {
+    std::cerr << "Failed to open input: " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  // A failed probe is only reported here; the stream and codec checks below still apply
+  const int streamInfoResult = avformat_find_stream_info(m_formatContext, nullptr);
+  if (streamInfoResult < 0)
+    std::cerr << "Failed to find stream info: " << av_err2str(streamInfoResult) << std::endl;
+
+  // Dump video metadata
+  std::cout << "=== Detecting input video" << std::endl;
+  av_dump_format(m_formatContext, 0, m_fileName.c_str(), 0);
+
+  // Select the first video stream
+  for (unsigned int i = 0; i < m_formatContext->nb_streams && m_videoStreamId < 0; i++)
+  {
+    if (m_formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
+      m_videoStreamId = static_cast<int>(i);
+  }
+
+  if (m_videoStreamId < 0)
+  {
+    std::cerr << "No video stream found" << std::endl;
+    return false;
+  }
+
+  // Get stream parameters
+  AVStream* videoStream = m_formatContext->streams[m_videoStreamId];
+  m_timeBaseNum = videoStream->time_base.num;
+  m_timeBaseDen = videoStream->time_base.den;
+  m_realFrameRateNum = videoStream->r_frame_rate.num;
+  m_realFrameRateDen = videoStream->r_frame_rate.den;
+
+  // Get codec parameters
+  AVCodecParameters* codecParams = videoStream->codecpar;
+  m_videoBitRate = static_cast<unsigned int>(codecParams->bit_rate);
+
+  auto codec = avcodec_find_decoder(codecParams->codec_id);
+  if (codec == nullptr)
+  {
+    std::cerr << "Failed avcodec_find_decoder(" << codecParams->codec_id << ")" << std::endl;
+    return false;
+  }
+
+  m_videoCodecContext = avcodec_alloc_context3(codec);
+  if (m_videoCodecContext == nullptr)
+  {
+    std::cerr << "Failed avcodec_alloc_context3()" << std::endl;
+    return false;
+  }
+
+  result = avcodec_parameters_to_context(m_videoCodecContext, codecParams);
+  if (result < 0)
+  {
+    std::cerr << "Failed avcodec_parameters_to_context(): " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  result = avcodec_open2(m_videoCodecContext, codec, nullptr);
+  if (result < 0)
+  {
+    std::cerr << "Failed avcodec_open2(): " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  if (m_videoCodecContext->width <= 0 || m_videoCodecContext->height <= 0)
+  {
+    std::cerr << "Video has invalid dimensions: " << m_videoCodecContext->width << " x "
+              << m_videoCodecContext->height << std::endl;
+    return false;
+  }
+
+  m_width = static_cast<unsigned int>(m_videoCodecContext->width);
+  m_height = static_cast<unsigned int>(m_videoCodecContext->height);
+
+  // Reduce destination size to fit max constraint (a max size of 0 disables the limit)
+  while (m_maxDecodedSize > 0 && std::max(m_width, m_height) > m_maxDecodedSize)
+  {
+    m_width /= 2;
+    m_height /= 2;
+  }
+
+  if (m_width == 0 || m_height == 0)
+  {
+    std::cerr << "Invalid dimensions (width = " << m_width << ", height = " << m_height
+              << ", max size = " << m_maxDecodedSize << ")" << std::endl;
+    return false;
+  }
+
+  m_frameSize = av_image_get_buffer_size(m_targetFormat, m_width, m_height, 1);
+
+  m_decodedFrame = av_frame_alloc();
+  if (m_decodedFrame == nullptr)
+  {
+    std::cerr << "Failed to alloc m_decodedFrame" << std::endl;
+    return false;
+  }
+
+  m_videoBuffer = static_cast<uint8_t*>(av_malloc(m_frameSize));
+  if (m_videoBuffer == nullptr)
+  {
+    std::cerr << "Failed to allocate video buffer" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+// Releases every libav resource held in the members; safe to call more than once
+void StreamDecoder::Deinitialize()
+{
+  if (m_videoBuffer != nullptr)
+  {
+    av_free(m_videoBuffer);
+    m_videoBuffer = nullptr;
+  }
+
+  if (m_decodedFrame != nullptr)
+  {
+    av_frame_free(&m_decodedFrame); // av_frame_alloc() must be paired with av_frame_free()
+    m_decodedFrame = nullptr;
+  }
+
+  if (m_scaler != nullptr)
+  {
+    sws_freeContext(m_scaler);
+    m_scaler = nullptr;
+  }
+
+  if (m_videoCodecContext != nullptr)
+  {
+    avcodec_free_context(&m_videoCodecContext); // Closes the codec as part of freeing it
+    m_videoCodecContext = nullptr;
+  }
+
+  if (m_formatContext != nullptr)
+  {
+    avformat_close_input(&m_formatContext);
+    m_formatContext = nullptr;
+  }
+
+  if (m_ioContext != nullptr)
+  {
+    av_free(m_ioContext->buffer);
+    av_free(m_ioContext);
+    m_ioContext = nullptr;
+  }
+}
+
+// Opens the video on first use.
+//
+// Returns true unless the decoder is in the Failed state, so calls made after a
+// successful open keep returning true without initializing again.
+bool StreamDecoder::OpenVideo()
+{
+  // Initialization is attempted only from the Init state; its outcome is stored in m_state
+  if (m_state == StreamDecoderState::Init)
+    m_state = Initialize() ? StreamDecoderState::Running : StreamDecoderState::Failed;
+
+  return (m_state != StreamDecoderState::Failed);
+}
+
+// Queues a buffer that EmscriptenUtils::GetArrayData() fills from the given value
+void StreamDecoder::AddPacket(const emscripten::val& packet)
+{
+  const unsigned int packetSize = EmscriptenUtils::ArrayLength(packet);
+
+  // Fill a buffer of matching size, then hand its storage over to the queue
+  std::vector<uint8_t> bytes(packetSize);
+  EmscriptenUtils::GetArrayData(packet, bytes.data(), packetSize);
+  m_packets.push_back(std::move(bytes));
+  m_totalSize += packetSize;
+}
+
+// Reads one packet; a completed video frame is scaled into the output buffer (HasFrame)
+void StreamDecoder::Decode()
+{
+  if (m_state == StreamDecoderState::Init || m_state == StreamDecoderState::Failed)
+    return;
+
+  AVPacket packet{};
+
+  int result = av_read_frame(m_formatContext, &packet);
+  if (result == AVERROR_EOF)
+  {
+    m_state = StreamDecoderState::Ended;
+    return;
+  }
+  else if (result < 0)
+  {
+    std::cerr << "Error reading frame: " << av_err2str(result) << std::endl;
+    m_state = StreamDecoderState::Failed;
+    return;
+  }
+
+  // Unreference the packet on every exit path below, including the early error returns
+  struct PacketGuard { AVPacket* pkt; ~PacketGuard() { av_packet_unref(pkt); } } guard{&packet};
+
+  if (packet.stream_index == m_videoStreamId)
+  {
+    if (m_scaler == nullptr)
+    {
+      m_scaler = sws_getContext(m_videoCodecContext->width, m_videoCodecContext->height,
+                                m_videoCodecContext->pix_fmt, m_width, m_height, m_targetFormat,
+                                SWS_BILINEAR, nullptr, nullptr, nullptr);
+    }
+
+    if (m_scaler == nullptr)
+    {
+      std::cerr << "Failed to create software scaler" << std::endl;
+      m_state = StreamDecoderState::Failed;
+      return;
+    }
+
+    result = avcodec_send_packet(m_videoCodecContext, &packet);
+    if (result < 0)
+    {
+      std::cerr << "Error sending packet: " << av_err2str(result) << std::endl;
+      m_state = StreamDecoderState::Failed;
+      return;
+    }
+
+    result = avcodec_receive_frame(m_videoCodecContext, m_decodedFrame);
+    if (result < 0 && result != AVERROR(EAGAIN))
+    {
+      std::cerr << "Error receiving frame: " << av_err2str(result) << std::endl;
+      m_state = StreamDecoderState::Failed;
+      return;
+    }
+
+    if (result == 0)
+    {
+      if (m_decodedFrame->pts < 0)
+      {
+        std::cerr << "Invalid pts: " << m_decodedFrame->pts << std::endl;
+        m_state = StreamDecoderState::Failed;
+        return;
+      }
+
+      const int videoLineSize = av_image_get_buffer_size(m_targetFormat, m_width, 1, 1);
+      uint8_t* const dest[] = {m_videoBuffer};
+      const int destLineSize[] = {videoLineSize};
+
+      result = sws_scale(m_scaler, m_decodedFrame->data, m_decodedFrame->linesize, 0,
+                         m_videoCodecContext->height, dest, destLineSize);
+
+      if (result < 0)
+      {
+        std::cerr << "Error scaling frame: " << av_err2str(result) << std::endl;
+        m_state = StreamDecoderState::Failed;
+        return;
+      }
+
+      m_videoPts = static_cast<uint64_t>(m_decodedFrame->pts);
+      m_state = StreamDecoderState::HasFrame;
+    }
+  }
+}
+
+uintptr_t StreamDecoder::GetFrameBuffer()
+{
+  // Consume the pending frame; Init, Ended and Failed must not be overwritten with Running
+  if (m_state == StreamDecoderState::HasFrame)
+    m_state = StreamDecoderState::Running;
+  return reinterpret_cast<uintptr_t>(m_videoBuffer);
+}
+
+// Copies up to bufferSize bytes from the queue at the read position, without crossing
+// a packet boundary. Returns the byte count, or 0 if no data is left at that position.
+unsigned int StreamDecoder::ReadPacket(uint8_t* buffer, unsigned int bufferSize)
+{
+  if (m_packets.empty())
+  {
+    std::cerr << "No data to read" << std::endl;
+    return 0;
+  }
+
+  // Read position expressed relative to the start of the packet being examined
+  unsigned int offsetInPacket = m_readPosition;
+
+  for (const std::vector<uint8_t>& chunk : m_packets)
+  {
+    if (offsetInPacket >= chunk.size())
+    {
+      // The read position lies beyond this packet, move on to the next one
+      offsetInPacket -= chunk.size();
+      continue;
+    }
+
+    const unsigned int available = chunk.size() - offsetInPacket;
+    const unsigned int bytesToCopy = std::min(bufferSize, available);
+    if (bytesToCopy == 0)
+      return 0;
+
+    std::memcpy(buffer, chunk.data() + offsetInPacket, bytesToCopy);
+    m_readPosition += bytesToCopy;
+    return bytesToCopy;
+  }
+
+  return 0;
+}
+
+// Moves the read position to an absolute offset; offsets past the total size yield -1
+uint64_t StreamDecoder::Seek(uint64_t offset)
+{
+  const bool withinData = (offset <= m_totalSize);
+  if (withinData)
+    m_readPosition = offset;
+
+  return withinData ? m_readPosition : static_cast<uint64_t>(-1);
+}
+
+// libav read callback: forwards to ReadPacket() on the decoder passed as context
+int StreamDecoder::ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize)
+{
+  auto* const self = static_cast<StreamDecoder*>(context);
+  if (self == nullptr || bufferSize <= 0)
+    return AVERROR_EXIT;
+
+  const unsigned int bytesRead = self->ReadPacket(buffer, bufferSize);
+
+  // A zero-length read is reported as "try again" rather than as a byte count of 0
+  return (bytesRead == 0) ? AVERROR(EAGAIN) : static_cast<int>(bytesRead);
+}
+
+int64_t StreamDecoder::SeekInternal(void* context, int64_t offset, int whence)
+{
+  StreamDecoder* decoder = static_cast<StreamDecoder*>(context);
+  if (decoder == nullptr)
+    return AVERROR_EXIT;
+
+  if ((whence & AVSEEK_SIZE) != 0)
+    return decoder->GetTotalSize();
+
+  // Negative offsets are valid relative to the current position or the end, so only the
+  // resolved absolute target is range-checked. AVSEEK_FORCE is a flag ORed into whence.
+  int64_t target = -1;
+  switch (whence & ~AVSEEK_FORCE)
+  {
+    case SEEK_SET: // Seek from beginning of file
+      target = offset;
+      break;
+    case SEEK_CUR: // Seek from current position
+      target = static_cast<int64_t>(decoder->GetReadPosition()) + offset;
+      break;
+    case SEEK_END: // Seek from end of file
+      target = static_cast<int64_t>(decoder->GetTotalSize()) + offset;
+      break;
+    default:
+      return -1;
+  }
+
+  return (target < 0) ? AVERROR_EXIT : static_cast<int64_t>(decoder->Seek(target));
+}
diff --git a/lib/stream_decoder/stream_decoder.h b/lib/stream_decoder/stream_decoder.h
new file mode 100644
index 000000000..b812175c0
--- /dev/null
+++ b/lib/stream_decoder/stream_decoder.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+extern "C"
+{
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libswscale/swscale.h>
+}
+
+namespace emscripten
+{
+class val;
+}
+
+enum class StreamDecoderState
+{
+  Init,
+  Running,
+  HasFrame,
+  Ended,
+  Failed,
+};
+
+class StreamDecoder
+{
+public:
+  StreamDecoder(const std::string& fileName, int blockSize, int maxDecodedSize);
+  ~StreamDecoder();
+
+  // Public accessors
+  StreamDecoderState GetState() const { return m_state; }
+  unsigned int GetFrameSize() const { return m_frameSize; }
+  unsigned int GetFrameWidth() const { return m_width; }
+  unsigned int GetFrameHeight() const { return m_height; }
+  unsigned int GetTimeBaseNum() const { return m_timeBaseNum; }
+  unsigned int GetTimeBaseDen() const { return m_timeBaseDen; }
+  unsigned int GetRealFrameRateNum() const { return m_realFrameRateNum; }
+  unsigned int GetRealFrameRateDen() const { return m_realFrameRateDen; }
+  unsigned int GetVideoBitRate() const { return m_videoBitRate; }
+
+  // Video API
+  bool OpenVideo();
+  void AddPacket(const emscripten::val& packet);
+  void Decode();
+  // TODO: "Can't use void* because embind has problems with a getter that returns
+  // a raw pointer"
+  uintptr_t GetFrameBuffer();
+  unsigned int GetFramePts() { return m_videoPts; }
+
+private:
+  // Lifecycle functions
+  bool Initialize();
+  void Deinitialize();
+
+  // Private accessors
+  unsigned int GetReadPosition() const { return m_readPosition; }
+  unsigned int GetTotalSize() const { return m_totalSize; }
+
+  // IO functions
+  unsigned int ReadPacket(uint8_t* buffer, unsigned int bufferSize);
+  uint64_t Seek(uint64_t offset);
+
+  // libav internal IO functions
+  static int ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int64_t SeekInternal(void* context, int64_t offset, int whence);
+
+  // Construction parameters
+  const std::string m_fileName;
+  const unsigned int m_blockSize;
+  const unsigned int m_maxDecodedSize;
+
+  // State parameters
+  StreamDecoderState m_state = StreamDecoderState::Init;
+
+  // Video parameters
+  int m_videoStreamId = -1;
+  unsigned int m_width = 0;
+  unsigned int m_height = 0;
+  unsigned int m_frameSize = 0;
+  static const AVPixelFormat m_targetFormat = AV_PIX_FMT_RGBA;
+  unsigned int m_timeBaseNum = 0;
+  unsigned int m_timeBaseDen = 0;
+  unsigned int m_realFrameRateNum = 0;
+  unsigned int m_realFrameRateDen = 0;
+  unsigned int m_videoBitRate = 0; // bits/sec
+
+  // Video resources
+  AVIOContext* m_ioContext = nullptr;
+  AVFormatContext* m_formatContext = nullptr;
+  AVCodecContext* m_videoCodecContext = nullptr;
+  SwsContext* m_scaler = nullptr;
+  AVFrame* m_decodedFrame = nullptr;
+  uint8_t* m_videoBuffer = nullptr;
+  uint64_t m_videoPts = 0;
+
+  // IO state (seeking is virtualized)
+  std::vector<std::vector<uint8_t>> m_packets;
+  uint64_t m_totalSize = 0;
+  uint64_t m_readPosition = 0;
+};
diff --git a/lib/stream_decoder/stream_decoder_embinder.cpp b/lib/stream_decoder/stream_decoder_embinder.cpp
new file mode 100644
index 000000000..82462c0cf
--- /dev/null
+++ b/lib/stream_decoder/stream_decoder_embinder.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_decoder.h"
+
+#include <emscripten/bind.h>
+
+using namespace emscripten;
+
+EMSCRIPTEN_BINDINGS(stream_decoder)
+{
+  enum_<StreamDecoderState>("StreamDecoderState")
+      .value("Init", StreamDecoderState::Init)
+      .value("Running", StreamDecoderState::Running)
+      .value("HasFrame", StreamDecoderState::HasFrame)
+      .value("Ended", StreamDecoderState::Ended)
+      .value("Failed", StreamDecoderState::Failed);
+
+  class_<StreamDecoder>("StreamDecoder")
+      .constructor<const std::string&, int, int>()
+      .property("state", &StreamDecoder::GetState)
+      .property("frameSize", &StreamDecoder::GetFrameSize)
+      .property("frameWidth", &StreamDecoder::GetFrameWidth)
+      .property("frameHeight", &StreamDecoder::GetFrameHeight)
+      .property("timeBaseNum", &StreamDecoder::GetTimeBaseNum)
+      .property("timeBaseDen", &StreamDecoder::GetTimeBaseDen)
+      .property("realFrameRateNum", &StreamDecoder::GetRealFrameRateNum)
+      .property("realFrameRateDen", &StreamDecoder::GetRealFrameRateDen)
+      .property("videoBitRate", &StreamDecoder::GetVideoBitRate)
+      .function("openVideo", &StreamDecoder::OpenVideo)
+      .function("addPacket", &StreamDecoder::AddPacket)
+      .function("decode", &StreamDecoder::Decode)
+      .function("getFrameBuffer", &StreamDecoder::GetFrameBuffer)
+      .function("getFramePts", &StreamDecoder::GetFramePts);
+}
diff --git a/lib/stream_encoder/stream_encoder.cpp b/lib/stream_encoder/stream_encoder.cpp
new file mode 100644
index 000000000..63356d5ad
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder.cpp
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_encoder.hpp"
+
+#include "utils/emscripten_utils.hpp"
+#include "video/video_frame.hpp"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+
+#include <emscripten/val.h>
+
+extern "C"
+{
+#include <libavcodec/avcodec.h>
+#include <libavcodec/codec_par.h>
+#include <libavformat/avformat.h>
+#include <libavutil/error.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/log.h>
+#include <libavutil/mathematics.h>
+#include <libavutil/mem.h>
+#include <libavutil/opt.h>
+#include <libswscale/swscale.h>
+}
+
+const AVPixelFormat StreamEncoder::m_inputFormat = AV_PIX_FMT_RGBA;
+const AVPixelFormat StreamEncoder::m_targetFormat = AV_PIX_FMT_YUV420P;
+const unsigned int StreamEncoder::m_cachePageSize = 32768; // From IO_BUFFER_SIZE in aviobuf.c
+
+StreamEncoder::StreamEncoder(const std::string& fileName,
+                             int width,
+                             int height,
+                             int timeBaseNum,
+                             int timeBaseDen,
+                             int realFrameRateNum,
+                             int realFrameRateDen,
+                             int videoBitRate,
+                             int writeBufferSize,
+                             int writePacketFnPtr,
+                             int seekFnPtr)
+  : m_fileName(fileName),
+    m_width(std::max(width, 0)),
+    m_height(std::max(height, 0)),
+    m_timeBaseNum(std::max(timeBaseNum, 0)),
+    m_timeBaseDen(std::max(timeBaseDen, 0)),
+    m_realFrameRateNum(std::max(realFrameRateNum, 0)),
+    m_realFrameRateDen(std::max(realFrameRateDen, 0)),
+    m_videoBitRate(std::max(videoBitRate, 0)),
+    m_writeBufferSize(writeBufferSize > 0 ? writeBufferSize : m_cachePageSize),
+    m_targetFrame(std::make_unique<VideoFrame>(width, height, m_targetFormat)),
+    m_writePacketFnPtr(reinterpret_cast<WritePacketFnPtr>(writePacketFnPtr)),
+    m_seekFnPtr(reinterpret_cast<SeekFnPtr>(seekFnPtr))
+{
+  // Enable libav debugging
+  av_log_set_level(AV_LOG_DEBUG);
+}
+
+StreamEncoder::~StreamEncoder()
+{
+  Deinitialize();
+}
+
+bool StreamEncoder::Initialize()
+{
+  // Validate state
+  if (m_width == 0 || m_height == 0)
+  {
+    std::cerr << "Invalid dimensions (width = " << m_width << ", height = " << m_height << ")"
+              << std::endl;
+    return false;
+  }
+
+  if (m_timeBaseNum == 0 || m_timeBaseDen == 0)
+  {
+    std::cerr << "Invalid time base (numerator = " << m_timeBaseNum
+              << ", denominator = " << m_timeBaseDen << ")" << std::endl;
+    return false;
+  }
+
+  if (m_realFrameRateNum == 0 || m_realFrameRateDen == 0)
+  {
+    std::cerr << "Invalid frame rate (numerator = " << m_realFrameRateNum
+              << ", denominator = " << m_realFrameRateDen << ")" << std::endl;
+    return false;
+  }
+
+  if (m_writePacketFnPtr == nullptr || m_seekFnPtr == nullptr)
+  {
+    std::cerr << "Invalid function pointers passed to constructor" << std::endl;
+    return false;
+  }
+
+  if (!CreateContext())
+    return false;
+
+  if (!PrepareVideoEncoder())
+    return false;
+
+  if (!InitializeVideoStream())
+    return false;
+
+  EnableGlobalHeaders();
+
+  // Dump av format information
+  PrintInfo();
+
+  if (!WriteHeader())
+    return false;
+
+  // Initialize buffer for YUV420p frame data
+  if (!m_targetFrame->Initialize())
+    return false;
+
+  return true;
+}
+
+bool StreamEncoder::CreateContext()
+{
+  // Guess encoding format
+  AVOutputFormat* outputFormat = av_guess_format(nullptr, m_fileName.c_str(), nullptr);
+  if (outputFormat == nullptr)
+  {
+    std::cerr << "Unknown file extension: " << m_fileName << std::endl;
+    return false;
+  }
+
+  // Allocate an AVFormatContext to use our custom write function instead of
+  // the avformat internal I/O layer
+  int result =
+      avformat_alloc_output_context2(&m_formatContext, outputFormat, nullptr, m_fileName.c_str());
+  if (result < 0)
+  {
+    std::cerr << "Failed to allocate AVFormatContext: " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  // Create a write buffer for our custom I/O context
+  uint8_t* avioContextBuffer = static_cast<uint8_t*>(av_malloc(m_writeBufferSize));
+  if (avioContextBuffer == nullptr)
+  {
+    std::cerr << "Failed to allocate buffer for I/O" << std::endl;
+    return false;
+  }
+
+  // Initialize the I/O context
+  m_ioContext = avio_alloc_context(avioContextBuffer, m_writeBufferSize, 1, this, nullptr,
+                                   WritePacketInternal, SeekInternal);
+  if (m_ioContext == nullptr)
+  {
+    std::cerr << "Failed to allocate AVIOContext" << std::endl;
+    return false;
+  }
+
+  // Set the pb field of the AVFormatContext to the newly created AVIOContext
+  m_formatContext->pb = m_ioContext;
+
+  return true;
+}
+
+// Allocates, configures and opens the H.264 encoder context; returns false on failure
+bool StreamEncoder::PrepareVideoEncoder()
+{
+  // It's necessary to open stream codec to link it to "codec" (the encoder)
+  const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
+  if (codec == nullptr)
+  {
+    std::cerr << "Error: Can't find H.264 encoder" << std::endl;
+    return false;
+  }
+
+  m_codecContext = avcodec_alloc_context3(codec);
+  if (m_codecContext == nullptr)
+  {
+    std::cerr << "Error: Can't allocate codec context" << std::endl;
+    return false;
+  }
+
+  // Fill in codec context
+  m_codecContext->bit_rate = m_videoBitRate;
+  m_codecContext->pix_fmt = m_targetFormat;
+  m_codecContext->width = m_width;
+  m_codecContext->height = m_height;
+  m_codecContext->time_base =
+      AVRational{static_cast<int>(m_realFrameRateDen), static_cast<int>(m_realFrameRateNum)};
+  // Global headers must be requested before avcodec_open2() for the encoder to honor them
+  if (m_formatContext != nullptr && (m_formatContext->oformat->flags & AVFMT_GLOBALHEADER) != 0)
+    m_codecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+  // Prefer reasonable speed over quality
+  av_opt_set(m_codecContext->priv_data, "preset", "fast", 0);
+
+  // TODO: Set Constant Rate Factor (CRF) quality if bit_rate is zero. In this
+  // case, expose CRF as an initialization parameter to the user
+  //av_opt_set_int(m_codecContext->priv_data, "crf", 23, AV_OPT_SEARCH_CHILDREN);
+
+  // TODO: H.264 specific options: gop_size = 25 (one intraframe every 25 frames), level = 31
+
+  int result = avcodec_open2(m_codecContext, codec, nullptr);
+  if (result < 0)
+  {
+    std::cerr << "Error opening codec: " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+bool StreamEncoder::InitializeVideoStream()
+{
+  m_videoStream = avformat_new_stream(m_formatContext, nullptr);
+  if (m_videoStream == nullptr)
+  {
+    std::cerr << "Error: Can't create new stream" << std::endl;
+    return false;
+  }
+
+  m_videoStream->id = m_formatContext->nb_streams - 1;
+
+  // Copy the stream parameters to the muxer
+  int result = avcodec_parameters_from_context(m_videoStream->codecpar, m_codecContext);
+  if (result < 0)
+  {
+    std::cerr << "Error setting parameters: " << av_err2str(result) << std::endl;
+    return false;
+  }
+
+  // Set stream time base as a hint to the muxer before writing the header
+  m_videoStream->time_base =
+      AVRational{static_cast<int>(m_timeBaseNum), static_cast<int>(m_timeBaseDen)};
+  m_videoStream->r_frame_rate =
+      AVRational{static_cast<int>(m_realFrameRateNum), static_cast<int>(m_realFrameRateDen)};
+
+  return true;
+}
+
+void StreamEncoder::EnableGlobalHeaders()
+{
+  // The flag is a codec flag (not an AVFormatContext one), so set it on the encoder.
+  // NOTE(review): encoders read it in avcodec_open2(); confirm this runs before opening.
+  if (m_formatContext->oformat->flags & AVFMT_GLOBALHEADER)
+    m_codecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+}
+
+void StreamEncoder::PrintInfo()
+{
+  std::cout << "=== Detecting output video" << std::endl;
+  av_dump_format(m_formatContext, 0, m_fileName.c_str(), 1);
+}
+
+bool StreamEncoder::WriteHeader()
+{
+  // Muxer-private options for avformat_write_header(); none are set so far
+  AVDictionary* muxerOptions = nullptr;
+
+  // Set faststart (requires a seekable write stream)
+  // TODO: This requires re-opening the file for reading
+  /*
+  int status = av_dict_set(&muxerOptions, "movflags", "faststart", 0);
+  if (status < 0)
+  {
+    std::cerr << "Error setting option: " << av_err2str(status) << std::endl;
+    return false;
+  }
+  */
+
+  // Emit the container header; options the muxer didn't consume are handed back
+  const int status = avformat_write_header(m_formatContext, &muxerOptions);
+
+  // Freeing an empty (null) dictionary is a no-op, so no check is needed
+  av_dict_free(&muxerOptions);
+
+  if (status < 0)
+  {
+    std::cerr << "Error writing header: " << av_err2str(status) << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+void StreamEncoder::Deinitialize()
+{
+  // Release the encoder and the scaler so that a re-open starts from scratch
+  avcodec_free_context(&m_codecContext);
+  sws_freeContext(m_scaler);
+  m_scaler = nullptr;
+
+  // The stream belongs to the format context and goes away together with it
+  avformat_free_context(m_formatContext);
+  m_formatContext = nullptr;
+  m_videoStream = nullptr;
+
+  // libav may have swapped the IO buffer, so free the one currently in use
+  if (m_ioContext != nullptr)
+    av_freep(&m_ioContext->buffer);
+  avio_context_free(&m_ioContext);
+
+  // Reset the size along with the buffer so that AddFrame() reallocates it
+  av_freep(&m_inputFrameBuffer);
+  m_inputFrameSize = 0;
+}
+
+bool StreamEncoder::OpenVideo()
+{
+  // Start from a clean slate: release whatever a previous session left behind
+  Deinitialize();
+
+  // The outcome of the initialization decides the state: Running on success,
+  // Failed otherwise
+  const bool isOpen = Initialize();
+  if (isOpen)
+    m_state = StreamEncoderState::Running;
+  else
+    m_state = StreamEncoderState::Failed;
+
+  return isOpen;
+}
+
+// Convert one frame of pixels to the encoder's format, submit it to the
+// encoder with the given presentation timestamp (in stream time base units)
+// and write out any packets that become available. Returns false on failure.
+bool StreamEncoder::AddFrame(const emscripten::val& frameData, int framePts)
+{
+  // Every failure is handled alike: log it and park the encoder in the Failed state
+  const auto fail = [this](const std::string& message)
+  {
+    std::cerr << message << std::endl;
+    m_state = StreamEncoderState::Failed;
+    return false;
+  };
+
+  // Validate state
+  if (m_state != StreamEncoderState::Running)
+    return fail("Failed to add frame: Not running");
+
+  const size_t dataSize = EmscriptenUtils::ArrayLength(frameData);
+
+  // The first frame fixes the input size and every later frame must match it.
+  // Empty frames are rejected, as a size of 0 is what marks "nothing allocated".
+  if (dataSize == 0 || (m_inputFrameSize != 0 && m_inputFrameSize != dataSize))
+  {
+    return fail("Invalid data size: " + std::to_string(dataSize) + " (expected " +
+                std::to_string(m_inputFrameSize) + ")");
+  }
+
+  // Allocate buffer for input frame, if necessary. The size is only recorded
+  // once the allocation succeeded, so the two can't get out of sync.
+  if (m_inputFrameBuffer == nullptr)
+    m_inputFrameBuffer = static_cast<uint8_t*>(av_malloc(dataSize));
+
+  if (m_inputFrameBuffer == nullptr)
+    return fail("Failed to allocate input frame buffer");
+
+  m_inputFrameSize = dataSize;
+
+  // Copy data
+  EmscriptenUtils::GetArrayData(frameData, m_inputFrameBuffer, m_inputFrameSize);
+
+  // Create scaler, if necessary
+  if (m_scaler == nullptr)
+  {
+    m_scaler = sws_getContext(m_width, m_height, m_inputFormat, m_width, m_height, m_targetFormat,
+                              SWS_BILINEAR, nullptr, nullptr, nullptr);
+  }
+
+  if (m_scaler == nullptr)
+    return fail("Failed to create software scaler");
+
+  // The encoder may still hold a reference to the frame it was sent last, so
+  // make sure the buffers are ours alone before overwriting the pixels
+  AVFrame* const targetFrame = m_targetFrame->FrameHandle();
+  int result = av_frame_make_writable(targetFrame);
+  if (result < 0)
+    return fail(std::string("Failed to make frame writable: ") + av_err2str(result));
+
+  uint8_t* const sourceData[] = {m_inputFrameBuffer};
+  const int sourceLineSize[] = {static_cast<int>(m_inputFrameSize / m_height)};
+
+  // Scale pixels from RGBA to YUV420p
+  result = sws_scale(m_scaler, sourceData, sourceLineSize, 0, m_height, targetFrame->data,
+                     targetFrame->linesize);
+  if (result < 0)
+    return fail(std::string("Error scaling frame: ") + av_err2str(result));
+
+  //
+  // Rescale the presentation timestamp to fix bit rate estimation. We need to
+  // rescale the pts from the stream's time_base to the codec time_base to get
+  // a simple frame number (e.g. 1, 2, 3).
+  //
+  // See https://stackoverflow.com/questions/11466184/setting-video-bit-rate-through-ffmpeg-api-is-ignored-for-libx264-codec
+  //
+  targetFrame->pts = av_rescale_q(static_cast<int64_t>(framePts), m_videoStream->time_base,
+                                  m_codecContext->time_base);
+
+  // Encode the frame
+  result = avcodec_send_frame(m_codecContext, targetFrame);
+  if (result < 0)
+    return fail(std::string("Failed to encode frame: ") + av_err2str(result));
+
+  // Handle encoded packets
+  return ReceivePackets();
+}
+
+// Pull every packet the encoder currently has ready and pass it on to the
+// muxer. Running dry (EAGAIN) or reaching the end of the stream (EOF) ends the
+// loop successfully. Any other error sets the state to Failed and makes the
+// function return false.
+bool StreamEncoder::ReceivePackets()
+{
+  AVPacket videoPacket;
+  av_init_packet(&videoPacket);
+
+  // Drain the encoder: keep pulling packets until it has none left to give
+  while (true)
+  {
+    int result = avcodec_receive_packet(m_codecContext, &videoPacket);
+
+    // EAGAIN: we'll read again when more frames have been added
+    // EOF: the encoder has been flushed, no more packets to read
+    if (result == AVERROR(EAGAIN) || result == AVERROR_EOF)
+      break;
+
+    if (result < 0)
+    {
+      std::cerr << "Error receiving packet: " << av_err2str(result) << std::endl;
+      m_state = StreamEncoderState::Failed;
+      return false;
+    }
+
+    // Receiving a packet resets its fields first, so the stream index has to
+    // be assigned to each packet rather than once before the loop
+    videoPacket.stream_index = m_videoStream->index;
+
+    // Rescale the pts, dts and duration of the encoded video packet to the
+    // stream time base
+    av_packet_rescale_ts(&videoPacket, m_codecContext->time_base, m_videoStream->time_base);
+
+    // Write the frame. Packet is freed by the write.
+    result = av_interleaved_write_frame(m_formatContext, &videoPacket);
+    if (result < 0)
+    {
+      std::cerr << "Error writing packet: " << av_err2str(result) << std::endl;
+      m_state = StreamEncoderState::Failed;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool StreamEncoder::Finalize()
+{
+  switch (m_state)
+  {
+    case StreamEncoderState::Ended:
+      // Already finalized; a repeated call is ignored
+      return true;
+    case StreamEncoderState::Running:
+      break;
+    default:
+      std::cerr << "Failed to finalize: Not running" << std::endl;
+      m_state = StreamEncoderState::Failed;
+      return false;
+  }
+
+  // Mark the stream as ended up front; the error paths below override this
+  m_state = StreamEncoderState::Ended;
+
+  // A null frame acts as the flush request that signals the end of the
+  // stream to the encoder
+  const int flushStatus = avcodec_send_frame(m_codecContext, nullptr);
+  if (flushStatus < 0)
+  {
+    std::cerr << "Error ending stream: " << av_err2str(flushStatus) << std::endl;
+    m_state = StreamEncoderState::Failed;
+    return false;
+  }
+
+  // Write out whatever the encoder was still holding back
+  if (!ReceivePackets())
+    return false;
+
+  // Complete the file with the container's trailer
+  const int trailerStatus = av_write_trailer(m_formatContext);
+  if (trailerStatus < 0)
+  {
+    std::cerr << "Error writing trailer: " << av_err2str(trailerStatus) << std::endl;
+    m_state = StreamEncoderState::Failed;
+    return false;
+  }
+
+  // Everything has been written, so the resources can be released
+  Deinitialize();
+  return true;
+}
+
+/*
+unsigned int StreamEncoder::ReadPacket(uint8_t* buffer, unsigned int bufferSize)
+{
+  // TODO
+
+  return -1;
+}
+*/
+
+void StreamEncoder::WritePacket(uint8_t* buffer, unsigned int bufferSize)
+{
+  // The callback takes the buffer as a plain integer address plus a length.
+  // NOTE(review): the pointer-to-int cast presumes 32-bit pointers -- confirm.
+  m_writePacketFnPtr(reinterpret_cast<int>(buffer), static_cast<int>(bufferSize));
+}
+
+void StreamEncoder::Seek(int64_t offset, int whence)
+{
+  m_seekFnPtr(static_cast<int>(offset), whence); // NOTE: callback takes an int, so offset is narrowed
+}
+
+/*
+int StreamEncoder::ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize)
+{
+  StreamEncoder* encoder = static_cast<StreamEncoder*>(context);
+  if (encoder == nullptr || bufferSize <= 0)
+    return AVERROR_EXIT;
+
+  const unsigned int length = encoder->ReadPacket(buffer, bufferSize);
+  if (length == 0)
+    return AVERROR(EAGAIN);
+
+  return static_cast<int>(length);
+}
+*/
+
+int StreamEncoder::WritePacketInternal(void* context, uint8_t* buffer, int bufferSize)
+{
+  auto* const self = static_cast<StreamEncoder*>(context); // Presumably the opaque IO pointer
+  if (self != nullptr && bufferSize > 0)
+  {
+    self->WritePacket(buffer, static_cast<unsigned int>(bufferSize));
+    return bufferSize; // The whole buffer counts as written
+  }
+  return AVERROR_EXIT;
+}
+
+int64_t StreamEncoder::SeekInternal(void* context, int64_t offset, int whence)
+{
+  // Strip the AVSEEK_FORCE hint; refuse AVSEEK_SIZE etc. so TranslateWhence() can't throw into libav
+  const int origin = whence & ~AVSEEK_FORCE;
+  StreamEncoder* encoder = static_cast<StreamEncoder*>(context);
+  if (encoder == nullptr || (origin != SEEK_SET && origin != SEEK_CUR && origin != SEEK_END))
+    return encoder == nullptr ? AVERROR_EXIT : AVERROR(ENOSYS);
+  encoder->Seek(offset, TranslateWhence(origin));
+  return 0;
+}
+
+int StreamEncoder::TranslateWhence(int whence)
+{
+  // Map the platform's stdio seek origins onto the fixed numbering that is
+  // handed to the seek callback:
+  //
+  //   0 - Seek from beginning of file
+  //   1 - Seek from current position
+  //   2 - Seek from end of file
+  //
+  if (whence == SEEK_SET)
+    return 0;
+
+  if (whence == SEEK_CUR)
+    return 1;
+
+  if (whence == SEEK_END)
+    return 2;
+
+  // Not one of the three origins above. Note that the value itself (an int)
+  // is what gets thrown.
+  throw whence;
+}
diff --git a/lib/stream_encoder/stream_encoder.hpp b/lib/stream_encoder/stream_encoder.hpp
new file mode 100644
index 000000000..dac2489cb
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder.hpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include <memory>
+#include <stdint.h>
+#include <string>
+#include <type_traits> // for std::add_pointer
+#include <vector>
+
+extern "C"
+{
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libswscale/swscale.h>
+}
+
+namespace emscripten
+{
+class val;
+}
+
+class VideoFrame;
+
+enum class StreamEncoderState
+{
+  Init, // State of a newly constructed encoder
+  Running, // Set by OpenVideo() on success; the only state in which frames are accepted
+  Ended, // Set by Finalize(); further Finalize() calls are ignored
+  Failed, // Set when opening, adding a frame or finalizing fails
+};
+
+class StreamEncoder
+{
+public:
+  //! Create a stream encoder instance. Call OpenVideo() before adding frames.
+  StreamEncoder(const std::string& fileName, //!< Output name, as shown in the format dump
+                int width, //!< Frame width
+                int height, //!< Frame height
+                int timeBaseNum, //!< Stream time base, numerator
+                int timeBaseDen, //!< Stream time base, denominator
+                int realFrameRateNum, //!< Stream real frame rate, numerator
+                int realFrameRateDen, //!< Stream real frame rate, denominator
+                int videoBitRate, //!< Video bit rate (presumably bits/s -- confirm)
+                int writeBufferSize, //!< Write buffer size (presumably bytes -- confirm)
+                int writePacketFnPtr, //!< Write callback, passed as an integer (see WritePacketFnPtr)
+                int seekFnPtr); //!< Seek callback, passed as an integer (see SeekFnPtr)
+  ~StreamEncoder();
+  StreamEncoder(const StreamEncoder&) = delete; // Owns raw libav handles; a copy would free them twice
+  StreamEncoder& operator=(const StreamEncoder&) = delete;
+
+  // Accessors
+  StreamEncoderState GetState() const { return m_state; }
+  unsigned int GetVideoWidth() const { return m_width; }
+  unsigned int GetVideoHeight() const { return m_height; }
+  unsigned int GetTimeBaseNumerator() const { return m_timeBaseNum; }
+  unsigned int GetTimeBaseDenominator() const { return m_timeBaseDen; }
+  unsigned int GetRealFrameRateNumerator() const { return m_realFrameRateNum; }
+  unsigned int GetRealFrameRateDenominator() const { return m_realFrameRateDen; }
+  unsigned int GetWriteBufferSize() const { return m_writeBufferSize; }
+
+  // Video API (each call returns false on failure)
+  bool OpenVideo(); // Releases any previous session, then initializes; sets Running or Failed
+  bool AddFrame(const emscripten::val& frameData, int framePts); // Requires the Running state
+  bool Finalize(); // Flushes the encoder and writes the trailer; repeated calls are ignored
+
+private:
+  // Lifecycle functions
+  bool Initialize();
+  void Deinitialize();
+
+  // Video functions
+  bool ReceivePackets(); // Forwards the packets the encoder has ready to the muxer
+
+  // IO functions (forward to the callbacks given at construction)
+  //unsigned int ReadPacket(uint8_t* buffer, unsigned int bufferSize);
+  void WritePacket(uint8_t* buffer, unsigned int bufferSize);
+  void Seek(int64_t offset, int whence);
+
+  // libav internal IO functions (context is cast back to the StreamEncoder)
+  //static int ReadPacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int WritePacketInternal(void* context, uint8_t* buffer, int bufferSize);
+  static int64_t SeekInternal(void* context, int64_t offset, int whence);
+
+  // Utility functions
+  bool CreateContext();
+  bool PrepareVideoEncoder();
+  bool InitializeVideoStream();
+  void EnableGlobalHeaders();
+  void PrintInfo();
+  bool WriteHeader();
+  static int TranslateWhence(int whence); // SEEK_SET/CUR/END -> 0/1/2; throws the int otherwise
+
+  // Types (signatures of the write and seek callbacks)
+  using WritePacketFnPtr = std::add_pointer<void(int buffer, int bufferSize)>::type;
+  using SeekFnPtr = std::add_pointer<void(int offset, int whence)>::type;
+
+  // Constants
+  static const AVPixelFormat m_inputFormat; // Format of the frames passed to AddFrame()
+  static const AVPixelFormat m_targetFormat; // Format the frames are converted to for encoding
+  static const unsigned int m_cachePageSize;
+
+  // Construction parameters
+  const std::string m_fileName;
+  const unsigned int m_width;
+  const unsigned int m_height;
+  const unsigned int m_timeBaseNum;
+  const unsigned int m_timeBaseDen;
+  const unsigned int m_realFrameRateNum;
+  const unsigned int m_realFrameRateDen;
+  const unsigned int m_videoBitRate;
+  const unsigned int m_writeBufferSize;
+  const WritePacketFnPtr m_writePacketFnPtr;
+  const SeekFnPtr m_seekFnPtr;
+
+  // State parameters
+  StreamEncoderState m_state = StreamEncoderState::Init;
+
+  // Video resources
+  AVIOContext* m_ioContext = nullptr;
+  AVFormatContext* m_formatContext = nullptr;
+  AVStream* m_videoStream = nullptr; // Created via avformat_new_stream() on m_formatContext
+  AVCodecContext* m_codecContext = nullptr;
+  SwsContext* m_scaler = nullptr; // Created lazily by AddFrame()
+  uint8_t* m_inputFrameBuffer = nullptr; // RGBA
+  size_t m_inputFrameSize = 0; // Size of m_inputFrameBuffer in bytes; 0 until the first frame
+  std::unique_ptr<VideoFrame> m_targetFrame; // YUV420p
+};
diff --git a/lib/stream_encoder/stream_encoder_embinder.cpp b/lib/stream_encoder/stream_encoder_embinder.cpp
new file mode 100644
index 000000000..364427a67
--- /dev/null
+++ b/lib/stream_encoder/stream_encoder_embinder.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "stream_encoder.hpp"
+
+#include <emscripten/bind.h>
+
+using namespace emscripten;
+
+EMSCRIPTEN_BINDINGS(stream_encoder)
+{
+  enum_<StreamEncoderState>("StreamEncoderState") // Every enumerator is exported under its C++ name
+      .value("Init", StreamEncoderState::Init)
+      .value("Running", StreamEncoderState::Running)
+      .value("Ended", StreamEncoderState::Ended)
+      .value("Failed", StreamEncoderState::Failed);
+
+  class_<StreamEncoder>("StreamEncoder")
+      .constructor<const std::string&, int, int, int, int, int, int, int, int, int, int>() // Same order as the C++ constructor
+      .property("state", &StreamEncoder::GetState) // Properties are bound to getters only
+      .property("videoWidth", &StreamEncoder::GetVideoWidth)
+      .property("videoHeight", &StreamEncoder::GetVideoHeight)
+      .property("timeBaseNum", &StreamEncoder::GetTimeBaseNumerator)
+      .property("timeBaseDen", &StreamEncoder::GetTimeBaseDenominator)
+      .property("realFrameRateNum", &StreamEncoder::GetRealFrameRateNumerator)
+      .property("realFrameRateDen", &StreamEncoder::GetRealFrameRateDenominator)
+      .property("writeBufferSize", &StreamEncoder::GetWriteBufferSize)
+      .function("openVideo", &StreamEncoder::OpenVideo) // The video API, in lowerCamelCase
+      .function("addFrame", &StreamEncoder::AddFrame)
+      .function("finalize", &StreamEncoder::Finalize);
+}
diff --git a/lib/video/video_frame.cpp b/lib/video/video_frame.cpp
new file mode 100644
index 000000000..0a50753d1
--- /dev/null
+++ b/lib/video/video_frame.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#include "video_frame.hpp"
+
+#include <iostream>
+
+extern "C"
+{
+#include <libavutil/frame.h>
+#include <libavutil/imgutils.h>
+}
+
+// Definition of the class constant. HTML uses RGBA pixels, what to use for YUV420p?
+const unsigned int VideoFrame::m_bitsPerPixel = 32;
+
+void VideoFrame::FrameDeleter::operator()(AVFrame* frame)
+{
+  // The buffers come from av_frame_get_buffer() and are reference counted, so
+  // they are released by av_frame_free(). Freeing data[0] by hand on top of
+  // that (as is done for av_image_alloc() buffers) would free them twice.
+  // av_frame_free() is a no-op for a null frame.
+  av_frame_free(&frame);
+}
+
+VideoFrame::VideoFrame(unsigned int width, unsigned int height, AVPixelFormat pixelFormat)
+  : m_width(width), m_height(height), m_pixelFormat(pixelFormat)
+{
+}
+
+VideoFrame::~VideoFrame() = default; // m_frame's FrameDeleter releases the AVFrame
+
+bool VideoFrame::Initialize()
+{
+  // A frame without area can't be allocated
+  if (!(m_width != 0 && m_height != 0))
+  {
+    std::cerr << "Invalid dimensions, width = " << m_width << ", height = " << m_height
+              << std::endl;
+    return false;
+  }
+
+  // Create the frame structure; any frame held previously is released
+  m_frame.reset(av_frame_alloc());
+  AVFrame* const frame = m_frame.get();
+  if (frame == nullptr)
+  {
+    std::cerr << "Failed to allocate frame" << std::endl;
+    return false;
+  }
+
+  // Describe the picture, then have buffers allocated to match (alignment 0 = automatic)
+  frame->width = m_width;
+  frame->height = m_height;
+  frame->format = m_pixelFormat;
+  const int allocStatus = av_frame_get_buffer(frame, 0);
+  if (allocStatus < 0)
+  {
+    std::cerr << "Can't allocate buffer for output frame: " << av_err2str(allocStatus) << std::endl;
+    return false;
+  }
+
+  // Make sure the encoder doesn't keep ref to this frame as we'll modify it
+  const int writableStatus = av_frame_make_writable(frame);
+  if (writableStatus < 0)
+  {
+    std::cerr << "Failed to make frame writable: " << av_err2str(writableStatus) << std::endl;
+    return false;
+  }
+
+  return true;
+}
diff --git a/lib/video/video_frame.hpp b/lib/video/video_frame.hpp
new file mode 100644
index 000000000..e60297531
--- /dev/null
+++ b/lib/video/video_frame.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2023 retro.ai
+ * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ * See LICENSE.txt for more information.
+ */
+
+#pragma once
+
+#include <memory>
+#include <stdint.h>
+
+extern "C"
+{
+#include <libavformat/avformat.h>
+}
+
+struct AVFrame;
+
+class VideoFrame
+{
+public:
+  VideoFrame(unsigned int width, unsigned int height, AVPixelFormat pixelFormat); // Stores the parameters only
+  ~VideoFrame();
+
+  // Lifecycle functions
+  bool Initialize(); // Allocates the AVFrame and its buffers; returns false on failure
+
+  // Accessors
+  unsigned int Width() const { return m_width; }
+  unsigned int Height() const { return m_height; }
+  AVPixelFormat PixelFormat() const { return m_pixelFormat; }
+  AVFrame* FrameHandle() { return m_frame.get(); } // Null until Initialize() has allocated the frame
+
+private:
+  // Utilities
+  struct FrameDeleter // Deleter used by m_frame to release the AVFrame
+  {
+    void operator()(AVFrame* frame);
+  };
+
+  // Constants
+  static const unsigned int m_bitsPerPixel; // TODO: Different value needed for YUV420p?
+
+  // Construction parameters
+  const unsigned int m_width; // Initialize() rejects a value of 0
+  const unsigned int m_height; // Initialize() rejects a value of 0
+  const AVPixelFormat m_pixelFormat;
+
+  // Video resources
+  std::unique_ptr<AVFrame, FrameDeleter> m_frame; // Owned frame; empty until Initialize()
+};