From 6e823804786506d0f87edb77b0f20f4fdbd2d471 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 08:40:48 +1000
Subject: [PATCH 01/41] minor encoder reporting cleanup

---
 CMakeLists.txt       | 17 +++++++++--------
 examples/heif_enc.cc | 17 ++++++++++++-----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9c008b84a8..c1d34b935a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -164,7 +164,7 @@ endif ()
 # openh264 decoder
 
 plugin_option(OpenH264_DECODER "OpenH264 decoder" ON OFF)
-plugin_option(OpenH264_ENCODER "OpenH264 encoder" ON OFF)
+# plugin_option(OpenH264_ENCODER "OpenH264 encoder" ON OFF)
 if (WITH_OpenH264_ENCODER OR WITH_OpenH264_DECODER)
     find_package(OpenH264)
 
@@ -173,9 +173,9 @@ if (WITH_OpenH264_ENCODER OR WITH_OpenH264_DECODER)
     if (OpenH264_FOUND AND WITH_OpenH264_DECODER)
         set(OpenH264_DECODER_FOUND TRUE)
     endif()
-    if (OpenH264_FOUND AND WITH_OpenH264_ENCODER)
-        set(OpenH264_ENCODER_FOUND TRUE)
-    endif()
+#    if (OpenH264_FOUND AND WITH_OpenH264_ENCODER)
+#        set(OpenH264_ENCODER_FOUND TRUE)
+#    endif()
 endif()
 
 
@@ -268,7 +268,7 @@ plugin_compilation_info(RAV1E RAV1E "Rav1e AV1 encoder")
 plugin_compilation_info(JPEG_DECODER JPEG "JPEG decoder")
 plugin_compilation_info(JPEG_ENCODER JPEG "JPEG encoder")
 plugin_compilation_info(OpenH264_DECODER OpenH264_DECODER "OpenH264 decoder")
-plugin_compilation_info(OpenH264_ENCODER OpenH264_ENCODER "OpenH264 encoder")
+# plugin_compilation_info(OpenH264_ENCODER OpenH264_ENCODER "OpenH264 encoder")
 plugin_compilation_info(OpenJPEG_DECODER OpenJPEG "OpenJPEG J2K decoder")
 plugin_compilation_info(OpenJPEG_ENCODER OpenJPEG "OpenJPEG J2K encoder")
 # plugin_compilation_info(OPENJPH_DECODER OPENJPH "OpenJPH HT-J2K decoder")
@@ -351,14 +351,15 @@ endif()
 
 message("\n=== Supported formats ===")
 message("format        decoding   encoding")
-format_compilation_info("HEIC" SUPPORTS_HEIC_DECODING SUPPORTS_HEIC_ENCODING)
-format_compilation_info("AVIF" SUPPORTS_AVIF_DECODING SUPPORTS_AVIF_ENCODING)
-format_compilation_info("VVC"  SUPPORTS_VVC_DECODING SUPPORTS_VVC_ENCODING)
 format_compilation_info("AVC"  SUPPORTS_AVC_DECODING SUPPORTS_AVC_ENCODING)
+format_compilation_info("AVIF" SUPPORTS_AVIF_DECODING SUPPORTS_AVIF_ENCODING)
+format_compilation_info("HEIC" SUPPORTS_HEIC_DECODING SUPPORTS_HEIC_ENCODING)
 format_compilation_info("JPEG" SUPPORTS_JPEG_DECODING SUPPORTS_JPEG_ENCODING)
 format_compilation_info("JPEG2000" SUPPORTS_J2K_DECODING SUPPORTS_J2K_ENCODING)
 format_compilation_info("JPEG2000-HT" SUPPORTS_J2K_HT_DECODING SUPPORTS_J2K_HT_ENCODING)
 format_compilation_info("Uncompressed" SUPPORTS_UNCOMPRESSED_DECODING SUPPORTS_UNCOMPRESSED_ENCODING)
+format_compilation_info("VVC"  SUPPORTS_VVC_DECODING SUPPORTS_VVC_ENCODING)
+
 message("")
 
 # --- Libsharpyuv color space transforms
diff --git a/examples/heif_enc.cc b/examples/heif_enc.cc
index 1329d58965..bef60e9dde 100644
--- a/examples/heif_enc.cc
+++ b/examples/heif_enc.cc
@@ -373,6 +373,9 @@ static const char* get_compression_format_name(heif_compression_format format)
     case heif_compression_AV1:
       return "AV1";
       break;
+    case heif_compression_AVC:
+      return "AVC";
+      break;
     case heif_compression_VVC:
       return "VVC";
       break;
@@ -399,19 +402,20 @@ static const char* get_compression_format_name(heif_compression_format format)
 
 static void show_list_of_all_encoders()
 {
-    for (auto compression_format : {heif_compression_HEVC, heif_compression_AV1, heif_compression_VVC, heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K
+    for (auto compression_format : {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC, heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K
 #if WITH_UNCOMPRESSED_CODEC
 , heif_compression_uncompressed
 #endif
+, heif_compression_VVC
   }) {
 
     switch (compression_format) {
+      case heif_compression_AVC:
+        std::cout << "AVC";
+        break;
       case heif_compression_AV1:
         std::cout << "AVIF";
         break;
-      case heif_compression_VVC:
-        std::cout << "VVIC";
-        break;
       case heif_compression_HEVC:
         std::cout << "HEIC";
         break;
@@ -422,11 +426,14 @@ static void show_list_of_all_encoders()
         std::cout << "JPEG 2000";
         break;
       case heif_compression_HTJ2K:
-        std::cout << "HT-J2K";
+        std::cout << "JPEG 2000 (HT)";
         break;
       case heif_compression_uncompressed:
         std::cout << "Uncompressed";
         break;
+      case heif_compression_VVC:
+        std::cout << "VVIC";
+        break;
       default:
         assert(false);
     }

From 956ba0914a8c3617f3a600e7e2eea3461ea8caea Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 09:56:47 +1000
Subject: [PATCH 02/41] nvdec: initial merge

---
 CMakeLists.txt                    |   15 +
 cmake/modules/FindNVDEC.cmake     |   16 +
 libheif/api/libheif/heif.h        |    7 +
 libheif/api/libheif/heif_plugin.h |   48 +-
 libheif/codecs/image_item.cc      |   10 +-
 libheif/plugin_registry.cc        |    8 +
 libheif/plugins/CMakeLists.txt    |    8 +
 libheif/plugins/NvDecoder.cpp     |  548 +++++++++++++
 libheif/plugins/NvDecoder.h       |  191 +++++
 libheif/plugins/cuviddec.h        | 1188 +++++++++++++++++++++++++++++
 libheif/plugins/decoder_nvdec.cc  |  363 +++++++++
 libheif/plugins/decoder_nvdec.h   |   34 +
 libheif/plugins/nvcuvid.h         |  553 ++++++++++++++
 13 files changed, 2967 insertions(+), 22 deletions(-)
 create mode 100644 cmake/modules/FindNVDEC.cmake
 create mode 100644 libheif/plugins/NvDecoder.cpp
 create mode 100644 libheif/plugins/NvDecoder.h
 create mode 100644 libheif/plugins/cuviddec.h
 create mode 100644 libheif/plugins/decoder_nvdec.cc
 create mode 100644 libheif/plugins/decoder_nvdec.h
 create mode 100644 libheif/plugins/nvcuvid.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9c008b84a8..2fa668cc48 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -248,6 +248,13 @@ if (WITH_OPENJPH_ENCODER OR WITH_OPENJPH_DECODER)
     find_package(OPENJPH)
 endif()
 
+# nvidia hardware decode
+
+plugin_option(NV_DECODER "NVIDIA Hardware AVC/AV1/HEVC/JPEG decoder" OFF OFF)
+if (WITH_NV_DECODER)
+    find_package(NVDEC)
+endif()
+
 # uncompressed
 
 option(WITH_UNCOMPRESSED_CODEC " Support internal ISO/IEC 23001-17 uncompressed codec (experimental) " OFF)
@@ -276,6 +283,8 @@ plugin_compilation_info(OPENJPH_ENCODER OPENJPH "OpenJPH HT-J2K encoder")
 plugin_compilation_info(UVG266_ENCODER UVG266 "uvg266 VVC enc. (experimental)")
 plugin_compilation_info(VVENC vvenc "vvenc VVC enc. (experimental)")
 plugin_compilation_info(VVDEC vvdec "vvdec VVC dec. (experimental)")
+plugin_compilation_info(NV_DECODER NVDEC "NVIDIA hardware decoder")
+
 
 # --- show summary which formats are supported
 
@@ -343,6 +352,12 @@ endif()
 if (OpenH264_ENCODER_FOUND)
     set(SUPPORTS_AVC_ENCODING TRUE)
 endif()
+if (NVDEC_FOUND)
+    set(SUPPORTS_HEIC_DECODING TRUE)
+    set(SUPPORTS_AVC_DECODING TRUE)
+    set(SUPPORTS_JPEG_DECODING TRUE)
+    set(SUPPORTS_AVIF_DECODING TRUE)
+endif()
 
 if (WITH_UNCOMPRESSED_CODEC)
     set(SUPPORTS_UNCOMPRESSED_DECODING TRUE)
diff --git a/cmake/modules/FindNVDEC.cmake b/cmake/modules/FindNVDEC.cmake
new file mode 100644
index 0000000000..8425c0af5e
--- /dev/null
+++ b/cmake/modules/FindNVDEC.cmake
@@ -0,0 +1,16 @@
+include(LibFindMacros)
+# Locate the NVIDIA video decode library (nvcuvid).
+find_library(NVDEC_LIBRARY
+    NAMES libnvcuvid nvcuvid
+)
+# The CUDA toolkit is mandatory here: REQUIRED aborts configuration when it is missing.
+find_package(CUDAToolkit REQUIRED)
+
+set(NVDEC_PROCESS_LIBS NVDEC_LIBRARY)
+libfind_process(NVDEC)
+# Report the result through the standard handler; NVDEC_LIBRARY is the only required variable.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(NVDEC
+    REQUIRED_VARS
+        NVDEC_LIBRARY
+)
diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h
index ecd9c7eb23..39998fe576 100644
--- a/libheif/api/libheif/heif.h
+++ b/libheif/api/libheif/heif.h
@@ -856,6 +856,13 @@ typedef uint32_t heif_brand2;
  */
 #define heif_brand2_miaf   heif_fourcc('m','i','a','f')
 
+/**
+ * AVC (H.264) image (`avci`) brand.
+ *
+ * See ISO/IEC 23008-12:2022 Annex E.4
+ */
+#define heif_brand2_avci   heif_fourcc('a','v','c','i')
+
 /**
  * Single picture file brand.
  *
diff --git a/libheif/api/libheif/heif_plugin.h b/libheif/api/libheif/heif_plugin.h
index 3a438bfc94..08c771ef6d 100644
--- a/libheif/api/libheif/heif_plugin.h
+++ b/libheif/api/libheif/heif_plugin.h
@@ -34,14 +34,16 @@ extern "C" {
 
 // API versions table
 //
-// release    decoder   encoder   enc.params
-// -----------------------------------------
-//  1.0          1        N/A        N/A
-//  1.1          1         1          1
-//  1.4          1         1          2
-//  1.8          1         2          2
-//  1.13         2         3          2
-//  1.15         3         3          2
+// release    decoder  dec.config  encoder   enc.params
+// -----------------------------------------------------
+//  1.0          1        N/A        N/A        N/A
+//  1.1          1        N/A         1          1
+//  1.4          1        N/A         1          2
+//  1.8          1        N/A         2          2
+//  1.13         2        N/A         3          2
+//  1.15         3        N/A         3          2
+//  1.19         4         1          3          2
+
 
 
 // ====================================================================================================
@@ -50,6 +52,14 @@ extern "C" {
 //  added as plugins. A plugin has to implement the functions specified in heif_decoder_plugin
 //  and the plugin has to be registered to the libheif library using heif_register_decoder().
 
+struct heif_decoder_configuration  // passed to heif_decoder_plugin::new_decoder2()
+{
+  int version; // current version: 1
+  // --- version 1 fields ---
+  // Compression format the decoder is created for ('enum' tag keeps this header valid C).
+  enum heif_compression_format compression_format;
+};
+
 struct heif_decoder_plugin
 {
   // API version supported by this plugin (see table above for supported versions)
@@ -88,27 +98,23 @@ struct heif_decoder_plugin
   struct heif_error (* decode_image)(void* decoder, struct heif_image** out_img);
 
 
-  // --- version 2 functions will follow below ... ---
+  // --- version 2 functions ---
 
   void (*set_strict_decoding)(void* decoder, int flag);
 
-  // If not NULL, this can provide a specialized function to convert YCbCr to sRGB, because
-  // only the codec itself knows how to interpret the chroma samples and their locations.
-  /*
-  struct heif_error (*convert_YCbCr_to_sRGB)(void* decoder,
-                                             struct heif_image* in_YCbCr_img,
-                                             struct heif_image** out_sRGB_img);
 
-  */
+  // --- version 3 functions ---
 
-  // Reset decoder, such that we can feed in new data for another image.
-  // void (*reset_image)(void* decoder);
+  const char* id_name;
 
-  // --- version 3 functions will follow below ... ---
 
-  const char* id_name;
+  // --- version 4 functions ---
 
-  // --- version 4 functions will follow below ... ---
+  // Create a new decoder context for decoding an image, configured through 'decoder_config'.
+  struct heif_error (* new_decoder2)(void** decoder, const struct heif_decoder_configuration* decoder_config);
+
+
+  // --- version 5 functions will follow below ... ---
 };
 
 
diff --git a/libheif/codecs/image_item.cc b/libheif/codecs/image_item.cc
index fa969a0175..7c09667e88 100644
--- a/libheif/codecs/image_item.cc
+++ b/libheif/codecs/image_item.cc
@@ -1030,7 +1030,15 @@ Result<std::shared_ptr<HeifPixelImage>> ImageItem::decode_from_compressed_data(h
   // --- decode image with the plugin
 
   void* decoder;
-  struct heif_error err = decoder_plugin->new_decoder(&decoder);
+
+  struct heif_error err;
+  if (decoder_plugin->plugin_api_version >= 4) {
+    heif_decoder_configuration decoder_configuration {.version = 1, .compression_format = compression_format};
+    err = decoder_plugin->new_decoder2(&decoder, &decoder_configuration);
+  } else {
+    err = decoder_plugin->new_decoder(&decoder);
+  }
+
   if (err.code != heif_error_Ok) {
     return Error(err.code, err.subcode, err.message);
   }
diff --git a/libheif/plugin_registry.cc b/libheif/plugin_registry.cc
index 9bc196f467..83a41aeeca 100644
--- a/libheif/plugin_registry.cc
+++ b/libheif/plugin_registry.cc
@@ -104,6 +104,10 @@
 #include "plugins/encoder_openjph.h"
 #endif
 
+#if HAVE_NV_DECODER
+#include "plugins/decoder_nvdec.h"
+#endif
+
 std::set<const struct heif_decoder_plugin*> s_decoder_plugins;
 
 std::multiset<std::unique_ptr<struct heif_encoder_descriptor>,
@@ -211,6 +215,10 @@ void register_default_plugins()
   register_decoder(get_decoder_plugin_openh264());
 #endif
 
+#if HAVE_NV_DECODER
+  register_decoder(get_decoder_plugin_nvdec());
+#endif
+
 #if WITH_UNCOMPRESSED_CODEC
   register_encoder(get_encoder_plugin_uncompressed());
 #endif
diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt
index 58418d9983..49f6f47066 100644
--- a/libheif/plugins/CMakeLists.txt
+++ b/libheif/plugins/CMakeLists.txt
@@ -112,6 +112,14 @@ set(OpenH264_DECODER_sources decoder_openh264.cc decoder_openh264.h)
 set(OpenH264_DECODER_extra_plugin_sources)
 plugin_compilation(openh264dec OpenH264 OpenH264_DECODER_FOUND OpenH264_DECODER OpenH264_DECODER)
 
+set(NV_DECODER_sources decoder_nvdec.cc decoder_nvdec.h NvDecoder.cpp NvDecoder.h)
+set(NV_DECODER_extra_plugin_sources)
+plugin_compilation(nvdec NVDEC NVDEC_FOUND NV_DECODER NV_DECODER)
+if(WITH_NV_DECODER)
+    target_link_libraries(heif PRIVATE CUDA::cuda_driver)
+endif()
+
+
 target_sources(heif PRIVATE
                encoder_mask.h
                encoder_mask.cc
diff --git a/libheif/plugins/NvDecoder.cpp b/libheif/plugins/NvDecoder.cpp
new file mode 100644
index 0000000000..44fd9be176
--- /dev/null
+++ b/libheif/plugins/NvDecoder.cpp
@@ -0,0 +1,548 @@
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2023 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// for std::cout and friends
+#include <iostream>
+
+// for ceil()
+#include <cmath>
+
+// for memset, memcpy
+#include <cstring>
+
+// TODO: remove this once we dump the errorLog
+#include <sstream>
+
+#include "libheif/heif_plugin.h"
+
+#include "NvDecoder.h"
+
+/**
+* @brief Exception carrying a message and the CUresult code of a failed decode API call.
+*/
+class NVDECException : public std::exception
+{
+public:
+    NVDECException(const std::string& errorStr, const CUresult errorCode)
+        : m_errorString(errorStr), m_errorCode(errorCode) {}
+    // 'throw()' specifications are deprecated and were removed in C++20; use noexcept instead.
+    ~NVDECException() noexcept override = default;
+    const char* what() const noexcept override { return m_errorString.c_str(); }
+    CUresult getErrorCode() const { return m_errorCode; }
+    const std::string& getErrorString() const { return m_errorString; }
+    static NVDECException makeNVDECException(const std::string& errorStr, const CUresult errorCode,
+        const std::string& functionName, const std::string& fileName, int lineNo);
+private:
+    std::string m_errorString;
+    CUresult m_errorCode;
+};
+
+inline NVDECException NVDECException::makeNVDECException(const std::string& errorStr, const CUresult errorCode, const std::string& functionName,
+    const std::string& fileName, int lineNo)
+{
+    // Message layout: "<function> : <message> at <file>:<line>", terminated by std::endl.
+    std::ostringstream message;
+    message << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
+    return NVDECException(message.str(), errorCode);
+}
+
+// Throws an NVDECException built from errorStr/errorCode plus the calling function, file and line.
+#define NVDEC_THROW_ERROR( errorStr, errorCode )                                                         \
+    do                                                                                                   \
+    {                                                                                                    \
+        throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
+    } while (0)
+
+// Evaluates cuvidAPI once; throws NVDECException naming the call when the result is not CUDA_SUCCESS.
+#define NVDEC_API_CALL( cuvidAPI )                                                                                 \
+    do                                                                                                             \
+    {                                                                                                              \
+        CUresult errorCode = cuvidAPI;                                                                             \
+        if( errorCode != CUDA_SUCCESS)                                                                             \
+        {                                                                                                          \
+            std::ostringstream errorLog;                                                                           \
+            errorLog << #cuvidAPI << " returned error " << errorCode;                                              \
+            throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
+        }                                                                                                          \
+    } while (0)
+
+// Evaluates a CUDA driver call once; throws NVDECException with the error name (or "unknown") on failure.
+#define CUDA_DRVAPI_CALL( call )                                                                                                 \
+    do                                                                                                                           \
+    {                                                                                                                            \
+        CUresult err__ = call;                                                                                                   \
+        if (err__ != CUDA_SUCCESS)                                                                                               \
+        {                                                                                                                        \
+            const char *szErrName = NULL;                                                                                        \
+            cuGetErrorName(err__, &szErrName);                                                                                   \
+            std::ostringstream errorLog;                                                                                         \
+            errorLog << "CUDA driver API error " << (szErrName ? szErrName : "unknown");                                         \
+            throw NVDECException::makeNVDECException(errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__);                   \
+        }                                                                                                                        \
+    }                                                                                                                            \
+    while (0)
+
+#ifdef __cuda_cuda_h__
+// Returns true when e is CUDA_SUCCESS; otherwise prints the error name and source location and returns false.
+inline bool check(CUresult e, int iLine, const char *szFile) {
+    if (e != CUDA_SUCCESS) {
+        const char *szErrName = NULL;
+        cuGetErrorName(e, &szErrName);  // may leave szErrName NULL for an unrecognized code
+        std::cout << "CUDA driver API error " << (szErrName ? szErrName : "unknown") << " at line " << iLine << " in file " << szFile << std::endl;
+        return false;
+    }
+    return true;
+}
+#endif
+
+
+#define ck(call) check(call, __LINE__, __FILE__)
+
+
+/**
+* @brief Converts the chroma samples of a frame in place between planar (U plane, V plane)
+*        and interleaved (UVUV...) layout. Owns a scratch buffer, therefore not copyable.
+*/
+template<typename T>
+class YuvConverter {
+public:
+    YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
+        pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
+    }
+    ~YuvConverter() {
+        delete[] pQuad;
+    }
+    // pQuad is an owning raw pointer: an implicit copy would release it twice.
+    YuvConverter(const YuvConverter&) = delete;
+    YuvConverter& operator=(const YuvConverter&) = delete;
+    // pFrame: luma plane followed by the U and V planes; nPitch: row stride in samples (0 = nWidth).
+    void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
+        if (nPitch == 0) {
+            nPitch = nWidth;
+        }
+        // sizes of source surface plane
+        int nSizePlaneY = nPitch * nHeight;
+        int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
+
+        T *puv = pFrame + nSizePlaneY;
+        if (nPitch == nWidth) {
+            memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
+        } else {
+            for (int i = 0; i < (nHeight + 1) / 2; i++) {
+                memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
+            }
+        }
+        T *pv = puv + nSizePlaneU;
+        for (int y = 0; y < (nHeight + 1) / 2; y++) {
+            for (int x = 0; x < (nWidth + 1) / 2; x++) {
+                puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
+                puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
+            }
+        }
+    }
+    // pFrame: luma plane followed by interleaved UV rows; nPitch: row stride in samples (0 = nWidth).
+    void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
+        if (nPitch == 0) {
+            nPitch = nWidth;
+        }
+        // sizes of source surface plane
+        int nSizePlaneY = nPitch * nHeight;
+        int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
+        int nSizePlaneV = nSizePlaneU;
+        T *puv = pFrame + nSizePlaneY,
+            *pu = puv,
+            *pv = puv + nSizePlaneU;
+        // split chroma from interleave to planar; V is staged in pQuad and copied back afterwards
+        for (int y = 0; y < (nHeight + 1) / 2; y++) {
+            for (int x = 0; x < (nWidth + 1) / 2; x++) {
+                pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
+                pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
+            }
+        }
+        if (nPitch == nWidth) {
+            memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
+        } else {
+            for (int i = 0; i < (nHeight + 1) / 2; i++) {
+                memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
+            }
+        }
+    }
+private:
+    T *pQuad;
+    int nWidth, nHeight;
+};
+
+
+// De-interleaves the chroma of a host frame in place: nv12->iyuv when nBitDepth is 8,
+// otherwise the frame is handled as 16-bit samples (p016->yuv420p16).
+void ConvertSemiplanarToPlanar(uint8_t *pHostFrame, int nWidth, int nHeight, int nBitDepth) {
+    if (nBitDepth != 8) {
+        YuvConverter<uint16_t> wideConverter(nWidth, nHeight);
+        wideConverter.UVInterleavedToPlanar(reinterpret_cast<uint16_t *>(pHostFrame));
+        return;
+    }
+    YuvConverter<uint8_t> byteConverter(nWidth, nHeight);
+    byteConverter.UVInterleavedToPlanar(pHostFrame);
+}
+
+static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat)
+{
+    // Chroma plane height expressed as a fraction of the luma height.
+    // The YUV444 surface formats yield 1.0; NV12, P016 and any value
+    // not listed here yield 0.5.
+    switch (eSurfaceFormat)
+    {
+    case cudaVideoSurfaceFormat_YUV444:
+    case cudaVideoSurfaceFormat_YUV444_16Bit:
+        return 1.0f;
+    case cudaVideoSurfaceFormat_NV12:
+    case cudaVideoSurfaceFormat_P016:
+    default:
+        // same value the original used as its initial default
+        return 0.5f;
+    }
+}
+
+static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat)
+{
+    // Number of chroma planes in a surface of the given format.
+    // The YUV444 surface formats yield 2; NV12, P016 and any value
+    // not listed here yield 1.
+    switch (eSurfaceFormat)
+    {
+    case cudaVideoSurfaceFormat_YUV444:
+    case cudaVideoSurfaceFormat_YUV444_16Bit:
+        return 2;
+    case cudaVideoSurfaceFormat_NV12:
+    case cudaVideoSurfaceFormat_P016:
+    default:
+        // same value the original used as its initial default
+        return 1;
+    }
+}
+
+
+/* Called when the parser encounters sequence header for AV1 SVC content
+*  return value interpretation:
+*      < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10: bDispAllLayer, bit 11-30: reserved, must be set 0)
+*  Returns -1 unless the codec is AV1 and more than one operating point is present.
+*/
+int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo)
+{
+    const bool isAv1Svc = (pOPInfo->codec == cudaVideoCodec_AV1) && (pOPInfo->av1.operating_points_cnt > 1);
+    if (!isAv1Svc)
+    {
+        return -1;
+    }
+
+    // clip has SVC enabled; an out-of-range selection is reset to operating point 0
+    if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt)
+        m_nOperatingPoint = 0;
+
+    printf("AV1 SVC clip: operating point count %d  ", pOPInfo->av1.operating_points_cnt);
+    printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n", m_nOperatingPoint, pOPInfo->av1.operating_points_idc[m_nOperatingPoint], m_bDispAllLayers);
+    return (m_nOperatingPoint | (m_bDispAllLayers << 10));
+}
+
+/* Return value from HandleVideoSequence() is interpreted as:
+*  0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
+*  Errors are raised as NVDECException; otherwise pVideoFormat->min_num_decode_surfaces is returned. */
+int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat)
+{
+    int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
+    // Query the decoder capabilities for this codec / chroma format / bit depth.
+    CUVIDDECODECAPS decodecaps;
+    memset(&decodecaps, 0, sizeof(decodecaps));
+
+    decodecaps.eCodecType = pVideoFormat->codec;
+    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
+    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext));
+    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+    // NOTE(review): if cuvidGetDecoderCaps fails above, the exception skips the matching cuCtxPopCurrent.
+    if(!decodecaps.bIsSupported){
+        NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED);
+        return nDecodeSurface;  // not reached: NVDEC_THROW_ERROR throws
+    }
+
+    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
+        (pVideoFormat->coded_height > decodecaps.nMaxHeight)){
+
+        std::ostringstream errorString;
+        errorString << std::endl
+                    << "Resolution          : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height << std::endl
+                    << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl
+                    << "Resolution not supported on this GPU";
+
+        const std::string cErr = errorString.str();
+        NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
+        return nDecodeSurface;  // not reached: NVDEC_THROW_ERROR throws
+    }
+    // The coded size counted in 16x16 blocks (each dimension >> 4) must not exceed nMaxMBCount.
+    if ((pVideoFormat->coded_width>>4)*(pVideoFormat->coded_height>>4) > decodecaps.nMaxMBCount){
+
+        std::ostringstream errorString;
+        errorString << std::endl
+                    << "MBCount             : " << (pVideoFormat->coded_width >> 4)*(pVideoFormat->coded_height >> 4) << std::endl
+                    << "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl
+                    << "MBCount not supported on this GPU";
+
+        const std::string cErr = errorString.str();
+        NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
+        return nDecodeSurface;  // not reached: NVDEC_THROW_ERROR throws
+    }
+    // Remember the stream properties; m_nBPP is 2 when the luma bit depth exceeds 8, otherwise 1.
+    m_ctx->eCodec = pVideoFormat->codec;
+    cudaVideoChromaFormat eChromaFormat = pVideoFormat->chroma_format;
+    m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+    m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
+
+    // Set the output surface format same as chroma format
+    if ((eChromaFormat == cudaVideoChromaFormat_420) || (eChromaFormat == cudaVideoChromaFormat_Monochrome)) {
+        m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    } else if (eChromaFormat == cudaVideoChromaFormat_444) {
+        m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
+    } else if (eChromaFormat == cudaVideoChromaFormat_422) {
+        m_eOutputFormat = cudaVideoSurfaceFormat_NV12;  // no 4:2:2 output format supported yet so make 420 default
+    }
+    // NOTE(review): any other chroma_format value leaves m_eOutputFormat at its previous value.
+    // Check if output format supported. If not, check fallback options
+    if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat)))
+    {
+        if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
+            m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
+        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
+            m_eOutputFormat = cudaVideoSurfaceFormat_P016;
+        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
+            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
+        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
+            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+        else 
+            NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED);
+    }
+    // Fill in the decoder creation parameters from the parsed sequence information.
+    CUVIDDECODECREATEINFO videoDecodeCreateInfo = { 0 };
+    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
+    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
+    videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
+    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+    if (pVideoFormat->progressive_sequence)
+        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
+    else
+        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
+    videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
+    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
+    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
+    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
+    videoDecodeCreateInfo.vidLock = m_ctx->ctxLock;
+    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
+    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
+
+    unsigned int maxHeight = 0;
+    unsigned int maxWidth = 0;
+    // AV1 has max width/height of sequence in sequence header; never go below the coded size
+    if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0)
+    {
+        CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
+        maxWidth = vidFormatEx->av1.max_width;
+        maxHeight = vidFormatEx->av1.max_height;
+    }
+    if (maxWidth < pVideoFormat->coded_width) {
+        maxWidth = pVideoFormat->coded_width;
+    }
+    if (maxHeight < pVideoFormat->coded_height) {
+        maxHeight = pVideoFormat->coded_height;
+    }
+    videoDecodeCreateInfo.ulMaxWidth = maxWidth;
+    videoDecodeCreateInfo.ulMaxHeight = maxHeight;
+    // Output width/height come from the display area; the decode target keeps the coded size.
+    m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
+    m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
+    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
+    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
+    // Chroma geometry follows from the selected output surface format.
+    m_nChromaHeight = (int)(ceil((float)m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
+    m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
+    m_nSurfaceHeight = (int) videoDecodeCreateInfo.ulTargetHeight;
+    // Create the decoder with the context made current for the duration of the call.
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext));
+    NVDEC_API_CALL(cuvidCreateDecoder(&(m_ctx->hDecoder), &videoDecodeCreateInfo));
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+    return nDecodeSurface;
+}
+
+
+/* Return value from HandlePictureDecode() is interpreted as:
+*  0: fail, >=1: succeeded. Failures are reported by throwing NVDECException.
+*/
+int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
+    if (!(m_ctx->hDecoder))
+        NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED);
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext));
+    try {
+        NVDEC_API_CALL(cuvidDecodePicture(m_ctx->hDecoder, pPicParams));
+        if ((!pPicParams->field_pic_flag) || (pPicParams->second_field)) {
+            CUVIDPARSERDISPINFO dispInfo = {};
+            dispInfo.picture_index = pPicParams->CurrPicIdx;
+            dispInfo.progressive_frame = !pPicParams->field_pic_flag;
+            dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1;
+            HandlePictureDisplay(&dispInfo);
+        }
+    } catch (...) {
+        cuCtxPopCurrent(NULL);  // keep the context stack balanced before propagating the error
+        throw;
+    }
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+    return 1;
+}
+
+/* Maps the decoded picture and copies its luma and chroma planes into the host buffer dstFrame.
+*  Return value is interpreted as 0: fail, >=1: succeeded (the only return statement here yields 1)
+*/
+int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
+    CUVIDPROCPARAMS videoProcessingParameters = {};
+    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
+    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
+    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
+    videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
+    videoProcessingParameters.output_stream = m_ctx->cuvidStream;
+    // Map the decoded surface; dpSrcFrame and nSrcPitch receive its device address and row pitch.
+    CUdeviceptr dpSrcFrame = 0;
+    unsigned int nSrcPitch = 0;
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext));
+    NVDEC_API_CALL(cuvidMapVideoFrame(m_ctx->hDecoder, pDispInfo->picture_index, &dpSrcFrame,
+        &nSrcPitch, &videoProcessingParameters));
+    // A decode error is only reported on stdout; the frame is still copied below.
+    CUVIDGETDECODESTATUS DecodeStatus;
+    memset(&DecodeStatus, 0, sizeof(DecodeStatus));
+    CUresult result = cuvidGetDecodeStatus(m_ctx->hDecoder, pDispInfo->picture_index, &DecodeStatus);
+    if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed))
+    {
+        printf("Decode Error occurred for picture.\n");
+    }
+
+    dstFrame = new uint8_t[GetFrameSize()];
+    // NOTE(review): a previously allocated dstFrame is not released here, so a second call would leak it — confirm this runs once per decoder.
+    // Copy luma plane
+    CUDA_MEMCPY2D m = { 0 };
+    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    m.srcDevice = dpSrcFrame;
+    m.srcPitch = nSrcPitch;
+    m.dstMemoryType = CU_MEMORYTYPE_HOST;
+    m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame);
+    m.dstPitch = GetWidth() * m_nBPP;  // host rows are tightly packed
+    m.WidthInBytes = GetWidth() * m_nBPP;
+    m.Height = m_nLumaHeight;
+    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+
+    // Copy chroma plane
+    // NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning height
+    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
+    m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame + m.dstPitch * m_nLumaHeight);  // chroma follows the luma rows in dstFrame
+    m.Height = m_nChromaHeight;
+    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+
+    if (m_nNumChromaPlanes == 2)  // two chroma planes: copy the second one as well
+    {
+        m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
+        m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame + m.dstPitch * m_nLumaHeight * 2);
+        m.Height = m_nChromaHeight;
+        CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+    }
+    CUDA_DRVAPI_CALL(cuStreamSynchronize(m_ctx->cuvidStream));  // wait for the copies queued on this stream above
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+
+    NVDEC_API_CALL(cuvidUnmapVideoFrame(m_ctx->hDecoder, dpSrcFrame));
+    return 1;
+}
+
+// Stores the context pointer; dstFrame starts out null so the destructor never frees an indeterminate pointer.
+NvDecoder::NvDecoder(nvdec_context * ctx) : dstFrame(nullptr), m_ctx(ctx)
+{}
+
+heif_error NvDecoder::initVideoParser()
+{
+    // One decode surface, zero display delay; the display and SEI callbacks are left unset.
+    CUVIDPARSERPARAMS params = {};
+    params.CodecType = m_ctx->eCodec;
+    params.ulClockRate = 1000;
+    params.ulMaxNumDecodeSurfaces = 1;
+    params.ulMaxDisplayDelay = 0;
+    params.pfnSequenceCallback = HandleVideoSequenceProc;
+    params.pfnDecodePicture = HandlePictureDecodeProc;
+    params.pfnGetOperatingPoint = HandleOperatingPointProc;
+    params.pfnDisplayPicture = NULL;
+    params.pfnGetSEIMsg = NULL;
+    params.pUserData = this; // TODO: make this ctx once all the members are gone
+
+    if (cuvidCreateVideoParser(&(m_ctx->hParser), &params) == CUDA_SUCCESS) {
+        return heif_error_ok;
+    }
+    struct heif_error err = {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error,
+                             "could not create CUVID video parser"};
+    return err;
+}
+
+// Destroys the parser and decoder held by the context, frees the host frame buffer and destroys the context lock.
+NvDecoder::~NvDecoder() {
+    if (m_ctx->hParser) {
+        cuvidDestroyVideoParser(m_ctx->hParser);
+    }
+    cuCtxPushCurrent(m_ctx->cuContext);
+    if (m_ctx->hDecoder) {
+        cuvidDestroyDecoder(m_ctx->hDecoder);
+    }
+
+    // dstFrame is allocated with new[] in HandlePictureDisplay(), so it must be released with delete[].
+    delete[] dstFrame;
+
+    cuCtxPopCurrent(NULL);
+    cuvidCtxLockDestroy(m_ctx->ctxLock);
+}
+
+int NvDecoder::Decode(const uint8_t *pData, size_t nSize)
+{
+    // The whole buffer goes to the parser as one packet that is also flagged as end of stream.
+    CUVIDSOURCEDATAPACKET srcPacket = {};
+    srcPacket.flags = CUVID_PKT_ENDOFSTREAM;
+    srcPacket.payload_size = nSize;
+    srcPacket.payload = pData;
+    srcPacket.timestamp = 0;
+    NVDEC_API_CALL(cuvidParseVideoData(m_ctx->hParser, &srcPacket));
+    return 1;
+}
+
+uint8_t* NvDecoder::GetFrame()
+{
+    // Convert the decoded semi-planar frame held in dstFrame to planar layout before handing it out.
+    ConvertSemiplanarToPlanar(dstFrame, GetWidth(), GetHeight(), GetBitDepth());
+
+    return dstFrame;
+}
+
diff --git a/libheif/plugins/NvDecoder.h b/libheif/plugins/NvDecoder.h
new file mode 100644
index 0000000000..945759bfc0
--- /dev/null
+++ b/libheif/plugins/NvDecoder.h
@@ -0,0 +1,191 @@
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2023 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <cstdint>
+#include <vector>
+#include "libheif/heif.h"
+#include "nvcuvid.h"
+
+struct nvdec_context  // State handed to NvDecoder: input data plus the CUDA/NVDEC handles it operates on
+{
+    std::vector<uint8_t> data;                         // presumably the compressed bitstream to decode — confirm with the plugin code
+    int strict = 0;                                    // initialized so a fresh context never holds an indeterminate value
+    cudaVideoCodec eCodec = cudaVideoCodec_NumCodecs;  // NumCodecs serves as the "no codec selected yet" default
+    CUcontext cuContext = NULL;
+    CUvideoctxlock ctxLock = NULL;                     // NULL until a lock is created; ~NvDecoder() passes it to cuvidCtxLockDestroy()
+    CUstream cuvidStream = 0;
+    CUvideoparser hParser = NULL;
+    CUvideodecoder hDecoder = NULL;
+};
+
+
+/**
+* @brief NVDEC-based decoder: parses a bitstream, decodes it and keeps the resulting frame in a host buffer.
+*/
+class NvDecoder {
+
+public:
+    /**
+    *  @brief This function creates a decoder that operates on the given context.
+    *  It only stores the pointer: ctx must outlive this object, and initVideoParser()
+    *  must be called before starting to decode any frames.
+    */
+    NvDecoder(nvdec_context *ctx);
+    ~NvDecoder();
+
+    /**
+    *  @brief  This function is used to get the output frame width.
+    *  NV12/P016 output format width is 2 byte aligned because of U and V interleave
+    */
+    int GetWidth() { assert(m_nWidth); return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016)
+                                                ? (m_nWidth + 1) & ~1 : m_nWidth; }
+
+    /**
+    *  @brief  This function is used to get the actual decode width
+    */
+    int GetDecodeWidth() { assert(m_nWidth); return m_nWidth; }
+
+    /**
+    *  @brief  This function is used to get the output frame height (Luma height).
+    */
+    int GetHeight() { assert(m_nLumaHeight); return m_nLumaHeight; }
+
+    /**
+    *  @brief  This function is used to get the current chroma height.
+    */
+    int GetChromaHeight() { assert(m_nChromaHeight); return m_nChromaHeight; }
+
+    /**
+    *  @brief  This function is used to get the number of chroma planes.
+    */
+    int GetNumChromaPlanes() { assert(m_nNumChromaPlanes); return m_nNumChromaPlanes; }
+
+    /**
+    *   @brief  This function is used to get the current frame size based on pixel format.
+    */
+    int GetFrameSize() { return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP; }
+
+    /**
+    *   @brief  This function is used to get the current frame Luma plane size.
+    */
+    int GetLumaPlaneSize() { return GetWidth() * m_nLumaHeight * m_nBPP; }
+
+    /**
+    *   @brief  This function is used to get the current frame chroma plane size.
+    */
+    int GetChromaPlaneSize() { return GetWidth() *  (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; }
+
+    /**
+    *   @brief  This function is used to get the bit depth associated with the pixel format.
+    */
+    int GetBitDepth() { return m_nBitDepthMinus8 + 8; }
+
+    /**
+    *   @brief  This function is used to get the bytes used per pixel.
+    */
+    int GetBPP() { return m_nBPP; }
+
+    /**
+    *   @brief  This function feeds the given buffer to the video parser as a single end-of-stream packet.
+    *   @param  pData - pointer to the data buffer that is to be decoded
+    *   @param  nSize - size of the data buffer in bytes
+    *   @return 1; the implementation has no other return value
+    */
+    int Decode(const uint8_t *pData, size_t nSize);
+
+    /**
+    *   @brief  This function converts the decoded frame from semi-planar to planar layout and returns the internal buffer.
+    *   NOTE(review): the conversion runs on every call — presumably this should be called once per decoded frame.
+    */
+    uint8_t* GetFrame();
+
+    /**
+    *   @brief  This function allows app to set operating point for AV1 SVC clips
+    *   @param  opPoint - operating point of an AV1 scalable bitstream
+    *   @param  bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream
+    */
+    void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) { m_nOperatingPoint = opPoint; m_bDispAllLayers = bDispAllLayers; }
+    /** @brief  This function creates the CUVID video parser used by Decode(). @return heif_error_ok on success. */
+    heif_error initVideoParser();
+private:
+
+    /**
+    *   @brief  Callback function to be registered for getting a callback when decoding of sequence starts
+    */
+    static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat); }
+
+    /**
+    *   @brief  Callback function to be registered for getting a callback when a picture is ready to be decoded
+    */
+    static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams); }
+
+    /**
+    *   @brief  Callback function to be registered for getting a callback to get operating point when AV1 SVC sequence header start.
+    */
+    static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) { return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo); }
+
+    /**
+    *   @brief  This function gets called when a sequence is ready to be decoded. The function also gets called
+        when there is format change
+    */
+    int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
+
+    /**
+    *   @brief  This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this function
+    *   to decode the picture
+    */
+    int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
+
+    /**
+    *   @brief  This function gets called after a picture is decoded and available for display. Frames are fetched and stored in
+        internal buffer
+    */
+    int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
+
+    /**
+    *   @brief  This function gets called when AV1 sequence encounter more than one operating points
+    */
+    int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
+
+private:
+    // dimension of the output
+    unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
+    unsigned int m_nNumChromaPlanes = 0;
+    // height of the mapped surface
+    int m_nSurfaceHeight = 0;
+    cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
+    int m_nBitDepthMinus8 = 0;
+    int m_nBPP = 1;
+    uint8_t * dstFrame = nullptr;  // host copy of the decoded frame (new[] in HandlePictureDisplay); null until a frame was fetched
+
+    unsigned int m_nOperatingPoint = 0;
+    bool  m_bDispAllLayers = false;
+    nvdec_context *m_ctx;
+};
diff --git a/libheif/plugins/cuviddec.h b/libheif/plugins/cuviddec.h
new file mode 100644
index 0000000000..4a680375db
--- /dev/null
+++ b/libheif/plugins/cuviddec.h
@@ -0,0 +1,1188 @@
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2024 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*****************************************************************************************************/
+//! \file cuviddec.h
+//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
+//! This file contains constants, structure definitions and function prototypes used for decoding.
+/*****************************************************************************************************/
+
+#if !defined(__CUDA_VIDEO_H__)
+#define __CUDA_VIDEO_H__
+
+#ifndef __cuda_cuda_h__
+#include <cuda.h>
+#endif // __cuda_cuda_h__
+
+#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
+#define __CUVID_DEVPTR64
+#endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+typedef void *CUvideodecoder;                      /**< Opaque handle to a decoder instance */
+typedef struct _CUcontextlock_st *CUvideoctxlock;  /**< Opaque handle to a context lock (see CUVIDDECODECREATEINFO::vidLock) */
+
+/*********************************************************************************/
+//! \enum cudaVideoCodec
+//! Video codec enums
+//! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
+/*********************************************************************************/
+typedef enum cudaVideoCodec_enum {
+    cudaVideoCodec_MPEG1=0,                                         /**<  MPEG1             */
+    cudaVideoCodec_MPEG2,                                           /**<  MPEG2             */
+    cudaVideoCodec_MPEG4,                                           /**<  MPEG4             */
+    cudaVideoCodec_VC1,                                             /**<  VC1               */
+    cudaVideoCodec_H264,                                            /**<  H264              */
+    cudaVideoCodec_JPEG,                                            /**<  JPEG              */
+    cudaVideoCodec_H264_SVC,                                        /**<  H264-SVC          */
+    cudaVideoCodec_H264_MVC,                                        /**<  H264-MVC          */
+    cudaVideoCodec_HEVC,                                            /**<  HEVC              */
+    cudaVideoCodec_VP8,                                             /**<  VP8               */
+    cudaVideoCodec_VP9,                                             /**<  VP9               */
+    cudaVideoCodec_AV1,                                             /**<  AV1               */
+    cudaVideoCodec_NumCodecs,                                       /**<  Number of codecs listed above (not itself a codec) */
+    // Uncompressed YUV formats; each value packs four ASCII characters into one integer
+    cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)      */
+    cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)      */
+    cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)      */
+    cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
+    cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)       */
+} cudaVideoCodec;
+
+/*********************************************************************************/
+//! \enum cudaVideoSurfaceFormat
+//! Video surface format enums used for output format of decoded output
+//! These enums are used in CUVIDDECODECREATEINFO structure
+/*********************************************************************************/
+typedef enum cudaVideoSurfaceFormat_enum {
+    cudaVideoSurfaceFormat_NV12=0,          /**< Semi-Planar YUV [Y plane followed by interleaved UV plane]     */
+    cudaVideoSurfaceFormat_P016=1,          /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane].
+                                                 Can be used for 10 bit (6 least-significant bits are 0) or 12 bit (4 least-significant bits are 0) */
+    cudaVideoSurfaceFormat_YUV444=2,        /**< Planar YUV [Y plane followed by U and V planes]                */
+    cudaVideoSurfaceFormat_YUV444_16Bit=3,  /**< 16 bit Planar YUV [Y plane followed by U and V planes].
+                                                 Can be used for 10 bit (6 least-significant bits are 0) or 12 bit (4 least-significant bits are 0) */
+} cudaVideoSurfaceFormat;
+
+/******************************************************************************************************************/
+//! \enum cudaVideoDeinterlaceMode
+//! Deinterlacing mode enums
+//! These enums are used in CUVIDDECODECREATEINFO structure
+//! Use cudaVideoDeinterlaceMode_Weave for progressive content and for content that doesn't need deinterlacing
+//! cudaVideoDeinterlaceMode_Adaptive needs more video memory than other DImodes
+/******************************************************************************************************************/
+typedef enum cudaVideoDeinterlaceMode_enum {
+    cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing); use for progressive content */
+    cudaVideoDeinterlaceMode_Bob,       /**< Drop one field                                                    */
+    cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing (needs more video memory than other modes) */
+} cudaVideoDeinterlaceMode;
+
+/**************************************************************************************************************/
+//! \enum cudaVideoChromaFormat
+//! Chroma format enums
+//! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
+/**************************************************************************************************************/
+typedef enum cudaVideoChromaFormat_enum {
+    cudaVideoChromaFormat_Monochrome=0,  /**< Monochrome */
+    cudaVideoChromaFormat_420,           /**< YUV 4:2:0  */
+    cudaVideoChromaFormat_422,           /**< YUV 4:2:2  */
+    cudaVideoChromaFormat_444            /**< YUV 4:4:4  */
+} cudaVideoChromaFormat;
+
+/*************************************************************************************************************/
+//! \enum cudaVideoCreateFlags
+//! Decoder flag enums to select preferred decode path (used in CUVIDDECODECREATEINFO::ulCreationFlags)
+//! cudaVideoCreate_Default and cudaVideoCreate_PreferCUVID are most optimized, use these whenever possible
+/*************************************************************************************************************/
+typedef enum cudaVideoCreateFlags_enum {
+    cudaVideoCreate_Default     = 0x00,     /**< Default operation mode: use dedicated video engines                        */
+    cudaVideoCreate_PreferCUDA  = 0x01,     /**< Use CUDA-based decoder (requires valid vidLock object for multi-threading) */
+    cudaVideoCreate_PreferDXVA  = 0x02,     /**< Go through DXVA internally if possible (requires D3D9 interop)             */
+    cudaVideoCreate_PreferCUVID = 0x04      /**< Use dedicated video engines directly                                       */
+} cudaVideoCreateFlags;
+
+
+/*************************************************************************/
+//! \enum cuvidDecodeStatus
+//! Decode status enums
+//! These enums are used in CUVIDGETDECODESTATUS structure
+/*************************************************************************/
+typedef enum cuvidDecodeStatus_enum
+{
+    cuvidDecodeStatus_Invalid         = 0,   /**< Decode status is not valid                                     */
+    cuvidDecodeStatus_InProgress      = 1,   /**< Decode is in progress                                          */
+    cuvidDecodeStatus_Success         = 2,   /**< Decode is completed without any errors                         */
+    // 3 to 7 enums are reserved for future use
+    cuvidDecodeStatus_Error           = 8,   /**< Decode is completed with an error (error is not concealed)     */
+    cuvidDecodeStatus_Error_Concealed = 9,   /**< Decode is completed with an error and error is concealed       */
+} cuvidDecodeStatus;
+
+/**************************************************************************************************************/
+//! \struct CUVIDDECODECAPS
+//! This structure is used in cuvidGetDecoderCaps API
+/**************************************************************************************************************/
+typedef struct _CUVIDDECODECAPS
+{
+    cudaVideoCodec          eCodecType;                 /**< IN: cudaVideoCodec_XXX                                             */
+    cudaVideoChromaFormat   eChromaFormat;              /**< IN: cudaVideoChromaFormat_XXX                                      */
+    unsigned int            nBitDepthMinus8;            /**< IN: The Value "BitDepth minus 8"                                   */
+    unsigned int            reserved1[3];               /**< Reserved for future use - set to zero                              */
+
+    unsigned char           bIsSupported;               /**< OUT: 1 if codec supported, 0 if not supported                      */
+    unsigned char           nNumNVDECs;                 /**< OUT: Number of NVDECs that can support IN params                   */
+    unsigned short          nOutputFormatMask;          /**< OUT: each bit represents corresponding cudaVideoSurfaceFormat enum */
+    unsigned int            nMaxWidth;                  /**< OUT: Max supported coded width in pixels                           */
+    unsigned int            nMaxHeight;                 /**< OUT: Max supported coded height in pixels                          */
+    unsigned int            nMaxMBCount;                /**< OUT: Max supported macroblock count
+                                                                  CodedWidth*CodedHeight/256 must be <= nMaxMBCount             */
+    unsigned short          nMinWidth;                  /**< OUT: Min supported coded width in pixels                           */
+    unsigned short          nMinHeight;                 /**< OUT: Min supported coded height in pixels                          */
+    unsigned char           bIsHistogramSupported;      /**< OUT: 1 if Y component histogram output is supported, 0 if not
+                                                                  Note: histogram is computed on original picture data before
+                                                                  any post-processing like scaling, cropping, etc. is applied   */
+    unsigned char           nCounterBitDepth;           /**< OUT: histogram counter bit depth                                   */
+    unsigned short          nMaxHistogramBins;          /**< OUT: Max number of histogram bins                                  */
+    unsigned int            reserved3[10];              /**< Reserved for future use - set to zero                              */
+} CUVIDDECODECAPS;
+
+/**************************************************************************************************************/
+//! \struct CUVIDDECODECREATEINFO
+//! This structure is used in cuvidCreateDecoder API
+/**************************************************************************************************************/
+typedef struct _CUVIDDECODECREATEINFO
+{
+    unsigned long ulWidth;              /**< IN: Coded sequence width in pixels                                             */
+    unsigned long ulHeight;             /**< IN: Coded sequence height in pixels                                            */
+    unsigned long ulNumDecodeSurfaces;  /**< IN: Maximum number of internal decode surfaces                                 */
+    cudaVideoCodec CodecType;           /**< IN: cudaVideoCodec_XXX                                                         */
+    cudaVideoChromaFormat ChromaFormat; /**< IN: cudaVideoChromaFormat_XXX                                                  */
+    unsigned long ulCreationFlags;      /**< IN: Decoder creation flags (cudaVideoCreateFlags_XXX)                          */
+    unsigned long bitDepthMinus8;       /**< IN: The value "BitDepth minus 8" (e.g. 0 for 8-bit content)                    */
+    unsigned long ulIntraDecodeOnly;    /**< IN: Set 1 only if video has all intra frames (default value is 0). This will
+                                             optimize video memory for Intra frames only decoding. The support is limited
+                                             to specific codecs - H264, HEVC, VP9, the flag will be ignored for codecs which
+                                             are not supported. However decoding might fail if the flag is enabled in case
+                                             of supported codecs for regular bit streams having P and/or B frames.          */
+    unsigned long ulMaxWidth;           /**< IN: Coded sequence max width in pixels used with reconfigure Decoder           */
+    unsigned long ulMaxHeight;          /**< IN: Coded sequence max height in pixels used with reconfigure Decoder          */
+    unsigned long Reserved1;            /**< Reserved for future use - set to zero                                          */
+    /**
+    * IN: area of the frame that should be displayed
+    */
+    struct {
+        short left;
+        short top;
+        short right;
+        short bottom;
+    } display_area;
+
+    cudaVideoSurfaceFormat OutputFormat;       /**< IN: cudaVideoSurfaceFormat_XXX                                     */
+    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< IN: cudaVideoDeinterlaceMode_XXX                                   */
+    unsigned long ulTargetWidth;               /**< IN: Post-processed output width (Should be aligned to 2)           */
+    unsigned long ulTargetHeight;              /**< IN: Post-processed output height (Should be aligned to 2)          */
+    unsigned long ulNumOutputSurfaces;         /**< IN: Maximum number of output surfaces simultaneously mapped        */
+    CUvideoctxlock vidLock;                    /**< IN: If non-NULL, context lock used for synchronizing ownership of
+                                                    the cuda context. Needed for cudaVideoCreate_PreferCUDA decode     */
+    /**
+    * IN: target rectangle in the output frame (for aspect ratio conversion)
+    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
+    */
+    struct {
+        short left;
+        short top;
+        short right;
+        short bottom;
+    } target_rect;
+
+    unsigned long enableHistogram;             /**< IN: enable histogram output, if supported */
+    unsigned long Reserved2[4];                /**< Reserved for future use - set to zero */
+} CUVIDDECODECREATEINFO;
+
+/*********************************************************/
+//! \struct CUVIDH264DPBENTRY
+//! H.264 DPB (decoded picture buffer) entry
+//! This structure is used in CUVIDH264PICPARAMS structure
+/*********************************************************/
+typedef struct _CUVIDH264DPBENTRY
+{
+    int PicIdx;                 /**< picture index of reference frame                                        */
+    int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term)                    */
+    int is_long_term;           /**< 0=short term reference, 1=long term reference                           */
+    int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
+    int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields                    */
+    int FieldOrderCnt[2];       /**< field order count of top and bottom fields                              */
+} CUVIDH264DPBENTRY;
+
+/************************************************************/
+//! \struct CUVIDH264MVCEXT
+//! H.264 MVC (Multiview Video Coding) picture parameters ext
+//! This structure is used in CUVIDH264PICPARAMS structure
+/************************************************************/
+typedef struct _CUVIDH264MVCEXT
+{
+    int num_views_minus1;                  /**< Max number of coded views minus 1 in video : Range - 0 to 1023              */
+    int view_id;                           /**< view identifier                                                             */
+    unsigned char inter_view_flag;         /**< 1 if used for inter-view prediction, 0 if not                               */
+    unsigned char num_inter_view_refs_l0;  /**< number of inter-view ref pics in RefPicList0                                */
+    unsigned char num_inter_view_refs_l1;  /**< number of inter-view ref pics in RefPicList1                                */
+    unsigned char MVCReserved8Bits;        /**< Reserved bits                                                               */
+    int InterViewRefsL0[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList0 */
+    int InterViewRefsL1[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList1 */
+} CUVIDH264MVCEXT;
+
+/*********************************************************/
+//! \struct CUVIDH264SVCEXT
+//! H.264 SVC (Scalable Video Coding) picture parameters ext
+//! This structure is used in CUVIDH264PICPARAMS structure
+/*********************************************************/
+typedef struct _CUVIDH264SVCEXT
+{
+    unsigned char profile_idc;
+    unsigned char level_idc;
+    unsigned char DQId;
+    unsigned char DQIdMax;
+    unsigned char disable_inter_layer_deblocking_filter_idc;
+    unsigned char ref_layer_chroma_phase_y_plus1;
+    signed char   inter_layer_slice_alpha_c0_offset_div2;
+    signed char   inter_layer_slice_beta_offset_div2;
+
+    unsigned short DPBEntryValidFlag;
+    unsigned char inter_layer_deblocking_filter_control_present_flag;
+    unsigned char extended_spatial_scalability_idc;
+    unsigned char adaptive_tcoeff_level_prediction_flag;
+    unsigned char slice_header_restriction_flag;
+    unsigned char chroma_phase_x_plus1_flag;
+    unsigned char chroma_phase_y_plus1;
+
+    unsigned char tcoeff_level_prediction_flag;
+    unsigned char constrained_intra_resampling_flag;
+    unsigned char ref_layer_chroma_phase_x_plus1_flag;
+    unsigned char store_ref_base_pic_flag;
+    unsigned char Reserved8BitsA;            /**< Reserved bits */
+    unsigned char Reserved8BitsB;            /**< Reserved bits */
+
+    short scaled_ref_layer_left_offset;
+    short scaled_ref_layer_top_offset;
+    short scaled_ref_layer_right_offset;
+    short scaled_ref_layer_bottom_offset;
+    unsigned short Reserved16Bits;           /**< Reserved bits */
+    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded.
+                                             Linked list ends at the target layer. */
+    int bRefBaseLayer;                  /**< whether to store ref base pic */
+} CUVIDH264SVCEXT;
+
+/******************************************************/
+//! \struct CUVIDH264PICPARAMS
+//! H.264 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'h264' of its CodecSpecific union)
+/******************************************************/
+typedef struct _CUVIDH264PICPARAMS
+{
+    // SPS (sequence parameter set)
+    int log2_max_frame_num_minus4;
+    int pic_order_cnt_type;
+    int log2_max_pic_order_cnt_lsb_minus4;
+    int delta_pic_order_always_zero_flag;
+    int frame_mbs_only_flag;
+    int direct_8x8_inference_flag;
+    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
+    unsigned char residual_colour_transform_flag;
+    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
+    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
+    unsigned char qpprime_y_zero_transform_bypass_flag;
+    // PPS (picture parameter set)
+    int entropy_coding_mode_flag;
+    int pic_order_present_flag;
+    int num_ref_idx_l0_active_minus1;
+    int num_ref_idx_l1_active_minus1;
+    int weighted_pred_flag;
+    int weighted_bipred_idc;
+    int pic_init_qp_minus26;
+    int deblocking_filter_control_present_flag;
+    int redundant_pic_cnt_present_flag;
+    int transform_8x8_mode_flag;
+    int MbaffFrameFlag;
+    int constrained_intra_pred_flag;
+    int chroma_qp_index_offset;
+    int second_chroma_qp_index_offset;
+    int ref_pic_flag;
+    int frame_num;
+    int CurrFieldOrderCnt[2];           // presumably [0]=top field, [1]=bottom field order count - TODO confirm
+    // DPB (decoded picture buffer)
+    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
+    // Quantization Matrices (raster-order)
+    unsigned char WeightScale4x4[6][16];    // 6 lists of 16 coefficients each
+    unsigned char WeightScale8x8[2][64];    // 2 lists of 64 coefficients each
+    // FMO/ASO
+    unsigned char fmo_aso_enable;
+    unsigned char num_slice_groups_minus1;
+    unsigned char slice_group_map_type;
+    signed char pic_init_qs_minus26;        // explicitly signed
+    unsigned int slice_group_change_rate_minus1;
+    union
+    {
+        unsigned long long slice_group_map_addr;    // integer view of the same storage as pMb2SliceGroupMap
+        const unsigned char *pMb2SliceGroupMap;     // pointer view; shares storage with slice_group_map_addr
+    } fmo;
+    unsigned int  Reserved[12];             // reserved
+    // SVC/MVC extension: anonymous union, so mvcext and svcext share the same storage
+    union
+    {
+        CUVIDH264MVCEXT mvcext;
+        CUVIDH264SVCEXT svcext;
+    };
+} CUVIDH264PICPARAMS;
+
+
+/********************************************************/
+//! \struct CUVIDMPEG2PICPARAMS
+//! MPEG-2 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'mpeg2' of its CodecSpecific union, also used for MPEG-1)
+/********************************************************/
+typedef struct _CUVIDMPEG2PICPARAMS
+{
+    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
+    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
+    int picture_coding_type;
+    int full_pel_forward_vector;
+    int full_pel_backward_vector;
+    int f_code[2][2];           // presumably indexed [forward/backward][horizontal/vertical] - TODO confirm
+    int intra_dc_precision;
+    int frame_pred_frame_dct;
+    int concealment_motion_vectors;
+    int q_scale_type;
+    int intra_vlc_format;
+    int alternate_scan;
+    int top_field_first;
+    // Quantization matrices (raster order), 64 entries each
+    unsigned char QuantMatrixIntra[64];
+    unsigned char QuantMatrixInter[64];
+} CUVIDMPEG2PICPARAMS;
+
+// MPEG-4 has VOP types instead of Picture types (presumably the values carried in CUVIDMPEG4PICPARAMS::vop_coding_type - confirm)
+#define I_VOP 0
+#define P_VOP 1
+#define B_VOP 2
+#define S_VOP 3
+
+/*******************************************************/
+//! \struct CUVIDMPEG4PICPARAMS
+//! MPEG-4 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'mpeg4' of its CodecSpecific union)
+/*******************************************************/
+typedef struct _CUVIDMPEG4PICPARAMS
+{
+    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
+    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
+    // VOL (video object layer)
+    int video_object_layer_width;
+    int video_object_layer_height;
+    int vop_time_increment_bitcount;
+    int top_field_first;
+    int resync_marker_disable;
+    int quant_type;
+    int quarter_sample;
+    int short_video_header;
+    int divx_flags;
+    // VOP (video object plane)
+    int vop_coding_type;        // presumably one of I_VOP / P_VOP / B_VOP / S_VOP - TODO confirm
+    int vop_coded;
+    int vop_rounding_type;
+    int alternate_vertical_scan_flag;
+    int interlaced;
+    int vop_fcode_forward;
+    int vop_fcode_backward;
+    int trd[2];
+    int trb[2];
+    // Quantization matrices (raster order), 64 entries each
+    unsigned char QuantMatrixIntra[64];
+    unsigned char QuantMatrixInter[64];
+    int gmc_enabled;
+} CUVIDMPEG4PICPARAMS;
+
+/********************************************************/
+//! \struct CUVIDVC1PICPARAMS
+//! VC1 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'vc1' of its CodecSpecific union)
+/********************************************************/
+typedef struct _CUVIDVC1PICPARAMS
+{
+    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
+    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames)  */
+    int FrameWidth;         /**< Actual frame width                              */
+    int FrameHeight;        /**< Actual frame height                             */
+    // PICTURE (per-picture fields)
+    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
+    int ref_pic_flag;       /**< Set to 1 for I,P frames  */
+    int progressive_fcm;    /**< Progressive frame        */
+    // SEQUENCE (sequence-level fields)
+    int profile;
+    int postprocflag;
+    int pulldown;
+    int interlace;
+    int tfcntrflag;
+    int finterpflag;
+    int psf;
+    int multires;
+    int syncmarker;
+    int rangered;
+    int maxbframes;
+    // ENTRYPOINT (entry-point-level fields)
+    int panscan_flag;
+    int refdist_flag;
+    int extended_mv;
+    int dquant;
+    int vstransform;
+    int loopfilter;
+    int fastuvmc;
+    int overlap;
+    int quantizer;
+    int extended_dmv;
+    int range_mapy_flag;
+    int range_mapy;
+    int range_mapuv_flag;
+    int range_mapuv;
+    int rangeredfrm;    // range reduction state
+} CUVIDVC1PICPARAMS;
+
+/***********************************************************/
+//! \struct CUVIDJPEGPICPARAMS
+//! JPEG picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'jpeg' of its CodecSpecific union)
+/***********************************************************/
+typedef struct _CUVIDJPEGPICPARAMS
+{
+    int Reserved;   // sole member: this struct defines no JPEG-specific picture parameters
+} CUVIDJPEGPICPARAMS;
+
+
+/*******************************************************/
+//! \struct CUVIDHEVCPICPARAMS
+//! HEVC picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'hevc' of its CodecSpecific union)
+/*******************************************************/
+typedef struct _CUVIDHEVCPICPARAMS
+{
+    // sps (sequence parameter set)
+    int pic_width_in_luma_samples;
+    int pic_height_in_luma_samples;
+    unsigned char log2_min_luma_coding_block_size_minus3;
+    unsigned char log2_diff_max_min_luma_coding_block_size;
+    unsigned char log2_min_transform_block_size_minus2;
+    unsigned char log2_diff_max_min_transform_block_size;
+    unsigned char pcm_enabled_flag;
+    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
+    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
+    unsigned char pcm_sample_bit_depth_luma_minus1;
+
+    unsigned char pcm_sample_bit_depth_chroma_minus1;
+    unsigned char pcm_loop_filter_disabled_flag;
+    unsigned char strong_intra_smoothing_enabled_flag;
+    unsigned char max_transform_hierarchy_depth_intra;
+    unsigned char max_transform_hierarchy_depth_inter;
+    unsigned char amp_enabled_flag;
+    unsigned char separate_colour_plane_flag;
+    unsigned char log2_max_pic_order_cnt_lsb_minus4;
+
+    unsigned char num_short_term_ref_pic_sets;
+    unsigned char long_term_ref_pics_present_flag;
+    unsigned char num_long_term_ref_pics_sps;
+    unsigned char sps_temporal_mvp_enabled_flag;
+    unsigned char sample_adaptive_offset_enabled_flag;
+    unsigned char scaling_list_enable_flag;
+    unsigned char IrapPicFlag;
+    unsigned char IdrPicFlag;
+
+    unsigned char bit_depth_luma_minus8;
+    unsigned char bit_depth_chroma_minus8;
+    //sps/pps extension fields
+    unsigned char log2_max_transform_skip_block_size_minus2;
+    unsigned char log2_sao_offset_scale_luma;
+    unsigned char log2_sao_offset_scale_chroma;
+    unsigned char high_precision_offsets_enabled_flag;
+    unsigned char reserved1[10];    // reserved
+
+    // pps (picture parameter set)
+    unsigned char dependent_slice_segments_enabled_flag;
+    unsigned char slice_segment_header_extension_present_flag;
+    unsigned char sign_data_hiding_enabled_flag;
+    unsigned char cu_qp_delta_enabled_flag;
+    unsigned char diff_cu_qp_delta_depth;
+    signed char init_qp_minus26;    // explicitly signed
+    signed char pps_cb_qp_offset;   // explicitly signed
+    signed char pps_cr_qp_offset;   // explicitly signed
+
+    unsigned char constrained_intra_pred_flag;
+    unsigned char weighted_pred_flag;
+    unsigned char weighted_bipred_flag;
+    unsigned char transform_skip_enabled_flag;
+    unsigned char transquant_bypass_enabled_flag;
+    unsigned char entropy_coding_sync_enabled_flag;
+    unsigned char log2_parallel_merge_level_minus2;
+    unsigned char num_extra_slice_header_bits;
+
+    unsigned char loop_filter_across_tiles_enabled_flag;
+    unsigned char loop_filter_across_slices_enabled_flag;
+    unsigned char output_flag_present_flag;
+    unsigned char num_ref_idx_l0_default_active_minus1;
+    unsigned char num_ref_idx_l1_default_active_minus1;
+    unsigned char lists_modification_present_flag;
+    unsigned char cabac_init_present_flag;
+    unsigned char pps_slice_chroma_qp_offsets_present_flag;
+
+    unsigned char deblocking_filter_override_enabled_flag;
+    unsigned char pps_deblocking_filter_disabled_flag;
+    signed char   pps_beta_offset_div2;     // explicitly signed
+    signed char   pps_tc_offset_div2;       // explicitly signed
+    unsigned char tiles_enabled_flag;
+    unsigned char uniform_spacing_flag;
+    unsigned char num_tile_columns_minus1;
+    unsigned char num_tile_rows_minus1;
+
+    unsigned short column_width_minus1[21];     // room for 21 tile-column entries
+    unsigned short row_height_minus1[21];       // room for 21 tile-row entries
+
+    // sps and pps extension HEVC-main 444
+    unsigned char sps_range_extension_flag;
+    unsigned char transform_skip_rotation_enabled_flag;
+    unsigned char transform_skip_context_enabled_flag;
+    unsigned char implicit_rdpcm_enabled_flag;
+
+    unsigned char explicit_rdpcm_enabled_flag;
+    unsigned char extended_precision_processing_flag;
+    unsigned char intra_smoothing_disabled_flag;
+    unsigned char persistent_rice_adaptation_enabled_flag;
+
+    unsigned char cabac_bypass_alignment_enabled_flag;
+    unsigned char pps_range_extension_flag;
+    unsigned char cross_component_prediction_enabled_flag;
+    unsigned char chroma_qp_offset_list_enabled_flag;
+
+    unsigned char diff_cu_chroma_qp_offset_depth;
+    unsigned char chroma_qp_offset_list_len_minus1;
+    signed char cb_qp_offset_list[6];   // explicitly signed, 6 entries
+
+    signed char cr_qp_offset_list[6];   // explicitly signed, 6 entries
+    unsigned char reserved2[2];         // reserved
+
+    unsigned int   reserved3[8];        // reserved
+
+    // RefPicSets
+    int NumBitsForShortTermRPSInSlice;
+    int NumDeltaPocsOfRefRpsIdx;
+    int NumPocTotalCurr;
+    int NumPocStCurrBefore;
+    int NumPocStCurrAfter;
+    int NumPocLtCurr;
+    int CurrPicOrderCntVal;
+    int RefPicIdx[16];                      // [refpic] Indices of valid reference pictures (-1 if unused for reference)
+    int PicOrderCntVal[16];                 // [refpic]
+    unsigned char IsLongTerm[16];           // [refpic] 0=not a long-term reference, 1=long-term reference
+    unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
+    unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
+    unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
+    unsigned char RefPicSetInterLayer0[8];  // NOTE(review): undocumented; presumably refpic indices like the RefPicSet* arrays above - confirm
+    unsigned char RefPicSetInterLayer1[8];  // NOTE(review): undocumented; see RefPicSetInterLayer0
+    unsigned int  reserved4[12];            // reserved
+
+    // scaling lists (diag order)
+    unsigned char ScalingList4x4[6][16];       // [matrixId][i]
+    unsigned char ScalingList8x8[6][64];       // [matrixId][i]
+    unsigned char ScalingList16x16[6][64];     // [matrixId][i]
+    unsigned char ScalingList32x32[2][64];     // [matrixId][i]
+    unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
+    unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
+} CUVIDHEVCPICPARAMS;
+
+
+/***********************************************************/
+//! \struct CUVIDVP8PICPARAMS
+//! VP8 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'vp8' of its CodecSpecific union)
+/***********************************************************/
+typedef struct _CUVIDVP8PICPARAMS
+{
+    int width;
+    int height;
+    unsigned int first_partition_size;
+    //Frame Indexes (last / golden / alt-ref reference frames)
+    unsigned char LastRefIdx;
+    unsigned char GoldenRefIdx;
+    unsigned char AltRefIdx;
+    union {
+        struct {
+            unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
+            unsigned char version : 3;
+            unsigned char show_frame : 1;
+            unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
+            unsigned char Reserved2Bits : 2;    /**< Reserved; brings the bit-fields above to 8 bits in total */
+        }vp8_frame_tag;
+        unsigned char wFrameTagFlags;    /**< Shares storage with the vp8_frame_tag bit-fields (anonymous union) */
+    };
+    unsigned char Reserved1[4];    /**< Reserved */
+    unsigned int  Reserved2[3];    /**< Reserved */
+} CUVIDVP8PICPARAMS;
+
+/***********************************************************/
+//! \struct CUVIDVP9PICPARAMS
+//! VP9 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'vp9' of its CodecSpecific union)
+/***********************************************************/
+typedef struct _CUVIDVP9PICPARAMS
+{
+    unsigned int width;
+    unsigned int height;
+
+    //Frame Indices (last / golden / alt-ref reference frames)
+    unsigned char LastRefIdx;
+    unsigned char GoldenRefIdx;
+    unsigned char AltRefIdx;
+    unsigned char colorSpace;
+
+    unsigned short profile : 3;
+    unsigned short frameContextIdx : 2;
+    unsigned short frameType : 1;
+    unsigned short showFrame : 1;
+    unsigned short errorResilient : 1;
+    unsigned short frameParallelDecoding : 1;
+    unsigned short subSamplingX : 1;
+    unsigned short subSamplingY : 1;
+    unsigned short intraOnly : 1;
+    unsigned short allow_high_precision_mv : 1;
+    unsigned short refreshEntropyProbs : 1;
+    unsigned short reserved2Bits : 2;    // reserved; brings the unsigned short bit-fields above to 16 bits in total
+
+    unsigned short reserved16Bits;       // reserved
+
+    unsigned char  refFrameSignBias[4];
+
+    unsigned char bitDepthMinus8Luma;
+    unsigned char bitDepthMinus8Chroma;
+    unsigned char loopFilterLevel;
+    unsigned char loopFilterSharpness;
+
+    unsigned char modeRefLfEnabled;
+    unsigned char log2_tile_columns;
+    unsigned char log2_tile_rows;
+
+    unsigned char segmentEnabled : 1;
+    unsigned char segmentMapUpdate : 1;
+    unsigned char segmentMapTemporalUpdate : 1;
+    unsigned char segmentFeatureMode : 1;
+    unsigned char reserved4Bits : 4;     // reserved; brings the four segment bit-fields above to 8 bits in total
+
+
+    unsigned char segmentFeatureEnable[8][4];    // presumably indexed [segment][feature] - TODO confirm
+    short         segmentFeatureData[8][4];      // same dimensions as segmentFeatureEnable; signed 16-bit values
+    unsigned char mb_segment_tree_probs[7];
+    unsigned char segment_pred_probs[3];
+    unsigned char reservedSegment16Bits[2];      // reserved (two bytes)
+
+    int qpYAc;
+    int qpYDc;
+    int qpChDc;
+    int qpChAc;
+
+    unsigned int activeRefIdx[3];
+    unsigned int resetFrameContext;
+    unsigned int mcomp_filter_type;
+    unsigned int mbRefLfDelta[4];
+    unsigned int mbModeLfDelta[2];
+    unsigned int frameTagSize;
+    unsigned int offsetToDctParts;
+    unsigned int reserved128Bits[4];     // reserved (four unsigned ints)
+
+} CUVIDVP9PICPARAMS;
+
+/***********************************************************/
+//! \struct CUVIDAV1PICPARAMS
+//! AV1 picture parameters
+//! This structure is used in CUVIDPICPARAMS structure (member 'av1' of its CodecSpecific union)
+/***********************************************************/
+typedef struct _CUVIDAV1PICPARAMS
+{
+    unsigned int   width;                               // coded width, if superres enabled then it is upscaled width
+    unsigned int   height;                              // coded height
+    unsigned int   frame_offset;                        // defined as order_hint in AV1 specification
+    int            decodePicIdx;                        // decoded output pic index, if film grain enabled, it will keep decoded (without film grain) output
+                                                        // It can be used as reference frame for future frames
+
+    // sequence header (the bit-fields in this group add up to 32 bits)
+    unsigned int   profile : 3;                         // 0 = profile0, 1 = profile1, 2 = profile2
+    unsigned int   use_128x128_superblock : 1;          // superblock size 0:64x64, 1: 128x128
+    unsigned int   subsampling_x : 1;                   // (subsampling_x, _y) 1,1 = 420, 1,0 = 422, 0,0 = 444
+    unsigned int   subsampling_y : 1;
+    unsigned int   mono_chrome : 1;                     // for monochrome content, mono_chrome = 1 and (subsampling_x, _y) should be 1,1
+    unsigned int   bit_depth_minus8 : 4;                // bit depth minus 8
+    unsigned int   enable_filter_intra : 1;             // tool enable in seq level, 0 : disable 1: frame header control
+    unsigned int   enable_intra_edge_filter : 1;        // intra edge filtering process, 0 : disable 1: enabled
+    unsigned int   enable_interintra_compound : 1;      // interintra, 0 : not present 1: present
+    unsigned int   enable_masked_compound : 1;          // 1: mode info for inter blocks may contain the syntax element compound_type.
+                                                        // 0: syntax element compound_type will not be present
+    unsigned int   enable_dual_filter : 1;              // vertical and horiz filter selection, 1: enable and 0: disable
+    unsigned int   enable_order_hint : 1;               // order hint, and related tools, 1: enable and 0: disable
+    unsigned int   order_hint_bits_minus1 : 3;          // is used to compute OrderHintBits
+    unsigned int   enable_jnt_comp : 1;                 // joint compound modes, 1: enable and 0: disable
+    unsigned int   enable_superres : 1;                 // superres in seq level, 0 : disable 1: frame level control
+    unsigned int   enable_cdef : 1;                     // cdef filtering in seq level, 0 : disable 1: frame level control
+    unsigned int   enable_restoration : 1;              // loop restoration filtering in seq level, 0 : disable 1: frame level control
+    unsigned int   enable_fgs : 1;                      // defined as film_grain_params_present in AV1 specification
+    unsigned int   reserved0_7bits : 7;                 // reserved bits; must be set to 0
+
+    // frame header (the bit-fields in this group add up to 32 bits)
+    unsigned int   frame_type : 2 ;                     // 0:Key frame, 1:Inter frame, 2:intra only, 3:s-frame
+    unsigned int   show_frame : 1 ;                     // show_frame = 1 implies that frame should be immediately output once decoded
+    unsigned int   disable_cdf_update : 1;              // CDF update during symbol decoding, 1: disabled, 0: enabled
+    unsigned int   allow_screen_content_tools : 1;      // 1: intra blocks may use palette encoding, 0: palette encoding is never used
+    unsigned int   force_integer_mv : 1;                // 1: motion vectors will always be integers, 0: can contain fractional bits
+    unsigned int   coded_denom : 3;                     // coded_denom of the superres scale as specified in AV1 specification
+    unsigned int   allow_intrabc : 1;                   // 1: intra block copy may be used, 0: intra block copy is not allowed
+    unsigned int   allow_high_precision_mv : 1;         // 1/8 precision mv enable
+    unsigned int   interp_filter : 3;                   // interpolation filter. Refer to section 6.8.9 of the AV1 specification Version 1.0.0 with Errata 1
+    unsigned int   switchable_motion_mode : 1;          // defined as is_motion_mode_switchable in AV1 specification
+    unsigned int   use_ref_frame_mvs : 1;               // 1: current frame can use the previous frame mv information, 0: will not use.
+    unsigned int   disable_frame_end_update_cdf : 1;    // 1: indicates that the end of frame CDF update is disabled
+    unsigned int   delta_q_present : 1;                 // quantizer index delta values are present in the block level
+    unsigned int   delta_q_res : 2;                     // left shift which should be applied to decoded quantizer index delta values
+    unsigned int   using_qmatrix : 1;                   // 1: quantizer matrix will be used to compute quantizers
+    unsigned int   coded_lossless : 1;                  // 1: all segments use lossless coding
+    unsigned int   use_superres : 1;                    // 1: superres enabled for frame
+    unsigned int   tx_mode : 2;                         // 0: ONLY4x4,1:LARGEST,2:SELECT
+    unsigned int   reference_mode : 1;                  // 0: SINGLE, 1: SELECT
+    unsigned int   allow_warped_motion : 1;             // 1: allow_warped_motion may be present, 0: allow_warped_motion will not be present
+    unsigned int   reduced_tx_set : 1;                  // 1: frame is restricted to subset of the full set of transform types, 0: no such restriction
+    unsigned int   skip_mode : 1;                       // 1: most of the mode info is skipped, 0: mode info is not skipped
+    unsigned int   reserved1_3bits : 3;                 // reserved bits; must be set to 0
+
+    // tiling info (the three bit-fields below add up to 32 bits)
+    unsigned int   num_tile_cols : 8;                   // number of tiles across the frame, max is 64
+    unsigned int   num_tile_rows : 8;                   // number of tiles down the frame, max is 64
+    unsigned int   context_update_tile_id : 16;         // specifies which tile to use for the CDF update
+    unsigned short tile_widths[64];                     // Width of each column in superblocks
+    unsigned short tile_heights[64];                    // height of each row in superblocks
+
+    // CDEF - refer to section 6.10.14 of the AV1 specification Version 1.0.0 with Errata 1
+    unsigned char  cdef_damping_minus_3 : 2;            // controls the amount of damping in the deringing filter
+    unsigned char  cdef_bits : 2;                       // the number of bits needed to specify which CDEF filter to apply
+    unsigned char  reserved2_4bits : 4;                 // reserved bits; must be set to 0
+    unsigned char  cdef_y_strength[8];                  // 0-3 bits: y_pri_strength, 4-7 bits y_sec_strength
+    unsigned char  cdef_uv_strength[8];                 // 0-3 bits: uv_pri_strength, 4-7 bits uv_sec_strength
+
+    // SkipModeFrames
+    unsigned char   SkipModeFrame0 : 4;                 // specifies the frames to use for compound prediction when skip_mode is equal to 1.
+    unsigned char   SkipModeFrame1 : 4;
+
+    // qp information - refer to section 6.8.11 of the AV1 specification Version 1.0.0 with Errata 1
+    // NOTE(review): the *_delta_q fields below are plain 'char', whose signedness is implementation-defined in C
+    unsigned char  base_qindex;                         // indicates the base frame qindex. Defined as base_q_idx in AV1 specification
+    char           qp_y_dc_delta_q;                     // indicates the Y DC quantizer relative to base_q_idx. Defined as DeltaQYDc in AV1 specification
+    char           qp_u_dc_delta_q;                     // indicates the U DC quantizer relative to base_q_idx. Defined as DeltaQUDc in AV1 specification
+    char           qp_v_dc_delta_q;                     // indicates the V DC quantizer relative to base_q_idx. Defined as DeltaQVDc in AV1 specification
+    char           qp_u_ac_delta_q;                     // indicates the U AC quantizer relative to base_q_idx. Defined as DeltaQUAc in AV1 specification
+    char           qp_v_ac_delta_q;                     // indicates the V AC quantizer relative to base_q_idx. Defined as DeltaQVAc in AV1 specification
+    unsigned char  qm_y;                                // specifies the level in the quantizer matrix that should be used for luma plane decoding
+    unsigned char  qm_u;                                // specifies the level in the quantizer matrix that should be used for chroma U plane decoding
+    unsigned char  qm_v;                                // specifies the level in the quantizer matrix that should be used for chroma V plane decoding
+
+    // segmentation - refer to section 6.8.13 of the AV1 specification Version 1.0.0 with Errata 1
+    unsigned char segmentation_enabled : 1;             // 1 indicates that this frame makes use of the segmentation tool
+    unsigned char segmentation_update_map : 1;          // 1 indicates that the segmentation map is updated during the decoding of this frame
+    unsigned char segmentation_update_data : 1;         // 1 indicates that new parameters are about to be specified for each segment
+    unsigned char segmentation_temporal_update : 1;     // 1 indicates that the updates to the segmentation map are coded relative to the existing segmentation map
+    unsigned char reserved3_4bits : 4;                  // reserved bits; must be set to 0
+    short         segmentation_feature_data[8][8];      // specifies the feature data for a segment feature
+    unsigned char segmentation_feature_mask[8];         // indicates that the corresponding feature is unused or feature value is coded
+
+    // loopfilter - refer to section 6.8.10 of the AV1 specification Version 1.0.0 with Errata 1
+    unsigned char  loop_filter_level[2];                // contains loop filter strength values
+    unsigned char  loop_filter_level_u;                 // loop filter strength value of U plane
+    unsigned char  loop_filter_level_v;                 // loop filter strength value of V plane
+    unsigned char  loop_filter_sharpness;               // indicates the sharpness level
+    char           loop_filter_ref_deltas[8];           // contains the adjustment needed for the filter level based on the chosen reference frame (plain 'char': signedness is implementation-defined)
+    char           loop_filter_mode_deltas[2];          // contains the adjustment needed for the filter level based on the chosen mode (plain 'char': signedness is implementation-defined)
+    unsigned char  loop_filter_delta_enabled : 1;       // indicates that the filter level depends on the mode and reference frame used to predict a block
+    unsigned char  loop_filter_delta_update : 1;        // indicates that additional syntax elements are present that specify which mode and
+                                                        // reference frame deltas are to be updated
+    unsigned char  delta_lf_present : 1;                // specifies whether loop filter delta values are present in the block level
+    unsigned char  delta_lf_res : 2;                    // specifies the left shift to apply to the decoded loop filter values
+    unsigned char  delta_lf_multi  : 1;                 // separate loop filter deltas for Hy,Vy,U,V edges
+    unsigned char  reserved4_2bits : 2;                 // reserved bits; must be set to 0
+
+    // restoration - refer to section 6.10.15 of the AV1 specification Version 1.0.0 with Errata 1
+    unsigned char lr_unit_size[3];                     // specifies the size of loop restoration units: 0: 32, 1: 64, 2: 128, 3: 256
+    unsigned char lr_type[3] ;                         // used to compute FrameRestorationType
+
+    // reference frames
+    unsigned char primary_ref_frame;                    // specifies which reference frame contains the CDF values and other state that should be
+                                                        // loaded at the start of the frame
+    unsigned char ref_frame_map[8];                     // frames in dpb that can be used as reference for current or future frames
+
+    unsigned char temporal_layer_id : 4;                // temporal layer id
+    unsigned char spatial_layer_id : 4;                 // spatial layer id
+
+    unsigned char reserved5_32bits[4];                  // reserved bits; must be set to 0
+
+    // ref frame list
+    struct
+    {
+        unsigned int   width;                           // NOTE(review): presumably the reference frame's width - confirm
+        unsigned int   height;                          // NOTE(review): presumably the reference frame's height - confirm
+        unsigned char  index;
+        unsigned char  reserved24Bits[3];               // reserved bits; must be set to 0
+    } ref_frame[7];                                     // frames used as reference frame for current frame.
+    
+    // global motion
+    struct {
+        unsigned char invalid : 1;
+        unsigned char wmtype : 2;                       // defined as GmType in AV1 specification
+        unsigned char reserved5Bits : 5;                // reserved bits; must be set to 0
+        char          reserved24Bits[3];                // reserved bits; must be set to 0
+        int           wmmat[6];                         // defined as gm_params[] in AV1 specification
+    } global_motion[7];                                 // global motion params for reference frames
+    
+    // film grain params - refer to section 6.8.20 of the AV1 specification Version 1.0.0 with Errata 1
+    // (the unsigned short bit-fields below add up to 16 bits)
+    unsigned short apply_grain : 1;
+    unsigned short overlap_flag : 1;
+    unsigned short scaling_shift_minus8 : 2;
+    unsigned short chroma_scaling_from_luma : 1;  
+    unsigned short ar_coeff_lag : 2;
+    unsigned short ar_coeff_shift_minus6 : 2;
+    unsigned short grain_scale_shift : 2;
+    unsigned short clip_to_restricted_range : 1;
+    unsigned short reserved6_4bits : 4;                 // reserved bits; must be set to 0
+    unsigned char  num_y_points;
+    unsigned char  scaling_points_y[14][2];
+    unsigned char  num_cb_points;
+    unsigned char  scaling_points_cb[10][2];
+    unsigned char  num_cr_points;
+    unsigned char  scaling_points_cr[10][2];
+    unsigned char  reserved7_8bits;                     // reserved bits; must be set to 0
+    unsigned short random_seed;
+    short          ar_coeffs_y[24];
+    short          ar_coeffs_cb[25];
+    short          ar_coeffs_cr[25];
+    unsigned char  cb_mult;
+    unsigned char  cb_luma_mult;
+    short          cb_offset;
+    unsigned char  cr_mult;
+    unsigned char  cr_luma_mult;
+    short          cr_offset;
+
+    int            reserved[7];                       // reserved bits; must be set to 0
+} CUVIDAV1PICPARAMS;
+
+/******************************************************************************************/
+//! \struct CUVIDPICPARAMS
+//! Picture parameters for decoding: common fields followed by a codec-specific union
+//! This structure is used in cuvidDecodePicture API
+//! IN  for cuvidDecodePicture
+/******************************************************************************************/
+typedef struct _CUVIDPICPARAMS
+{
+    int PicWidthInMbs;                     /**< IN: Coded frame width in macroblocks                          */
+    int FrameHeightInMbs;                  /**< IN: Coded frame height in macroblocks                         */
+    int CurrPicIdx;                        /**< IN: Output index of the current picture                       */
+    int field_pic_flag;                    /**< IN: 0=frame picture, 1=field picture                          */
+    int bottom_field_flag;                 /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
+    int second_field;                      /**< IN: Second field of a complementary field pair                */
+    // Bitstream data
+    unsigned int nBitstreamDataLen;        /**< IN: Number of bytes in bitstream data buffer                  */
+    const unsigned char *pBitstreamData;   /**< IN: Ptr to bitstream data for this picture (slice-layer)      */
+    unsigned int nNumSlices;               /**< IN: Number of slices in this picture                          */
+    const unsigned int *pSliceDataOffsets; /**< IN: nNumSlices entries, contains offset of each slice within
+                                                        the bitstream data buffer                             */
+    int ref_pic_flag;                      /**< IN: This picture is a reference picture                       */
+    int intra_pic_flag;                    /**< IN: This picture is entirely intra coded                      */
+    unsigned int Reserved[30];             /**< Reserved for future use                                       */
+    // IN: Codec-specific data (all members share the same storage)
+    union {
+        CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
+        CUVIDH264PICPARAMS  h264;
+        CUVIDVC1PICPARAMS   vc1;
+        CUVIDMPEG4PICPARAMS mpeg4;
+        CUVIDJPEGPICPARAMS  jpeg;
+        CUVIDHEVCPICPARAMS  hevc;
+        CUVIDVP8PICPARAMS   vp8;
+        CUVIDVP9PICPARAMS   vp9;
+        CUVIDAV1PICPARAMS   av1;
+        unsigned int CodecReserved[1024];  /**< Keeps the union at least 1024 unsigned ints in size */
+    } CodecSpecific;
+} CUVIDPICPARAMS;
+
+
+/******************************************************/
+//! \struct CUVIDPROCPARAMS
+//! Picture parameters for postprocessing
+//! This structure is used in cuvidMapVideoFrame API
+/******************************************************/
+typedef struct _CUVIDPROCPARAMS
+{
+    int progressive_frame;                        /**< IN: Input is progressive (deinterlace_mode will be ignored)                */
+    int second_field;                             /**< IN: Output the second field (ignored if deinterlace mode is Weave)         */
+    int top_field_first;                          /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */
+    int unpaired_field;                           /**< IN: Input only contains one field (2nd field is invalid)                   */
+    // The fields below are used for raw YUV input/output (device pointers, pitches and input format)
+    unsigned int reserved_flags;                  /**< Reserved for future use (set to zero)                                      */
+    unsigned int reserved_zero;                   /**< Reserved (set to zero)                                                     */
+    unsigned long long raw_input_dptr;            /**< IN: Input CUdeviceptr for raw YUV extensions                               */
+    unsigned int raw_input_pitch;                 /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately)      */
+    unsigned int raw_input_format;                /**< IN: Input YUV format (cudaVideoCodec_enum)                                 */
+    unsigned long long raw_output_dptr;           /**< IN: Output CUdeviceptr for raw YUV extensions                              */
+    unsigned int raw_output_pitch;                /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately)     */
+    unsigned int Reserved1;                       /**< Reserved for future use (set to zero)                                      */
+    CUstream output_stream;                       /**< IN: stream object used by cuvidMapVideoFrame                               */
+    unsigned int Reserved[46];                    /**< Reserved for future use (set to zero)                                      */
+    unsigned long long *histogram_dptr;           /**< OUT: Output CUdeviceptr for histogram extensions                           */
+    void *Reserved2[1];                           /**< Reserved for future use (set to zero)                                      */
+} CUVIDPROCPARAMS;
+
+/*********************************************************************************************************/
+//! \struct CUVIDGETDECODESTATUS
+//! Struct for reporting decode status.
+//! This structure is used in cuvidGetDecodeStatus API.
+/*********************************************************************************************************/
+typedef struct _CUVIDGETDECODESTATUS
+{
+    cuvidDecodeStatus decodeStatus;   /**< Decode status reported for the queried picture                  */
+    unsigned int reserved[31];        /**< Reserved (presumably for future use, as elsewhere in this file) */
+    void *pReserved[8];               /**< Reserved (presumably for future use, as elsewhere in this file) */
+} CUVIDGETDECODESTATUS;
+
+/****************************************************/
+//! \struct CUVIDRECONFIGUREDECODERINFO
+//! Struct for decoder reset
+//! This structure is used in cuvidReconfigureDecoder() API
+/****************************************************/
+typedef struct _CUVIDRECONFIGUREDECODERINFO
+{
+    unsigned int ulWidth;             /**< IN: Coded sequence width in pixels, MUST be <= ulMaxWidth defined at CUVIDDECODECREATEINFO  */
+    unsigned int ulHeight;            /**< IN: Coded sequence height in pixels, MUST be <= ulMaxHeight defined at CUVIDDECODECREATEINFO  */
+    unsigned int ulTargetWidth;       /**< IN: Post processed output width */
+    unsigned int ulTargetHeight;      /**< IN: Post Processed output height */
+    unsigned int ulNumDecodeSurfaces; /**< IN: Maximum number of internal decode surfaces */
+    unsigned int reserved1[12];       /**< Reserved for future use. Set to Zero */
+    /**
+    * IN: Area of frame to be displayed. Use-case : Source Cropping
+    */
+    struct {
+        short left;    /**< left edge of the displayed area (presumably in pixels - confirm)   */
+        short top;     /**< top edge of the displayed area (presumably in pixels - confirm)    */
+        short right;   /**< right edge of the displayed area (presumably in pixels - confirm)  */
+        short bottom;  /**< bottom edge of the displayed area (presumably in pixels - confirm) */
+    } display_area;
+    /**
+    * IN: Target Rectangle in the OutputFrame. Use-case : Aspect ratio Conversion
+    */
+    struct {
+        short left;    /**< left edge of the target rectangle (presumably in pixels - confirm)   */
+        short top;     /**< top edge of the target rectangle (presumably in pixels - confirm)    */
+        short right;   /**< right edge of the target rectangle (presumably in pixels - confirm)  */
+        short bottom;  /**< bottom edge of the target rectangle (presumably in pixels - confirm) */
+    } target_rect;
+    unsigned int reserved2[11]; /**< Reserved for future use. Set to Zero */
+} CUVIDRECONFIGUREDECODERINFO; 
+
+
+/***********************************************************************************************************/
+//! VIDEO_DECODER
+//!
+//! In order to minimize decode latencies, there should be always at least 2 pictures in the decode
+//! queue at any time, in order to make sure that all decode engines are always busy.
+//!
+//! Overall data flow:
+//!  - cuvidGetDecoderCaps(...)
+//!  - cuvidCreateDecoder(...)
+//!  - For each picture:
+//!    + cuvidDecodePicture(N)
+//!    + cuvidMapVideoFrame(N-4)
+//!    + do some processing in cuda
+//!    + cuvidUnmapVideoFrame(N-4)
+//!    + cuvidDecodePicture(N+1)
+//!    + cuvidMapVideoFrame(N-3)
+//!    + ...
+//!  - cuvidDestroyDecoder(...)
+//!
+//! NOTE:
+//! - When the cuda context is created from a D3D device, the D3D device must also be created
+//!   with the D3DCREATE_MULTITHREADED flag.
+//! - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
+//! - cuvidDecodePicture may block the calling thread if there are too many pictures pending
+//!   in the decode queue
+/***********************************************************************************************************/
+
+
+/**********************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc)
+//! Queries decode capabilities of NVDEC-HW based on CodecType, ChromaFormat and BitDepthMinus8 parameters.
+//! 1. Application fills IN parameters CodecType, ChromaFormat and BitDepthMinus8 of CUVIDDECODECAPS structure
+//! 2. On calling cuvidGetDecoderCaps, driver fills OUT parameters if the IN parameters are supported
+//!    If IN parameters passed to the driver are not supported by NVDEC-HW, then all OUT params are set to 0.
+//! E.g. on Geforce GTX 960:
+//!   App fills - eCodecType = cudaVideoCodec_H264; eChromaFormat = cudaVideoChromaFormat_420; nBitDepthMinus8 = 0;
+//!   Given IN parameters are supported, hence driver fills: bIsSupported = 1; nMinWidth   = 48; nMinHeight  = 16; 
+//!   nMaxWidth = 4096; nMaxHeight = 4096; nMaxMBCount = 65536;
+//! CodedWidth*CodedHeight/256 must be less than or equal to nMaxMBCount
+/**********************************************************************************************************************/
+extern CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc);
+
+/*****************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
+//! Create the decoder object based on pdci. A handle to the created decoder is returned
+/*****************************************************************************************************/
+extern CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
+
+/*****************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
+//! Destroy the decoder object
+/*****************************************************************************************************/
+extern CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder);
+
+/*****************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
+//! Decode a single picture (field or frame)
+//! Kicks off HW decoding 
+/*****************************************************************************************************/
+extern CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
+
+/************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx, CUVIDGETDECODESTATUS* pDecodeStatus);
+//! Get the decode status for frame corresponding to nPicIdx
+//! API is supported for Maxwell and above generation GPUs.
+//! API is currently supported for HEVC, H264 and JPEG codecs.
+//! API returns CUDA_ERROR_NOT_SUPPORTED error code for unsupported GPU or codec.
+/************************************************************************************************************/
+extern CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx, CUVIDGETDECODESTATUS* pDecodeStatus);
+
+/*********************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams)
+//! Used to reuse single decoder for multiple clips. Currently supports resolution change, resize params, display area 
+//! params, target area params change for same codec. Must be called during CUVIDPARSERPARAMS::pfnSequenceCallback 
+/*********************************************************************************************************/
+extern CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams);
+
+
+#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
+/************************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, 
+//!                                         unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
+//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
+//! pitch of the video frame
+/************************************************************************************************************************/
+extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
+                                           unsigned int *pDevPtr, unsigned int *pPitch,
+                                           CUVIDPROCPARAMS *pVPP);
+
+/*****************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
+//! Unmap a previously mapped video frame
+/*****************************************************************************************************/
+extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
+#endif
+
+/****************************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, 
+//!                                           unsigned int * pPitch, CUVIDPROCPARAMS *pVPP);
+//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
+//! pitch of the video frame
+/****************************************************************************************************************************/
+extern CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
+                                             unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
+
+/**************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
+//! Unmap a previously mapped video frame
+/**************************************************************************************************/
+extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
+
+#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
+#define cuvidMapVideoFrame      cuvidMapVideoFrame64
+#define cuvidUnmapVideoFrame    cuvidUnmapVideoFrame64
+#endif
+
+
+
+/********************************************************************************************************************/
+//!
+//! Context-locking: to facilitate multi-threaded implementations, the following 4 functions
+//! provide a simple mutex-style host synchronization. If a non-NULL context is specified
+//! in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
+//! context before making any cuda calls.
+//! A multi-threaded application could create a lock associated with a context handle so that
+//! multiple threads can safely share the same cuda context:
+//!  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
+//!    that can be passed to cuvidCtxLockCreate.
+//!  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
+//!
+//! NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
+//! decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
+/********************************************************************************************************************/
+
+/********************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
+//! This API is used to create CtxLock object
+/********************************************************************************************************************/
+extern CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
+
+/********************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
+//! This API is used to free CtxLock object
+/********************************************************************************************************************/
+extern CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck);
+
+/********************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
+//! This API is used to acquire ctxlock
+/********************************************************************************************************************/
+extern CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
+
+/********************************************************************************************************************/
+//! \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
+//! This API is used to release ctxlock
+/********************************************************************************************************************/
+extern CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
+
+/**********************************************************************************************/
+
+
+#if defined(__cplusplus)
+}
+// Auto-lock helper for C++ applications
+class CCtxAutoLock  // scoped guard: locks in the constructor, unlocks in the destructor; copying is disabled because a copy would unlock the same handle twice
+{
+    CUvideoctxlock m_ctx;  // handle passed to cuvidCtxLock/cuvidCtxUnlock (private by default)
+public:
+    CCtxAutoLock(CUvideoctxlock ctx):m_ctx(ctx) { cuvidCtxLock(m_ctx,0); }
+    ~CCtxAutoLock() { cuvidCtxUnlock(m_ctx,0); }
+    CCtxAutoLock(const CCtxAutoLock&) = delete; CCtxAutoLock& operator=(const CCtxAutoLock&) = delete;
+};
+#endif /* __cplusplus */
+
+#endif // __CUDA_VIDEO_H__
+
diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc
new file mode 100644
index 0000000000..e5387dd5cc
--- /dev/null
+++ b/libheif/plugins/decoder_nvdec.cc
@@ -0,0 +1,363 @@
+/*
+ * NVIDIA Decoder.
+ * Copyright (c) 2023 Brad Hards <bradh@frogmouth.net>
+ *
+ * This file is part of libheif.
+ *
+ * libheif is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * libheif is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libheif.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "libheif/heif.h"
+#include "libheif/heif_plugin.h"
+#include "common_utils.h"
+#include "decoder_nvdec.h"
+#include <memory>
+#include <cstring>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <vector>
+#include <iomanip>
+#include <sstream>
+#include <cuda.h>
+#include <iostream>
+#include "nvcuvid.h"
+#include "cuviddec.h"
+#include "NvDecoder.h"
+#include "nalu_utils.h"
+#include <mutex>
+
+static const int NVDEC_PLUGIN_PRIORITY = 120;
+
+#define MAX_PLUGIN_NAME_LENGTH 80
+
+static char plugin_name[MAX_PLUGIN_NAME_LENGTH];
+
+static const char *nvdec_plugin_name()
+{
+    // Rewrites the shared static buffer on each call and returns a pointer to it.
+    static const char description[] = "NVIDIA Video Decoder (Hardware)";
+    strncpy(plugin_name, description, MAX_PLUGIN_NAME_LENGTH - 1);
+    // strncpy does not terminate on truncation, so force the last byte to NUL
+    plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0;
+    return plugin_name;
+}
+
+static void nvdec_init_plugin()
+{
+    static_cast<void>(cuInit(0));  // result deliberately discarded: this hook returns void and cannot report a failure
+}
+
+static void nvdec_deinit_plugin()  // counterpart of nvdec_init_plugin(); the body is empty, no teardown is performed
+{
+}
+
+static int nvdec_does_support_format(enum heif_compression_format format)
+{
+    // Probes CUDA device 0: reports NVDEC_PLUGIN_PRIORITY if it can decode `format`
+    // as 8-bit 4:2:0, or 0 when it cannot or when any CUDA call fails.
+    CUdevice cuDevice = 0;
+    CUresult result = cuDeviceGet(&cuDevice, 0);
+    if (result != CUDA_SUCCESS)
+    {
+        return 0;
+    }
+#if 0
+  char szDeviceName[80];
+  result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice);
+  if (result != CUDA_SUCCESS) {
+	return 0;
+  }
+  std::cout << "GPU in use: " << szDeviceName << std::endl;
+#endif
+    // the context created here is destroyed again on every path below
+    CUcontext cuContext = nullptr;
+    result = cuCtxCreate(&cuContext, 0, cuDevice);
+    if (result != CUDA_SUCCESS)
+    {
+        return 0;
+    }
+
+    CUVIDDECODECAPS decodeCaps = {};
+    decodeCaps.eChromaFormat = cudaVideoChromaFormat_420;
+    decodeCaps.nBitDepthMinus8 = 0;
+
+    bool known_format = true;
+    switch (format)
+    {
+        case heif_compression_JPEG:
+            decodeCaps.eCodecType = cudaVideoCodec_JPEG;
+            break;
+        case heif_compression_HEVC:
+            decodeCaps.eCodecType = cudaVideoCodec_HEVC;
+            break;
+        case heif_compression_AVC:
+            decodeCaps.eCodecType = cudaVideoCodec_H264;
+            break;
+        case heif_compression_AV1:
+            decodeCaps.eCodecType = cudaVideoCodec_AV1;
+            break;
+        default:
+            known_format = false;
+            break;
+    }
+
+    // the driver is only queried for the formats mapped above; anything else scores 0
+    int priority = 0;
+    if (known_format && cuvidGetDecoderCaps(&decodeCaps) == CUDA_SUCCESS && decodeCaps.bIsSupported)
+    {
+        priority = NVDEC_PLUGIN_PRIORITY;
+    }
+    cuCtxDestroy(cuContext);
+    return priority;
+}
+
+struct heif_error nvdec_new_decoder(void **decoder)
+{
+    // Factory without a configuration argument: the codec is always set to HEVC.
+    nvdec_context *fresh = new nvdec_context();
+    fresh->eCodec = cudaVideoCodec_HEVC;
+    fresh->strict = false;
+    *decoder = fresh;
+    return heif_error_ok;
+}
+
+void nvdec_free_decoder(void *decoder)
+{
+    // Deletes the nvdec_context behind the opaque handle.
+    // A null handle is accepted and ignored.
+    nvdec_context *doomed = static_cast<nvdec_context *>(decoder);
+
+    if (doomed != nullptr)
+    {
+        delete doomed;
+    }
+}
+
+struct heif_error nvdec_push_data(void *decoder, const void *frame_data, size_t frame_size)
+{
+    // Appends frame_size bytes starting at frame_data to the context's data buffer.
+    nvdec_context *self = static_cast<nvdec_context *>(decoder);
+    const uint8_t *first = static_cast<const uint8_t *>(frame_data);
+    const uint8_t *last = first + frame_size;
+
+    self->data.insert(self->data.end(), first, last);
+    return heif_error_ok;
+}
+
+
+// Copies `rows` rows of `rowBytes` bytes from the tightly packed buffer `src` into `dst`, whose
+// rows are `dstStride` bytes apart, and returns the position just past the consumed input.
+static const uint8_t *nvdec_copy_plane(uint8_t *dst, int dstStride, const uint8_t *src, size_t rowBytes, int rows)
+{
+    for (int r = 0; r < rows; r++, src += rowBytes) {
+        memcpy(dst + r * dstStride, src, rowBytes);
+    }
+    return src;
+}
+
+// Decodes the bytes collected by nvdec_push_data() and stores the picture in *out_img as a
+// planar YCbCr 4:2:0 heif_image. Every failure returns an error and leaves *out_img untouched;
+// that includes the decoder producing no frame and a plane that cannot be allocated.
+// NOTE(review): the CUDA context, context lock and stream are only destroyed on the failure paths
+// below; who releases them after a successful decode is not visible in this file - confirm.
+struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
+{
+    struct nvdec_context *ctx = (struct nvdec_context *)decoder;
+
+    heif_error err;
+    NalMap nalus;
+// TODO
+#if 0
+    if (ctx->eCodec == cudaVideoCodec_HEVC) {
+        err = nalus.parseHevcNalu(ctx->data.data(), ctx->data.size());
+        if (err.code != heif_error_Ok) {
+            return err;
+        }
+        if ((!nalus.NUTs_are_valid()) || (!nalus.IDR_is_valid())) {
+            if (!nalus.NUTs_are_valid()) {
+                printf("NUTs not valid");
+            }
+            if (!nalus.IDR_is_valid()) {
+                printf("IDR not valid");
+            }
+            struct heif_error err = {heif_error_Decoder_plugin_error,
+                                    heif_suberror_End_of_data,
+                                    "Unexpected end of data"};
+            return err;
+        }
+    }
+    if (ctx->eCodec == cudaVideoCodec_H264) {
+        err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size());
+        if (err.code != heif_error_Ok) {
+            return err;
+        }
+    }
+#endif
+    CUdevice cuDevice = 0;
+    CUresult result = cuDeviceGet(&cuDevice, 0);
+    if (result != CUDA_SUCCESS) {
+        return {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error, "could not get CUDA device"};
+    }
+    result = cuCtxCreate(&(ctx->cuContext), 0, cuDevice);
+    if (result != CUDA_SUCCESS) {
+        return {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error, "could not get CUDA context"};
+    }
+    result = cuvidCtxLockCreate(&(ctx->ctxLock), ctx->cuContext);
+    if (result != CUDA_SUCCESS) {
+        cuCtxDestroy(ctx->cuContext);
+        return {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error, "could not create CUDA context lock"};
+    }
+    result = cuStreamCreate(&(ctx->cuvidStream), CU_STREAM_DEFAULT);
+    if (result != CUDA_SUCCESS) {
+        // The returned message must outlive this call, so it is formatted into per-thread static
+        // storage instead of a temporary string that is gone before the caller can read it.
+        static thread_local char streamErrMsg[128];
+        const char *szErrName = NULL;
+        cuGetErrorName(result, &szErrName);
+        snprintf(streamErrMsg, sizeof(streamErrMsg), "could not create CUDA stream %s", szErrName ? szErrName : "(unknown error)");
+        cuvidCtxLockDestroy(ctx->ctxLock);
+        cuCtxDestroy(ctx->cuContext);
+        return {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error, streamErrMsg};
+    }
+
+    NvDecoder dec(ctx);
+    err = dec.initVideoParser();
+    if (err.code != heif_error_Ok) {
+        cuStreamDestroy(ctx->cuvidStream);
+        cuvidCtxLockDestroy(ctx->ctxLock);
+        cuCtxDestroy(ctx->cuContext);
+        return err;
+    }
+
+    int nFrameReturned;
+// TODO
+#if 0
+    if (ctx->eCodec == cudaVideoCodec_HEVC) {
+        uint8_t *hevc_data;
+        size_t avc_data_size;
+        nalus.buildWithStartCodesHEVC(&hevc_data, &avc_data_size);
+        nFrameReturned = dec.Decode(hevc_data, avc_data_size);
+    } else if (ctx->eCodec == cudaVideoCodec_H264) {
+        uint8_t *avc_data;
+        size_t avc_data_size;
+        nalus.buildWithStartCodesAVC(&avc_data, &avc_data_size);
+        nFrameReturned = dec.Decode(avc_data, avc_data_size);
+        printf("nFrameReturned: %d\n", nFrameReturned);
+    } else 
+#endif
+// TODO: else closure
+    nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size());
+    if (nFrameReturned <= 0) {
+        // reporting success here would hand the caller heif_error_ok together with an unset *out_img
+        return {heif_error_Decoder_plugin_error, heif_suberror_Unspecified, "NVDEC did not return a decoded frame"};
+    }
+    const uint8_t *pFrame = dec.GetFrame();
+
+    struct heif_image *heif_img = nullptr;
+    // dummy entry for chroma
+    err = heif_image_create(dec.GetWidth(), dec.GetHeight(), heif_colorspace_YCbCr, heif_chroma_420, &heif_img);
+    if (err.code != heif_error_Ok) {
+        assert(heif_img == nullptr);
+        return err;
+    }
+    const int chromaWidth = dec.GetWidth() / 2;
+    err = heif_image_add_plane(heif_img, heif_channel_Y, dec.GetWidth(), dec.GetHeight(), dec.GetBitDepth());
+    if (err.code == heif_error_Ok) {
+        err = heif_image_add_plane(heif_img, heif_channel_Cb, chromaWidth, dec.GetChromaHeight(), dec.GetBitDepth());
+    }
+    if (err.code == heif_error_Ok) {
+        err = heif_image_add_plane(heif_img, heif_channel_Cr, chromaWidth, dec.GetChromaHeight(), dec.GetBitDepth());
+    }
+    if (err.code != heif_error_Ok) {
+        // do not run the copies below unless all three planes were allocated
+        heif_image_release(heif_img);
+        return err;
+    }
+
+    // The decoded frame is read as three consecutive, tightly packed planes: Y, then Cb, then Cr.
+    int stride = 0;
+    uint8_t *plane = heif_image_get_plane(heif_img, heif_channel_Y, &stride);
+    pFrame = nvdec_copy_plane(plane, stride, pFrame, dec.GetWidth() * dec.GetBPP(), dec.GetHeight());
+    plane = heif_image_get_plane(heif_img, heif_channel_Cb, &stride);
+    pFrame = nvdec_copy_plane(plane, stride, pFrame, chromaWidth * dec.GetBPP(), dec.GetChromaHeight());
+    plane = heif_image_get_plane(heif_img, heif_channel_Cr, &stride);
+    nvdec_copy_plane(plane, stride, pFrame, chromaWidth * dec.GetBPP(), dec.GetChromaHeight());
+
+    *out_img = heif_img;
+    return heif_error_ok;
+}
+
+void nvdec_set_strict_decoding(void *decoder, int strict)
+{
+    // Stores the requested strictness flag on the decoder context.
+    static_cast<nvdec_context *>(decoder)->strict = strict;
+}
+
+struct heif_error nvdec_new_decoder2(void **decoder, const heif_decoder_configuration *config)
+{
+    // Creates a decoder context whose codec follows config->compression_format.
+    // AV1, AVC, HEVC and JPEG are accepted; any other format yields an error
+    // and *decoder is not written.
+    // The context is held by a unique_ptr until it is handed to the caller.
+    std::unique_ptr<nvdec_context> pending(new nvdec_context());
+    pending->strict = false;
+
+    const auto requested = config->compression_format;
+    if (requested == heif_compression_AV1) {
+        pending->eCodec = cudaVideoCodec_AV1;
+    } else if (requested == heif_compression_AVC) {
+        pending->eCodec = cudaVideoCodec_H264;
+    } else if (requested == heif_compression_HEVC) {
+        pending->eCodec = cudaVideoCodec_HEVC;
+    } else if (requested == heif_compression_JPEG) {
+        pending->eCodec = cudaVideoCodec_JPEG;
+    } else {
+        // `pending` goes out of scope on return, which deletes the context
+        return {heif_error_Decoder_plugin_error, heif_suberror_Plugin_loading_error, "unsupported compression format"};
+    }
+
+    // from here on the caller owns the context
+    *decoder = pending.release();
+
+    return heif_error_ok;
+}
+
+static const struct heif_decoder_plugin decoder_nvdec  // descriptor returned by get_decoder_plugin_nvdec()
+{
+    4,                          // presumably the plugin API version - confirm against heif_plugin.h
+    nvdec_plugin_name,          // returns the human-readable plugin name
+    nvdec_init_plugin,          // calls cuInit(0)
+    nvdec_deinit_plugin,        // empty hook
+    nvdec_does_support_format,  // priority for a format, or 0 if unsupported
+    nvdec_new_decoder,          // creates a context that defaults to HEVC
+    nvdec_free_decoder,         // deletes a context
+    nvdec_push_data,            // appends compressed bytes to the context
+    nvdec_decode_image,         // decodes the collected bytes into a heif_image
+    nvdec_set_strict_decoding,  // stores the strictness flag
+    "NVDEC",                    // presumably the plugin's short identifier - confirm
+    nvdec_new_decoder2          // creates a context for the format named in a configuration
+};
+
+const struct heif_decoder_plugin *get_decoder_plugin_nvdec()
+{
+    return &decoder_nvdec;  // address of the file-static plugin descriptor; never null
+}
+
+#if PLUGIN_NVDEC  // only compiled when PLUGIN_NVDEC is set (presumably the loadable-plugin build - confirm)
+heif_plugin_info plugin_info{
+    1,                         // presumably the heif_plugin_info structure version - confirm
+    heif_plugin_type_decoder,  // this plugin is a decoder
+    &decoder_nvdec};           // the descriptor table defined in this file
+#endif
diff --git a/libheif/plugins/decoder_nvdec.h b/libheif/plugins/decoder_nvdec.h
new file mode 100644
index 0000000000..79840fddde
--- /dev/null
+++ b/libheif/plugins/decoder_nvdec.h
@@ -0,0 +1,34 @@
+/*
+ * NVIDIA Decoder.
+ * Copyright (c) 2023 Brad Hards <bradh@frogmouth.net>
+ *
+ * This file is part of libheif.
+ *
+ * libheif is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * libheif is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libheif.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LIBHEIF_HEIF_DECODER_NVDEC_H
+#define LIBHEIF_HEIF_DECODER_NVDEC_H
+
+#include "common_utils.h"
+
+const struct heif_decoder_plugin* get_decoder_plugin_nvdec();
+
+#if PLUGIN_NVDEC
+extern "C" {
+MAYBE_UNUSED LIBHEIF_API extern heif_plugin_info plugin_info;
+}
+#endif
+
+#endif
diff --git a/libheif/plugins/nvcuvid.h b/libheif/plugins/nvcuvid.h
new file mode 100644
index 0000000000..c548a22666
--- /dev/null
+++ b/libheif/plugins/nvcuvid.h
@@ -0,0 +1,553 @@
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2024 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/********************************************************************************************************************/
+//! \file nvcuvid.h
+//!   NVDECODE API provides video decoding interface to NVIDIA GPU devices.
+//! \date 2015-2024
+//!  This file contains the interface constants, structure definitions and function prototypes.
+/********************************************************************************************************************/
+
+#if !defined(__NVCUVID_H__)
+#define __NVCUVID_H__
+
+#include "cuviddec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+#define MAX_CLOCK_TS 3
+
+/***********************************************/
+//!
+//! High-level helper APIs for video sources
+//!
+/***********************************************/
+
+typedef void *CUvideosource;
+typedef void *CUvideoparser;
+typedef long long CUvideotimestamp;
+
+
+/************************************************************************/
+//! \enum cudaVideoState
+//! Video source state enums
+//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
+/************************************************************************/
+typedef enum {
+    cudaVideoState_Error   = -1,    /**< Error state (invalid source)                  */
+    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
+    cudaVideoState_Started = 1      /**< Source is running and delivering data         */
+} cudaVideoState;
+
+/************************************************************************/
+//! \enum cudaAudioCodec
+//! Audio compression enums
+//! Used in CUAUDIOFORMAT structure
+/************************************************************************/
+typedef enum {
+    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio               */
+    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio               */
+    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio     */
+    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
+    cudaAudioCodec_LPCM,            /**< PCM Audio                  */
+    cudaAudioCodec_AAC,             /**< AAC Audio                  */
+} cudaAudioCodec;
+
+/************************************************************************/
+//! \ingroup STRUCTS
+//! \struct TIMECODESET
+//! Used to store Time code set extracted from H264 and HEVC codecs
+/************************************************************************/
+typedef struct _TIMECODESET
+{
+    unsigned int time_offset_value;
+    unsigned short n_frames;                 
+    unsigned char clock_timestamp_flag;
+    unsigned char units_field_based_flag;
+    unsigned char counting_type;
+    unsigned char full_timestamp_flag;
+    unsigned char discontinuity_flag;
+    unsigned char cnt_dropped_flag;
+    unsigned char seconds_value;
+    unsigned char minutes_value;
+    unsigned char hours_value;
+    unsigned char seconds_flag;
+    unsigned char minutes_flag;
+    unsigned char hours_flag;
+    unsigned char time_offset_length;
+    unsigned char reserved;
+} TIMECODESET;
+
+/************************************************************************/
+//! \ingroup STRUCTS
+//! \struct TIMECODE
+//! Used to extract Time code in H264 and HEVC codecs
+/************************************************************************/
+typedef struct _TIMECODE
+{
+    TIMECODESET time_code_set[MAX_CLOCK_TS];
+    unsigned char num_clock_ts;
+} TIMECODE;
+
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct SEIMASTERINGDISPLAYINFO
+//! Used to extract mastering display color volume SEI in H264 and HEVC codecs
+/**********************************************************************************/
+typedef struct _SEIMASTERINGDISPLAYINFO
+{
+    unsigned short display_primaries_x[3];
+    unsigned short display_primaries_y[3];
+    unsigned short white_point_x;
+    unsigned short white_point_y;
+    unsigned int max_display_mastering_luminance;
+    unsigned int min_display_mastering_luminance;
+} SEIMASTERINGDISPLAYINFO;
+
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct SEICONTENTLIGHTLEVELINFO
+//! Used to extract content light level info SEI in H264 and HEVC codecs
+/**********************************************************************************/
+typedef struct _SEICONTENTLIGHTLEVELINFO
+{
+    unsigned short max_content_light_level;
+    unsigned short max_pic_average_light_level;
+    unsigned int reserved;
+} SEICONTENTLIGHTLEVELINFO;
+
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct TIMECODEMPEG2
+//! Used to extract Time code in MPEG2 codec
+/**********************************************************************************/
+typedef struct _TIMECODEMPEG2
+{
+    unsigned char drop_frame_flag;
+    unsigned char time_code_hours;
+    unsigned char time_code_minutes;
+    unsigned char marker_bit;
+    unsigned char time_code_seconds;
+    unsigned char time_code_pictures;
+} TIMECODEMPEG2;
+
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct SEIALTERNATIVETRANSFERCHARACTERISTICS
+//! Used to extract alternative transfer characteristics SEI in H264 and HEVC codecs
+/**********************************************************************************/
+typedef struct _SEIALTERNATIVETRANSFERCHARACTERISTICS
+{
+    unsigned char preferred_transfer_characteristics;
+} SEIALTERNATIVETRANSFERCHARACTERISTICS;
+    
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUSEIMESSAGE
+//! Used in CUVIDSEIMESSAGEINFO structure
+/**********************************************************************************/
+typedef struct _CUSEIMESSAGE
+{
+    unsigned char sei_message_type; /**< OUT: SEI Message Type      */
+    unsigned char reserved[3];
+    unsigned int sei_message_size;  /**< OUT: SEI Message Size      */
+} CUSEIMESSAGE;
+
+/************************************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDEOFORMAT
+//! Video format
+//! Used in cuvidGetSourceVideoFormat API
+/************************************************************************************************/
+typedef struct
+{
+    cudaVideoCodec codec;                   /**< OUT: Compression format          */
+   /**
+    * OUT: frame rate = numerator / denominator (for example: 30000/1001)
+    */
+    struct {
+        /** OUT: frame rate numerator   (0 = unspecified or variable frame rate) */
+        unsigned int numerator;
+        /** OUT: frame rate denominator (0 = unspecified or variable frame rate) */
+        unsigned int denominator;
+    } frame_rate;
+    unsigned char progressive_sequence;     /**< OUT: 0=interlaced, 1=progressive                                      */
+    unsigned char bit_depth_luma_minus8;    /**< OUT: high bit depth luma. E.g., 2 for 10-bitdepth, 4 for 12-bitdepth  */
+    unsigned char bit_depth_chroma_minus8;  /**< OUT: high bit depth chroma. E.g., 2 for 10-bitdepth, 4 for 12-bitdepth */
+    unsigned char min_num_decode_surfaces;  /**< OUT: Minimum number of decode surfaces to be allocated for correct
+                                                      decoding. The client can send this value in ulNumDecodeSurfaces
+                                                      (in CUVIDDECODECREATEINFO structure).
+                                                      This guarantees correct functionality and optimal video memory
+                                                      usage but not necessarily the best performance, which depends on
+                                                      the design of the overall application. The optimal number of
+                                                      decode surfaces (in terms of performance and memory utilization)
+                                                      should be decided by experimentation for each application, but it
+                                                      cannot go below min_num_decode_surfaces.
+                                                      If this value is used for ulNumDecodeSurfaces then it must be
+                                                      returned to parser during sequence callback.                     */
+    unsigned int coded_width;               /**< OUT: coded frame width in pixels                                      */
+    unsigned int coded_height;              /**< OUT: coded frame height in pixels                                     */
+   /**
+    * area of the frame that should be displayed
+    * typical example:
+    * coded_width = 1920, coded_height = 1088
+    * display_area = { 0,0,1920,1080 }
+    */
+    struct {
+        int left;                           /**< OUT: left position of display rect    */
+        int top;                            /**< OUT: top position of display rect     */
+        int right;                          /**< OUT: right position of display rect   */
+        int bottom;                         /**< OUT: bottom position of display rect  */
+    } display_area;
+    cudaVideoChromaFormat chroma_format;    /**< OUT: Chroma format                    */
+    unsigned int bitrate;                   /**< OUT: video bitrate (bps, 0=unknown)   */
+   /**
+    * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
+    */
+    struct {
+        int x;
+        int y;
+    } display_aspect_ratio;
+    /**
+    * Video Signal Description
+    * Refer section E.2.1 (VUI parameters semantics) of H264 spec file
+    */
+    struct {
+        unsigned char video_format          : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified     */
+        unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range           */
+        unsigned char reserved_zero_bits    : 4; /**< Reserved bits                                                      */
+        unsigned char color_primaries;           /**< OUT: chromaticity coordinates of source primaries                  */
+        unsigned char transfer_characteristics;  /**< OUT: opto-electronic transfer characteristic of the source picture */
+        unsigned char matrix_coefficients;       /**< OUT: used in deriving luma and chroma signals from RGB primaries   */
+    } video_signal_description;
+    unsigned int seqhdr_data_length;             /**< OUT: Additional bytes following (CUVIDEOFORMATEX)                  */
+} CUVIDEOFORMAT;
+
+/****************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDOPERATINGPOINTINFO
+//! Operating point information of scalable bitstream
+/****************************************************************/
+typedef struct 
+{
+    cudaVideoCodec codec;
+    union 
+    {
+        struct
+        {
+            unsigned char  operating_points_cnt;
+            unsigned char  reserved24_bits[3];
+            unsigned short operating_points_idc[32];
+        } av1;
+        unsigned char CodecReserved[1024];
+    };
+} CUVIDOPERATINGPOINTINFO;
+
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDSEIMESSAGEINFO
+//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
+/**********************************************************************************/
+typedef struct _CUVIDSEIMESSAGEINFO
+{
+    void *pSEIData;                 /**< OUT: SEI Message Data      */
+    CUSEIMESSAGE *pSEIMessage;      /**< OUT: SEI Message Info      */
+    unsigned int sei_message_count; /**< OUT: SEI Message Count     */
+    unsigned int picIdx;            /**< OUT: SEI Message Pic Index */
+} CUVIDSEIMESSAGEINFO;
+
+/****************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDAV1SEQHDR
+//! AV1 specific sequence header information
+/****************************************************************/
+typedef struct {
+    unsigned int max_width;
+    unsigned int max_height;
+    unsigned char reserved[1016];
+} CUVIDAV1SEQHDR;
+
+/****************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDEOFORMATEX
+//! Video format including raw sequence header information
+//! Used in cuvidGetSourceVideoFormat API
+/****************************************************************/
+typedef struct
+{
+    CUVIDEOFORMAT format;                 /**< OUT: CUVIDEOFORMAT structure */
+    union {
+        CUVIDAV1SEQHDR av1;
+        unsigned char raw_seqhdr_data[1024];  /**< OUT: Sequence header data    */
+    };
+} CUVIDEOFORMATEX;
+
+/****************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUAUDIOFORMAT
+//! Audio formats
+//! Used in cuvidGetSourceAudioFormat API
+/****************************************************************/
+typedef struct
+{
+    cudaAudioCodec codec;       /**< OUT: Compression format                                              */
+    unsigned int channels;      /**< OUT: number of audio channels                                        */
+    unsigned int samplespersec; /**< OUT: sampling frequency                                              */
+    unsigned int bitrate;       /**< OUT: For uncompressed, can also be used to determine bits per sample */
+    unsigned int reserved1;     /**< Reserved for future use                                              */
+    unsigned int reserved2;     /**< Reserved for future use                                              */
+} CUAUDIOFORMAT;
+
+
+/***************************************************************/
+//! \enum CUvideopacketflags
+//! Data packet flags
+//! Used in CUVIDSOURCEDATAPACKET structure
+/***************************************************************/
+typedef enum {
+    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream                              */
+    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid                                                            */
+    CUVID_PKT_DISCONTINUITY = 0x04,   /**< Set when a discontinuity has to be signalled                                  */
+    CUVID_PKT_ENDOFPICTURE  = 0x08,   /**< Set when the packet contains exactly one frame or one field                   */
+    CUVID_PKT_NOTIFY_EOS    = 0x10,   /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
+                                           display callback will be invoked with null value of CUVIDPARSERDISPINFO which
+                                           should be interpreted as end of the stream.                                   */
+} CUvideopacketflags;
+
+/*****************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDSOURCEDATAPACKET
+//! Data Packet
+//! Used in cuvidParseVideoData API
+//! IN for cuvidParseVideoData
+/*****************************************************************************/
+typedef struct _CUVIDSOURCEDATAPACKET
+{
+    unsigned long flags;            /**< IN: Combination of CUVID_PKT_XXX flags                              */
+    unsigned long payload_size;     /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
+    const unsigned char *payload;   /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
+    CUvideotimestamp timestamp;     /**< IN: Presentation time stamp (10MHz clock), only valid if
+                                             CUVID_PKT_TIMESTAMP flag is set                                 */
+} CUVIDSOURCEDATAPACKET;
+
+// Callback for packet delivery
+typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
+
+/**************************************************************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDSOURCEPARAMS
+//! Describes parameters needed in cuvidCreateVideoSource API
+//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
+//! containers. Clients that need audio support should use their own or a third-party demuxer.
+/**************************************************************************************************************************/
+typedef struct _CUVIDSOURCEPARAMS
+{
+    unsigned int ulClockRate;                   /**< IN: Time stamp units in Hz (0=default=10000000Hz)      */
+    unsigned int bAnnexb : 1;                   /**< IN: AV1 annexB stream                                  */
+    unsigned int uReserved : 31;                /**< Reserved for future use - set to zero                  */
+    unsigned int uReserved1[6];                 /**< Reserved for future use - set to zero                  */
+    void *pUserData;                            /**< IN: User private data passed in to the data handlers   */
+    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< IN: Called to deliver video packets                    */
+    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< IN: Called to deliver audio packets.                   */
+    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL                  */
+} CUVIDSOURCEPARAMS;
+
+
+/**********************************************/
+//! \ingroup ENUMS
+//! \enum CUvideosourceformat_flags
+//! CUvideosourceformat_flags
+//! Used in cuvidGetSourceVideoFormat API
+/**********************************************/
+typedef enum {
+    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
+} CUvideosourceformat_flags;
+
+#if !defined(__APPLE__)
+/***************************************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
+//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks: 
+//! pfnVideoDataHandler() and pfnAudioDataHandler()
+//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 
+//! containers. Clients that need audio support should use their own or a third-party demuxer.
+/***************************************************************************************************************************/
+CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
+
+/***************************************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
+//! Create video source
+/***************************************************************************************************************************/
+CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
+
+/********************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
+//! Destroy video source
+/********************************************************************/
+CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
+
+/******************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
+//! Set video source state to:
+//! cudaVideoState_Started - to signal the source to run and deliver data
+//! cudaVideoState_Stopped - to stop the source from delivering the data
+//! cudaVideoState_Error   - invalid source
+/******************************************************************************************/
+CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
+
+/******************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
+//! Get video source state
+//! Returns:
+//! cudaVideoState_Started - if Source is running and delivering data
+//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
+//! cudaVideoState_Error   - if Source is in error state
+/******************************************************************************************/
+cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
+
+/******************************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
+//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
+/******************************************************************************************************************/
+CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
+
+/**************************************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
+//! Get audio source format
+//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 
+//! containers. Clients that need audio support should use their own or a third-party demuxer.
+/**************************************************************************************************************************/
+CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
+
+#endif
+/**********************************************************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDPARSERDISPINFO
+//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
+/**********************************************************************************/
+typedef struct _CUVIDPARSERDISPINFO
+{
+    int picture_index;          /**< OUT: Index of the current picture                                                         */
+    int progressive_frame;      /**< OUT: 1 if progressive frame; 0 otherwise                                                  */
+    int top_field_first;        /**< OUT: 1 if top field is displayed first; 0 otherwise                                       */
+    int repeat_first_field;     /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, 
+                                     -1=unpaired field)                                                                        */
+    CUvideotimestamp timestamp; /**< OUT: Presentation time stamp                                                              */
+} CUVIDPARSERDISPINFO;
+
+/***********************************************************************************************************************/
+//! Parser callbacks
+//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture
+//! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDPARSERPARAMS
+//! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by
+//! cuvidParseVideoData() to the application.
+//! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is 
+//! -1 or invalid operating point.
+//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
+//! while creating parser)
+//! PFNVIDDECODECALLBACK   : 0: fail, >=1: succeeded
+//! PFNVIDDISPLAYCALLBACK  : 0: fail, >=1: succeeded
+//! PFNVIDOPPOINTCALLBACK  : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
+//! PFNVIDSEIMSGCALLBACK   : 0: fail, >=1: succeeded
+/***********************************************************************************************************************/
+typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
+typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
+typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
+typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
+typedef int (CUDAAPI *PFNVIDSEIMSGCALLBACK) (void *, CUVIDSEIMESSAGEINFO *);
+
+/**************************************/
+//! \ingroup STRUCTS
+//! \struct CUVIDPARSERPARAMS
+//! Used in cuvidCreateVideoParser API
+/**************************************/
+typedef struct _CUVIDPARSERPARAMS
+{
+    cudaVideoCodec CodecType;                   /**< IN: cudaVideoCodec_XXX                                                  */
+    unsigned int ulMaxNumDecodeSurfaces;        /**< IN: Max # of decode surfaces (parser will cycle through these)          */
+    unsigned int ulClockRate;                   /**< IN: Timestamp units in Hz (0=default=10000000Hz)                        */
+    unsigned int ulErrorThreshold;              /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
+                                                         call pfnDecodePicture even if picture bitstream is fully corrupted) */
+    unsigned int ulMaxDisplayDelay;             /**< IN: Max display queue delay (improves pipelining of decode with display)
+                                                         0=no delay (recommended values: 2..4)                               */
+    unsigned int bAnnexb : 1;                   /**< IN: AV1 annexB stream                                                   */
+    unsigned int uReserved : 31;                /**< Reserved for future use - set to zero                                   */
+    unsigned int uReserved1[4];                 /**< IN: Reserved for future use - set to 0                                  */
+    void *pUserData;                            /**< IN: User data for callbacks                                             */
+    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
+    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< IN: Called when a picture is ready to be decoded (decode order)         */
+    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< IN: Called whenever a picture is ready to be displayed (display order)  */
+    PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of an AV1
+                                                         scalable bitstream                                                  */
+    PFNVIDSEIMSGCALLBACK pfnGetSEIMsg;          /**< IN: Called when all SEI messages are parsed for particular frame        */
+    void *pvReserved2[5];                       /**< Reserved for future use - set to NULL                                   */
+    CUVIDEOFORMATEX *pExtVideoInfo;             /**< IN: [Optional] sequence header data from system layer                   */
+} CUVIDPARSERPARAMS;
+
+/************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
+//! Create video parser object and initialize
+/************************************************************************************************/
+CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
+
+/************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
+//! Parse the video data from source data packet in pPacket 
+//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and 
+//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking off HW decoding
+//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
+//! the decoder encounters a video format change
+//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
+/************************************************************************************************/
+CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
+
+/************************************************************************************************/
+//! \ingroup FUNCTS
+//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
+//! Destroy the video parser
+/************************************************************************************************/
+CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
+
+/**********************************************************************************************/
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif // __NVCUVID_H__
+
+

From 8c7cbe0f75942fd3e374ce8f72c7231b6580f0a4 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 10:10:45 +1000
Subject: [PATCH 03/41] nvdec: enable HEVC support

---
 libheif/plugins/decoder_nvdec.cc | 116 +++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 22 deletions(-)

diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc
index e5387dd5cc..2e8af06399 100644
--- a/libheif/plugins/decoder_nvdec.cc
+++ b/libheif/plugins/decoder_nvdec.cc
@@ -75,7 +75,7 @@ static int nvdec_does_support_format(enum heif_compression_format format)
     {
         return 0;
     }
-#if 0
+#if 1
   char szDeviceName[80];
   result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice);
   if (result != CUDA_SUCCESS) {
@@ -163,26 +163,14 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
 
     heif_error err;
     NalMap nalus;
-// TODO
-#if 0
     if (ctx->eCodec == cudaVideoCodec_HEVC) {
         err = nalus.parseHevcNalu(ctx->data.data(), ctx->data.size());
         if (err.code != heif_error_Ok) {
             return err;
         }
-        if ((!nalus.NUTs_are_valid()) || (!nalus.IDR_is_valid())) {
-            if (!nalus.NUTs_are_valid()) {
-                printf("NUTs not valid");
-            }
-            if (!nalus.IDR_is_valid()) {
-                printf("IDR not valid");
-            }
-            struct heif_error err = {heif_error_Decoder_plugin_error,
-                                    heif_suberror_End_of_data,
-                                    "Unexpected end of data"};
-            return err;
-        }
     }
+// TODO
+#if 0
     if (ctx->eCodec == cudaVideoCodec_H264) {
         err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size());
         if (err.code != heif_error_Ok) {
@@ -240,13 +228,97 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
     }
 
     int nFrameReturned;
-// TODO
-#if 0
     if (ctx->eCodec == cudaVideoCodec_HEVC) {
         uint8_t *hevc_data;
-        size_t avc_data_size;
-        nalus.buildWithStartCodesHEVC(&hevc_data, &avc_data_size);
-        nFrameReturned = dec.Decode(hevc_data, avc_data_size);
+        size_t hevc_data_size;
+        {
+            int heif_idrpic_size;
+            int heif_vps_size;
+            int heif_sps_size;
+            int heif_pps_size;
+            const unsigned char* heif_vps_data;
+            const unsigned char* heif_sps_data;
+            const unsigned char* heif_pps_data;
+            const unsigned char* heif_idrpic_data;
+
+            if ((nalus.count(NAL_UNIT_VPS_NUT) > 0) && (nalus.count(NAL_UNIT_SPS_NUT) > 0) && (nalus.count(NAL_UNIT_PPS_NUT) > 0))
+            {
+                heif_vps_size = nalus.size(NAL_UNIT_VPS_NUT);
+                heif_vps_data = nalus.data(NAL_UNIT_VPS_NUT);
+
+                heif_sps_size = nalus.size(NAL_UNIT_SPS_NUT);
+                heif_sps_data = nalus.data(NAL_UNIT_SPS_NUT);
+
+                heif_pps_size = nalus.size(NAL_UNIT_PPS_NUT);
+                heif_pps_data = nalus.data(NAL_UNIT_PPS_NUT);
+            }
+            else
+            {
+                struct heif_error err = { heif_error_Decoder_plugin_error,
+                                            heif_suberror_End_of_data,
+                                            "Unexpected end of data" };
+                return err;
+            }
+
+            if ((nalus.count(NAL_UNIT_IDR_W_RADL) > 0) || (nalus.count(NAL_UNIT_IDR_N_LP) > 0))
+            {
+                if (nalus.count(NAL_UNIT_IDR_W_RADL) > 0)
+                {
+                    heif_idrpic_data = nalus.data(NAL_UNIT_IDR_W_RADL);
+                    heif_idrpic_size = nalus.size(NAL_UNIT_IDR_W_RADL);
+                }
+                else
+                {
+                    heif_idrpic_data = nalus.data(NAL_UNIT_IDR_N_LP);
+                    heif_idrpic_size = nalus.size(NAL_UNIT_IDR_N_LP);
+                }
+            }
+            else
+            {
+                struct heif_error err = { heif_error_Decoder_plugin_error,
+                                            heif_suberror_End_of_data,
+                                            "Unexpected end of data" };
+                return err;
+            }
+
+            const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 };
+            int hevc_AnnexB_StartCode_size = 4;
+
+            hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size;
+            hevc_data = (uint8_t*)malloc(hevc_data_size);
+
+            //Copy hevc pps data
+            uint8_t* hevc_data_ptr = hevc_data;
+            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+            hevc_data_ptr += hevc_AnnexB_StartCode_size;
+            memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size);
+            hevc_data_ptr += heif_vps_size;
+
+            //Copy hevc sps data
+            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+            hevc_data_ptr += hevc_AnnexB_StartCode_size;
+            memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size);
+            hevc_data_ptr += heif_sps_size;
+
+            //Copy hevc pps data
+            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+            hevc_data_ptr += hevc_AnnexB_StartCode_size;
+            memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size);
+            hevc_data_ptr += heif_pps_size;
+
+            //Copy hevc idrpic data
+            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+            hevc_data_ptr += hevc_AnnexB_StartCode_size;
+            memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size);
+
+            // decoder->NalMap not needed anymore
+            nalus.clear();
+        }
+
+        nFrameReturned = dec.Decode(hevc_data, hevc_data_size);
+    } else {
+// TODO
+#if 0
     } else if (ctx->eCodec == cudaVideoCodec_H264) {
         uint8_t *avc_data;
         size_t avc_data_size;
@@ -255,8 +327,8 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
         printf("nFrameReturned: %d\n", nFrameReturned);
     } else 
 #endif
-// TODO: else closure
-    nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size());
+        nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size());
+    }
     
     if (nFrameReturned > 0) {
         uint8_t *pFrame = dec.GetFrame();

From 6540c04084550659a60fdeacf66213d84e7296ee Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 11:11:39 +1000
Subject: [PATCH 04/41] nvdec: clean up HEVC

---
 libheif/plugins/decoder_ffmpeg.cc | 90 +++----------------------------
 libheif/plugins/decoder_nvdec.cc  | 88 ++----------------------------
 libheif/plugins/nalu_utils.cc     | 88 ++++++++++++++++++++++++++++++
 libheif/plugins/nalu_utils.h      |  2 +
 4 files changed, 101 insertions(+), 167 deletions(-)

diff --git a/libheif/plugins/decoder_ffmpeg.cc b/libheif/plugins/decoder_ffmpeg.cc
index c18502d086..5019d25358 100644
--- a/libheif/plugins/decoder_ffmpeg.cc
+++ b/libheif/plugins/decoder_ffmpeg.cc
@@ -264,90 +264,12 @@ static struct heif_error ffmpeg_v1_decode_image(void* decoder_raw,
 {
   struct ffmpeg_decoder* decoder = (struct ffmpeg_decoder*) decoder_raw;
 
-  int heif_idrpic_size;
-  int heif_vps_size;
-  int heif_sps_size;
-  int heif_pps_size;
-  const unsigned char* heif_vps_data;
-  const unsigned char* heif_sps_data;
-  const unsigned char* heif_pps_data;
-  const unsigned char* heif_idrpic_data;
-
-  if ((decoder->nalMap.count(NAL_UNIT_VPS_NUT) > 0)
-      && (decoder->nalMap.count(NAL_UNIT_SPS_NUT) > 0)
-      && (decoder->nalMap.count(NAL_UNIT_PPS_NUT) > 0)
-      )
-  {
-      heif_vps_size = decoder->nalMap.size(NAL_UNIT_VPS_NUT);
-      heif_vps_data = decoder->nalMap.data(NAL_UNIT_VPS_NUT);
-
-      heif_sps_size = decoder->nalMap.size(NAL_UNIT_SPS_NUT);
-      heif_sps_data = decoder->nalMap.data(NAL_UNIT_SPS_NUT);
-
-      heif_pps_size = decoder->nalMap.size(NAL_UNIT_PPS_NUT);
-      heif_pps_data = decoder->nalMap.data(NAL_UNIT_PPS_NUT);
+  uint8_t *hevc_data;
+  size_t hevc_data_size;
+  heif_error err = decoder->nalMap.buildWithStartCodesHevc(&hevc_data, &hevc_data_size, AV_INPUT_BUFFER_PADDING_SIZE);
+  if (err.code != heif_error_Ok) {
+    return err;
   }
-  else
-  {
-      struct heif_error err = { heif_error_Decoder_plugin_error,
-                                heif_suberror_End_of_data,
-                                "Unexpected end of data" };
-      return err;
-  }
-
-  if ((decoder->nalMap.count(NAL_UNIT_IDR_W_RADL) > 0) || (decoder->nalMap.count(NAL_UNIT_IDR_N_LP) > 0))
-  {
-      if (decoder->nalMap.count(NAL_UNIT_IDR_W_RADL) > 0)
-      {
-          heif_idrpic_data = decoder->nalMap.data(NAL_UNIT_IDR_W_RADL);
-          heif_idrpic_size = decoder->nalMap.size(NAL_UNIT_IDR_W_RADL);
-      }
-      else
-      {
-          heif_idrpic_data = decoder->nalMap.data(NAL_UNIT_IDR_N_LP);
-          heif_idrpic_size = decoder->nalMap.size(NAL_UNIT_IDR_N_LP);
-      }
-  }
-  else
-  {
-      struct heif_error err = { heif_error_Decoder_plugin_error,
-                                heif_suberror_End_of_data,
-                                "Unexpected end of data" };
-      return err;
-  }
-
-  const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 };
-  int hevc_AnnexB_StartCode_size = 4;
-
-  size_t hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size;
-  uint8_t* hevc_data = (uint8_t*)malloc(hevc_data_size + AV_INPUT_BUFFER_PADDING_SIZE);
-
-  //Copy hevc pps data
-  uint8_t* hevc_data_ptr = hevc_data;
-  memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-  hevc_data_ptr += hevc_AnnexB_StartCode_size;
-  memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size);
-  hevc_data_ptr += heif_vps_size;
-
-  //Copy hevc sps data
-  memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-  hevc_data_ptr += hevc_AnnexB_StartCode_size;
-  memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size);
-  hevc_data_ptr += heif_sps_size;
-
-  //Copy hevc pps data
-  memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-  hevc_data_ptr += hevc_AnnexB_StartCode_size;
-  memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size);
-  hevc_data_ptr += heif_pps_size;
-
-  //Copy hevc idrpic data
-  memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-  hevc_data_ptr += hevc_AnnexB_StartCode_size;
-  memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size);
-
-  // decoder->NalMap not needed anymore
-  decoder->nalMap.clear();
 
   const AVCodec* hevc_codec = NULL;
   AVCodecParserContext* hevc_parser = NULL;
@@ -358,7 +280,7 @@ static struct heif_error ffmpeg_v1_decode_image(void* decoder_raw,
   struct heif_color_profile_nclx* nclx = NULL;
   int ret = 0;
 
-  struct heif_error err = heif_error_success;
+  err = heif_error_success;
 
   uint8_t* parse_hevc_data = NULL;
   int parse_hevc_data_size = 0;
diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc
index 2e8af06399..a297f98c54 100644
--- a/libheif/plugins/decoder_nvdec.cc
+++ b/libheif/plugins/decoder_nvdec.cc
@@ -231,90 +231,12 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
     if (ctx->eCodec == cudaVideoCodec_HEVC) {
         uint8_t *hevc_data;
         size_t hevc_data_size;
-        {
-            int heif_idrpic_size;
-            int heif_vps_size;
-            int heif_sps_size;
-            int heif_pps_size;
-            const unsigned char* heif_vps_data;
-            const unsigned char* heif_sps_data;
-            const unsigned char* heif_pps_data;
-            const unsigned char* heif_idrpic_data;
-
-            if ((nalus.count(NAL_UNIT_VPS_NUT) > 0) && (nalus.count(NAL_UNIT_SPS_NUT) > 0) && (nalus.count(NAL_UNIT_PPS_NUT) > 0))
-            {
-                heif_vps_size = nalus.size(NAL_UNIT_VPS_NUT);
-                heif_vps_data = nalus.data(NAL_UNIT_VPS_NUT);
-
-                heif_sps_size = nalus.size(NAL_UNIT_SPS_NUT);
-                heif_sps_data = nalus.data(NAL_UNIT_SPS_NUT);
-
-                heif_pps_size = nalus.size(NAL_UNIT_PPS_NUT);
-                heif_pps_data = nalus.data(NAL_UNIT_PPS_NUT);
-            }
-            else
-            {
-                struct heif_error err = { heif_error_Decoder_plugin_error,
-                                            heif_suberror_End_of_data,
-                                            "Unexpected end of data" };
-                return err;
-            }
-
-            if ((nalus.count(NAL_UNIT_IDR_W_RADL) > 0) || (nalus.count(NAL_UNIT_IDR_N_LP) > 0))
-            {
-                if (nalus.count(NAL_UNIT_IDR_W_RADL) > 0)
-                {
-                    heif_idrpic_data = nalus.data(NAL_UNIT_IDR_W_RADL);
-                    heif_idrpic_size = nalus.size(NAL_UNIT_IDR_W_RADL);
-                }
-                else
-                {
-                    heif_idrpic_data = nalus.data(NAL_UNIT_IDR_N_LP);
-                    heif_idrpic_size = nalus.size(NAL_UNIT_IDR_N_LP);
-                }
-            }
-            else
-            {
-                struct heif_error err = { heif_error_Decoder_plugin_error,
-                                            heif_suberror_End_of_data,
-                                            "Unexpected end of data" };
-                return err;
-            }
-
-            const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 };
-            int hevc_AnnexB_StartCode_size = 4;
-
-            hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size;
-            hevc_data = (uint8_t*)malloc(hevc_data_size);
-
-            //Copy hevc pps data
-            uint8_t* hevc_data_ptr = hevc_data;
-            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-            hevc_data_ptr += hevc_AnnexB_StartCode_size;
-            memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size);
-            hevc_data_ptr += heif_vps_size;
-
-            //Copy hevc sps data
-            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-            hevc_data_ptr += hevc_AnnexB_StartCode_size;
-            memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size);
-            hevc_data_ptr += heif_sps_size;
-
-            //Copy hevc pps data
-            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-            hevc_data_ptr += hevc_AnnexB_StartCode_size;
-            memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size);
-            hevc_data_ptr += heif_pps_size;
-
-            //Copy hevc idrpic data
-            memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
-            hevc_data_ptr += hevc_AnnexB_StartCode_size;
-            memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size);
-
-            // decoder->NalMap not needed anymore
-            nalus.clear();
+        err = nalus.buildWithStartCodesHevc(&hevc_data, &hevc_data_size, 0);
+        if (err.code != heif_error_Ok) {
+            cuvidCtxLockDestroy(ctx->ctxLock);
+            cuCtxDestroy(ctx->cuContext);
+            return err;
         }
-
         nFrameReturned = dec.Decode(hevc_data, hevc_data_size);
     } else {
 // TODO
diff --git a/libheif/plugins/nalu_utils.cc b/libheif/plugins/nalu_utils.cc
index f4f3731d60..a668ca2c04 100644
--- a/libheif/plugins/nalu_utils.cc
+++ b/libheif/plugins/nalu_utils.cc
@@ -22,6 +22,8 @@
 #include <memory>
 #include <utility>
 #include "nalu_utils.h"
+#include <cstring>
+#include <cstdlib>
 
 NalUnit::NalUnit()
 {
@@ -94,4 +96,90 @@ const heif_error NalMap::parseHevcNalu(const uint8_t *cdata, size_t size)
     return heif_error_success;
 }
 
+heif_error NalMap::buildWithStartCodesHevc(uint8_t **hevc_data, size_t *hevc_data_size, size_t additional_pad_size)
+{
+    int heif_idrpic_size;
+    int heif_vps_size;
+    int heif_sps_size;
+    int heif_pps_size;
+    const unsigned char* heif_vps_data;
+    const unsigned char* heif_sps_data;
+    const unsigned char* heif_pps_data;
+    const unsigned char* heif_idrpic_data;
+
+    if ((count(NAL_UNIT_VPS_NUT) > 0) && (count(NAL_UNIT_SPS_NUT) > 0) && (count(NAL_UNIT_PPS_NUT) > 0))
+    {
+        heif_vps_size = size(NAL_UNIT_VPS_NUT);
+        heif_vps_data = data(NAL_UNIT_VPS_NUT);
+
+        heif_sps_size = size(NAL_UNIT_SPS_NUT);
+        heif_sps_data = data(NAL_UNIT_SPS_NUT);
+
+        heif_pps_size = size(NAL_UNIT_PPS_NUT);
+        heif_pps_data = data(NAL_UNIT_PPS_NUT);
+    }
+    else
+    {
+        struct heif_error err = { heif_error_Decoder_plugin_error,
+                                    heif_suberror_End_of_data,
+                                    "Unexpected end of data" };
+        return err;
+    }
+
+    if ((count(NAL_UNIT_IDR_W_RADL) > 0) || (count(NAL_UNIT_IDR_N_LP) > 0))
+    {
+        if (count(NAL_UNIT_IDR_W_RADL) > 0)
+        {
+            heif_idrpic_data = data(NAL_UNIT_IDR_W_RADL);
+            heif_idrpic_size = size(NAL_UNIT_IDR_W_RADL);
+        }
+        else
+        {
+            heif_idrpic_data = data(NAL_UNIT_IDR_N_LP);
+            heif_idrpic_size = size(NAL_UNIT_IDR_N_LP);
+        }
+    }
+    else
+    {
+        struct heif_error err = { heif_error_Decoder_plugin_error,
+                                    heif_suberror_End_of_data,
+                                    "Unexpected end of data" };
+        return err;
+    }
+
+    const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 };
+    int hevc_AnnexB_StartCode_size = 4;
+
+    *hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size;
+    *hevc_data = (uint8_t*)malloc(*hevc_data_size + additional_pad_size);
+
+    //Copy hevc vps data
+    uint8_t* hevc_data_ptr = *hevc_data;
+    memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+    hevc_data_ptr += hevc_AnnexB_StartCode_size;
+    memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size);
+    hevc_data_ptr += heif_vps_size;
+
+    //Copy hevc sps data
+    memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+    hevc_data_ptr += hevc_AnnexB_StartCode_size;
+    memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size);
+    hevc_data_ptr += heif_sps_size;
+
+    //Copy hevc pps data
+    memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+    hevc_data_ptr += hevc_AnnexB_StartCode_size;
+    memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size);
+    hevc_data_ptr += heif_pps_size;
+
+    //Copy hevc idrpic data
+    memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size);
+    hevc_data_ptr += hevc_AnnexB_StartCode_size;
+    memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size);
+
+    map.clear();
+
+    return heif_error_success;
+}
+
 void NalMap::clear() { map.clear(); }
\ No newline at end of file
diff --git a/libheif/plugins/nalu_utils.h b/libheif/plugins/nalu_utils.h
index 7c4b86f7ad..721fa003f7 100644
--- a/libheif/plugins/nalu_utils.h
+++ b/libheif/plugins/nalu_utils.h
@@ -55,6 +55,8 @@ class NalMap
 
     const heif_error parseHevcNalu(const uint8_t *cdata, size_t size);
 
+    heif_error buildWithStartCodesHevc(uint8_t **data, size_t *size, size_t additional_pad_size);
+
     void clear();
 private:
     std::map<int, std::unique_ptr<NalUnit>> map;

From 346bea602756b7d9e99695b7666b09b8375b7a29 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 11:25:53 +1000
Subject: [PATCH 05/41] nvdec: add AVC support

---
 libheif/plugins/decoder_nvdec.cc | 72 ++++++++++++++++++++++++--------
 1 file changed, 54 insertions(+), 18 deletions(-)

diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc
index a297f98c54..478430089f 100644
--- a/libheif/plugins/decoder_nvdec.cc
+++ b/libheif/plugins/decoder_nvdec.cc
@@ -38,6 +38,8 @@
 #include "nalu_utils.h"
 #include <mutex>
 
+static heif_error kError_EOF = {heif_error_Decoder_plugin_error, heif_suberror_End_of_data, "Insufficient input data"};
+
 static const int NVDEC_PLUGIN_PRIORITY = 120;
 
 #define MAX_PLUGIN_NAME_LENGTH 80
@@ -169,15 +171,6 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
             return err;
         }
     }
-// TODO
-#if 0
-    if (ctx->eCodec == cudaVideoCodec_H264) {
-        err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size());
-        if (err.code != heif_error_Ok) {
-            return err;
-        }
-    }
-#endif
     CUdevice cuDevice = 0;
 
     CUresult result;
@@ -238,17 +231,60 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img)
             return err;
         }
         nFrameReturned = dec.Decode(hevc_data, hevc_data_size);
-    } else {
-// TODO
-#if 0
     } else if (ctx->eCodec == cudaVideoCodec_H264) {
-        uint8_t *avc_data;
-        size_t avc_data_size;
-        nalus.buildWithStartCodesAVC(&avc_data, &avc_data_size);
-        nFrameReturned = dec.Decode(avc_data, avc_data_size);
+        // TODO: ideally we'd share this code with the OpenH264 decoder
+        const std::vector<uint8_t>& indata = ctx->data;
+        std::vector<uint8_t> scdata;
+
+        size_t idx = 0;
+        while (idx < indata.size()) {
+            if (indata.size() - 4 < idx) {
+                return kError_EOF;
+            }
+
+            uint32_t size = ((indata[idx] << 24) | (indata[idx + 1] << 16) | (indata[idx + 2] << 8) | indata[idx + 3]);
+            idx += 4;
+
+            if (indata.size() < size || indata.size() - size < idx) {
+                return kError_EOF;
+            }
+
+            scdata.push_back(0);
+            scdata.push_back(0);
+            scdata.push_back(1);
+
+            // check for need of start code emulation prevention
+            bool do_start_code_emulation_check = true;
+            while (do_start_code_emulation_check && size >= 3) {
+                bool found_start_code_emulation = false;
+                for (size_t i = 0; i < size - 3; i++) {
+                    if (indata[idx + 0] == 0 && indata[idx + 1] == 0 && (indata[idx + 2] >= 0 && indata[idx + 2] <= 3)) {
+                        scdata.push_back(0);
+                        scdata.push_back(0);
+                        scdata.push_back(3);
+                        scdata.insert(scdata.end(), &indata[idx + 2], &indata[idx + i + 2]);
+                        idx += i + 2;
+                        size -= (uint32_t)(i + 2);
+                        found_start_code_emulation = true;
+                        break;
+                    }
+                }
+
+                do_start_code_emulation_check = found_start_code_emulation;
+            }
+
+            assert(size > 0);
+            scdata.insert(scdata.end(), &indata[idx], &indata[idx + size]);
+
+            idx += size;
+        }
+
+        if (idx != indata.size()) {
+            return kError_EOF;
+        }
+        nFrameReturned = dec.Decode(scdata.data(), scdata.size());
         printf("nFrameReturned: %d\n", nFrameReturned);
-    } else 
-#endif
+    } else {
         nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size());
     }
     

From d57eebf12f7ce78e2b7b6d505063c842b32d4387 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 11:54:27 +1000
Subject: [PATCH 06/41] nvidia: add to CI

---
 .github/workflows/build.yml | 1 +
 scripts/install-ci-linux.sh | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b4417fbb58..1eb7ed5174 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,6 +16,7 @@ jobs:
           - { NAME: "graphics", WITH_GRAPHICS: 1 }
           - { NAME: "x265", WITH_X265: 1 }
           - { NAME: "x265 / graphics", WITH_GRAPHICS: 1, WITH_X265: 1 }
+          - { NAME: "nvidia / graphics", WITH_GRAPHICS: 1, WITH_NVIDIA: 1 }
           - { NAME: "libde265 (1) / graphics", WITH_GRAPHICS: 1, WITH_LIBDE265: 1 }
           - { NAME: "libde265 (2) / graphics", WITH_GRAPHICS: 1, WITH_LIBDE265: 2 }
           - { NAME: "libde265 (1) / x265 / graphics", WITH_GRAPHICS: 1, WITH_X265: 1, WITH_LIBDE265: 1 }
diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh
index c2f6509683..7376b4bb90 100755
--- a/scripts/install-ci-linux.sh
+++ b/scripts/install-ci-linux.sh
@@ -111,6 +111,12 @@ if [ ! -z "$WITH_GRAPHICS" ]; then
         "
 fi
 
+if [ ! -z "$WITH_NVIDIA" ]; then
+    INSTALL_PACKAGES="$INSTALL_PACKAGES \
+        libnvidia-decode \
+        "
+fi
+
 if [ ! -z "$WITH_UNCOMPRESSED_CODEC" ]; then
     INSTALL_PACKAGES="$INSTALL_PACKAGES \
         libbrotli-dev \

From b4134f1226080d6c21385ec0d1c8ba625ae1bac6 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 13:12:13 +1000
Subject: [PATCH 07/41] nvdec: additional CI fixes

---
 scripts/install-ci-linux.sh | 2 +-
 scripts/run-ci.sh           | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh
index 7376b4bb90..5d8d66dd3d 100755
--- a/scripts/install-ci-linux.sh
+++ b/scripts/install-ci-linux.sh
@@ -113,7 +113,7 @@ fi
 
 if [ ! -z "$WITH_NVIDIA" ]; then
     INSTALL_PACKAGES="$INSTALL_PACKAGES \
-        libnvidia-decode \
+        libnvidia-decode-535 \
         "
 fi
 
diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh
index b2a5b267e5..50569587af 100755
--- a/scripts/run-ci.sh
+++ b/scripts/run-ci.sh
@@ -105,6 +105,10 @@ WITH_HEIF_DECODER=
 if [ ! -z "$WITH_LIBDE265" ] ; then
     WITH_HEIF_DECODER=1
 fi
+WITH_NVIDIA_DECODER=
+if [ ! -z "$WITH_NVIDIA" ] ; then
+    WITH_NVIDIA_DECODER=1
+fi
 WITH_AVIF_ENCODER=
 WITH_HEIF_ENCODER=
 # Need decoded images before encoding.

From 5cd9fc3dcc43bd3ea77f0d8d72864a0b1dee43ff Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 13:19:14 +1000
Subject: [PATCH 08/41] ci: fix nvdec option

---
 scripts/run-ci.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh
index 50569587af..1657678291 100755
--- a/scripts/run-ci.sh
+++ b/scripts/run-ci.sh
@@ -105,9 +105,9 @@ WITH_HEIF_DECODER=
 if [ ! -z "$WITH_LIBDE265" ] ; then
     WITH_HEIF_DECODER=1
 fi
-WITH_NVIDIA_DECODER=
+WITH_NV_DECODER=
 if [ ! -z "$WITH_NVIDIA" ] ; then
-    WITH_NVIDIA_DECODER=1
+    WITH_NV_DECODER=1
 fi
 WITH_AVIF_ENCODER=
 WITH_HEIF_ENCODER=

From 41c129ac3e43ac2b59bdbe617d1991508ceade0e Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 13:50:36 +1000
Subject: [PATCH 09/41] nvidia: add to cmake presets

---
 CMakePresets.json | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CMakePresets.json b/CMakePresets.json
index 947c37ec8d..8f2bc5e421 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -43,6 +43,10 @@
         "WITH_OpenJPEG_ENCODER_PLUGIN" : "OFF",
         "WITH_FFMPEG_DECODER" : "ON",
         "WITH_FFMPEG_DECODER_PLUGIN" : "OFF",
+        "WITH_OpenH264_DECODER" : "ON",
+        "WITH_OpenH264_DECODER_PLUGIN" : "OFF",
+        "WITH_NV_DECODER" : "ON",
+        "WITH_NV_DECODER_PLUGIN" : "OFF",
 
         "WITH_REDUCED_VISIBILITY" : "OFF",
         "WITH_HEADER_COMPRESSION" : "ON",
@@ -89,6 +93,10 @@
         "WITH_OPENJPH_ENCODER" : "ON",
         "WITH_FFMPEG_DECODER" : "ON",
         "WITH_FFMPEG_DECODER_PLUGIN" : "ON",
+        "WITH_OpenH264_DECODER" : "ON",
+        "WITH_OpenH264_DECODER_PLUGIN" : "ON",
+        "WITH_NV_DECODER" : "ON",
+        "WITH_NV_DECODER_PLUGIN" : "ON",
 
         "WITH_REDUCED_VISIBILITY" : "ON",
         "WITH_HEADER_COMPRESSION" : "ON",

From ec2bb0bc14b6bd1c277b0ffba273015adbb3b2ce Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 13:43:50 +1000
Subject: [PATCH 10/41] avc: additional avcC box parsing

---
 libheif/codecs/avc.cc | 15 ++++++++++++---
 libheif/codecs/avc.h  |  9 +++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc
index 18218e979e..82cdcc8afb 100644
--- a/libheif/codecs/avc.cc
+++ b/libheif/codecs/avc.cc
@@ -55,7 +55,16 @@ Error Box_avcC::parse(BitstreamRange &range) {
   if ((m_configuration.AVCProfileIndication != 66) &&
       (m_configuration.AVCProfileIndication != 77) &&
       (m_configuration.AVCProfileIndication != 88)) {
-    // TODO: we don't support this yet
+    m_configuration.chroma_format = range.read8() & 0b00000011;
+    m_configuration.bit_depth_luma = 8 + (range.read8() & 0b00000111);
+    m_configuration.bit_depth_chroma = 8 + (range.read8() & 0b00000111);
+    uint8_t numOfSequenceParameterSetExt = range.read8();
+    for (int i = 0; i < numOfSequenceParameterSetExt; i++) {
+      uint16_t sequenceParameterSetExtLength = range.read16();
+      std::vector<uint8_t> sps_ext(sequenceParameterSetExtLength);
+      range.read(sps_ext.data(), sps_ext.size());
+      m_sps_ext.push_back(sps_ext);
+    }
   }
 
   return range.get_error();
@@ -279,7 +288,7 @@ int ImageItem_AVC::get_luma_bits_per_pixel() const
 {
   auto avcC_box = get_file()->get_property<Box_avcC>(get_id());
   if (avcC_box) {
-    return 8; // TODO avcC_box->get_configuration().bit_depth_luma;
+    return avcC_box->get_configuration().bit_depth_luma;
   }
 
   return -1;
@@ -290,7 +299,7 @@ int ImageItem_AVC::get_chroma_bits_per_pixel() const
 {
   auto avcC_box = get_file()->get_property<Box_avcC>(get_id());
   if (avcC_box) {
-    return 8; // TODO avcC_box->get_configuration().bit_depth_chroma;
+    return avcC_box->get_configuration().bit_depth_chroma;
   }
 
   return -1;
diff --git a/libheif/codecs/avc.h b/libheif/codecs/avc.h
index 0233e7efe6..59e2fb6369 100644
--- a/libheif/codecs/avc.h
+++ b/libheif/codecs/avc.h
@@ -41,6 +41,9 @@ class Box_avcC : public Box {
     uint8_t profile_compatibility; // constraint set flags
     uint8_t AVCLevelIndication; // level_idc
     uint8_t lengthSize;
+    uint8_t chroma_format;
+    uint8_t bit_depth_luma = 8;
+    uint8_t bit_depth_chroma = 8;
   };
 
   void set_configuration(const configuration& config)
@@ -63,6 +66,11 @@ class Box_avcC : public Box {
     return m_pps;
   }
 
+  const std::vector< std::vector<uint8_t> > getSequenceParameterSetExt() const
+  {
+    return m_sps_ext;
+  }
+
   void get_header_nals(std::vector<uint8_t>& data) const;
 
   std::string dump(Indent &) const override;
@@ -78,6 +86,7 @@ class Box_avcC : public Box {
   configuration m_configuration;
   std::vector< std::vector<uint8_t> > m_sps;
   std::vector< std::vector<uint8_t> > m_pps;
+  std::vector< std::vector<uint8_t> > m_sps_ext;
 };
 
 

From b8f8ffb151cc2f58f76aa3650e781121bc842d2d Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 13:56:37 +1000
Subject: [PATCH 11/41] nvdec: additional CI packages

---
 scripts/install-ci-linux.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh
index 5d8d66dd3d..b652aa4b2b 100755
--- a/scripts/install-ci-linux.sh
+++ b/scripts/install-ci-linux.sh
@@ -113,6 +113,8 @@ fi
 
 if [ ! -z "$WITH_NVIDIA" ]; then
     INSTALL_PACKAGES="$INSTALL_PACKAGES \
+        nvidia-cuda-dev \
+        nvidia-cuda-toolkit \
         libnvidia-decode-535 \
         "
 fi

From e2768b4e736a7ee27442e6291f54318077f89c53 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 14:04:26 +1000
Subject: [PATCH 12/41] nvdec: remove mandatory requirement for CUDA

---
 cmake/modules/FindNVDEC.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/modules/FindNVDEC.cmake b/cmake/modules/FindNVDEC.cmake
index 8425c0af5e..6d3ee3c424 100644
--- a/cmake/modules/FindNVDEC.cmake
+++ b/cmake/modules/FindNVDEC.cmake
@@ -4,7 +4,7 @@ find_library(NVDEC_LIBRARY
     NAMES libnvcuvid nvcuvid
 )
 
-find_package(CUDAToolkit REQUIRED)
+find_package(CUDAToolkit)
 
 set(NVDEC_PROCESS_LIBS NVDEC_LIBRARY)
 libfind_process(NVDEC)

From 348a37ee13d4878562a5e3a96be7a7143d0a71be Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 14:12:25 +1000
Subject: [PATCH 13/41] nvdec: only link cuda if found

---
 libheif/plugins/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt
index 49f6f47066..4eb1f011ba 100644
--- a/libheif/plugins/CMakeLists.txt
+++ b/libheif/plugins/CMakeLists.txt
@@ -115,7 +115,7 @@ plugin_compilation(openh264dec OpenH264 OpenH264_DECODER_FOUND OpenH264_DECODER
 set(NV_DECODER_sources decoder_nvdec.cc decoder_nvdec.h NvDecoder.cpp NvDecoder.h)
 set(NV_DECODER_extra_plugin_sources)
 plugin_compilation(nvdec NVDEC NVDEC_FOUND NV_DECODER NV_DECODER)
-if(WITH_NV_DECODER)
+if(NVDEC_FOUND)
     target_link_libraries(heif PRIVATE CUDA::cuda_driver)
 endif()
 

From 81fc3708b45179d0e79134ef04eb0ead463e92d0 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Tue, 3 Sep 2024 14:22:03 +1000
Subject: [PATCH 14/41] nvdec: check if we can build without cuda-dev

---
 scripts/install-ci-linux.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh
index b652aa4b2b..6e73df8ed9 100755
--- a/scripts/install-ci-linux.sh
+++ b/scripts/install-ci-linux.sh
@@ -113,7 +113,6 @@ fi
 
 if [ ! -z "$WITH_NVIDIA" ]; then
     INSTALL_PACKAGES="$INSTALL_PACKAGES \
-        nvidia-cuda-dev \
         nvidia-cuda-toolkit \
         libnvidia-decode-535 \
         "

From 015c9cf06013cfa58760e203b5de8f08c652b4ba Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 3 Sep 2024 23:23:57 +0200
Subject: [PATCH 15/41] AVC: pass SPS-Ext to decoder (#1297)

---
 libheif/codecs/avc.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc
index 82cdcc8afb..fef9d34142 100644
--- a/libheif/codecs/avc.cc
+++ b/libheif/codecs/avc.cc
@@ -167,6 +167,15 @@ void Box_avcC::get_header_nals(std::vector<uint8_t>& data) const
     data.insert(data.end(), sps.begin(), sps.end());
   }
 
+  for (const auto& spsext : m_sps_ext) {
+    data.push_back((spsext.size() >> 24) & 0xFF);
+    data.push_back((spsext.size() >> 16) & 0xFF);
+    data.push_back((spsext.size() >> 8) & 0xFF);
+    data.push_back((spsext.size() >> 0) & 0xFF);
+
+    data.insert(data.end(), spsext.begin(), spsext.end());
+  }
+
   for (const auto& pps : m_pps) {
     data.push_back((pps.size() >> 24) & 0xFF);
     data.push_back((pps.size() >> 16) & 0xFF);

From 0672d7da55242d94b5f4fbf391ea2ea5b6cdd84d Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 3 Sep 2024 23:42:57 +0200
Subject: [PATCH 16/41] AVC: dump seq-ext parameters (#1297)

---
 libheif/codecs/avc.cc | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc
index fef9d34142..f175eee0bf 100644
--- a/libheif/codecs/avc.cc
+++ b/libheif/codecs/avc.cc
@@ -98,17 +98,13 @@ Error Box_avcC::write(StreamWriter &writer) const {
 std::string Box_avcC::dump(Indent &indent) const {
   std::ostringstream sstr;
   sstr << Box::dump(indent);
-  sstr << indent << "configuration_version: "
-       << ((int)m_configuration.configuration_version) << "\n"
-       << indent << "AVCProfileIndication: "
-       << ((int)m_configuration.AVCProfileIndication) << " ("
-       << profileIndicationAsText() << ")"
-       << "\n"
-       << indent << "profile_compatibility: "
-       << ((int)m_configuration.profile_compatibility) << "\n"
-       << indent
-       << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication)
-       << "\n";
+  sstr << indent << "configuration_version: " << ((int)m_configuration.configuration_version) << "\n"
+       << indent << "AVCProfileIndication: " << ((int)m_configuration.AVCProfileIndication) << " (" << profileIndicationAsText() << ")\n"
+       << indent << "profile_compatibility: " << ((int)m_configuration.profile_compatibility) << "\n"
+       << indent << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication) << "\n"
+       << indent << "Chroma format: " << ((int)m_configuration.chroma_format) << "\n"
+       << indent << "Bit depth luma: " << ((int)m_configuration.bit_depth_luma) << "\n"
+       << indent << "Bit depth chroma: " << ((int)m_configuration.bit_depth_chroma) << "\n";
 
   for (const auto &sps : m_sps) {
     sstr << indent << "SPS: ";
@@ -119,6 +115,15 @@ std::string Box_avcC::dump(Indent &indent) const {
     sstr << std::dec;
   }
 
+  for (const auto &spsext : m_sps_ext) {
+    sstr << indent << "SPS-EXT: ";
+    for (uint8_t b : spsext) {
+      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " ";
+    }
+    sstr << "\n";
+    sstr << std::dec;
+  }
+
   for (const auto &pps : m_pps) {
     sstr << indent << "PPS: ";
     for (uint8_t b : pps) {

From 321ae91b0957e05272dc91a4fedf28efd15e30c0 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 3 Sep 2024 23:49:16 +0200
Subject: [PATCH 17/41] heif-enc: similar output of uncompressed codec in
 encoder list as in heif-dec

---
 examples/heif_enc.cc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/examples/heif_enc.cc b/examples/heif_enc.cc
index bef60e9dde..1c58ee73eb 100644
--- a/examples/heif_enc.cc
+++ b/examples/heif_enc.cc
@@ -402,11 +402,9 @@ static const char* get_compression_format_name(heif_compression_format format)
 
 static void show_list_of_all_encoders()
 {
-    for (auto compression_format : {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC, heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K
-#if WITH_UNCOMPRESSED_CODEC
-, heif_compression_uncompressed
-#endif
-, heif_compression_VVC
+  for (auto compression_format: {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC,
+                                 heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K,
+                                 heif_compression_uncompressed, heif_compression_VVC
   }) {
 
     switch (compression_format) {
@@ -429,7 +427,12 @@ static void show_list_of_all_encoders()
         std::cout << "JPEG 2000 (HT)";
         break;
       case heif_compression_uncompressed:
-        std::cout << "Uncompressed";
+#if WITH_UNCOMPRESSED_CODEC
+        std::cout << "Uncompressed: yes\n";
+#else
+        std::cout << "Uncompressed: no\n";
+#endif
+        continue; // special handling of this case because it is built in without plugin
         break;
       case heif_compression_VVC:
         std::cout << "VVIC";

From 541782e18421e3aad20ebdfbbb6e9216b3dbd876 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 3 Sep 2024 23:50:56 +0200
Subject: [PATCH 18/41] heif-dec sort list of decoders

---
 examples/heif_dec.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/heif_dec.cc b/examples/heif_dec.cc
index 5e2b9cb213..431bdbc69a 100644
--- a/examples/heif_dec.cc
+++ b/examples/heif_dec.cc
@@ -168,17 +168,14 @@ void list_decoders(heif_compression_format format)
 
 void list_all_decoders()
 {
-  std::cout << "HEIC decoders:\n";
-  list_decoders(heif_compression_HEVC);
+  std::cout << "AVC decoders:\n";
+  list_decoders(heif_compression_AVC);
 
   std::cout << "AVIF decoders:\n";
   list_decoders(heif_compression_AV1);
 
-  std::cout << "VVIC decoders:\n";
-  list_decoders(heif_compression_VVC);
-
-  std::cout << "AVC decoders:\n";
-  list_decoders(heif_compression_AVC);
+  std::cout << "HEIC decoders:\n";
+  list_decoders(heif_compression_HEVC);
 
   std::cout << "JPEG decoders:\n";
   list_decoders(heif_compression_JPEG);
@@ -186,7 +183,7 @@ void list_all_decoders()
   std::cout << "JPEG 2000 decoders:\n";
   list_decoders(heif_compression_JPEG2000);
 
-  std::cout << "HT-J2K decoders:\n";
+  std::cout << "JPEG 2000 (HT) decoders:\n";
   list_decoders(heif_compression_HTJ2K);
 
 #if WITH_UNCOMPRESSED_CODEC
@@ -194,6 +191,9 @@ void list_all_decoders()
 #else
   std::cout << "uncompressed: no\n";
 #endif
+
+  std::cout << "VVIC decoders:\n";
+  list_decoders(heif_compression_VVC);
 }
 
 

From e65c9a357835b5848cc7e0d3af5bea627d8f32d4 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 3 Sep 2024 23:55:25 +0200
Subject: [PATCH 19/41] avc_box: adapt test output

---
 tests/avc_box.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/avc_box.cc b/tests/avc_box.cc
index 4a2fd7bc3b..9e3dc2929e 100644
--- a/tests/avc_box.cc
+++ b/tests/avc_box.cc
@@ -68,6 +68,9 @@ TEST_CASE("avcC") {
                         "AVCProfileIndication: 66 (Constrained Baseline)\n"
                         "profile_compatibility: 128\n"
                         "AVCLevelIndication: 30\n"
+                        "Chroma format: 32\n"
+                        "Bit depth luma: 8\n"
+                        "Bit depth chroma: 8\n"
                         "SPS: 67 64 00 28 ac 72 04 40 40 04 1a 10 00 00 03 00 "
                         "10 00 00 03 03 20 f1 83 18 46 \n"
                         "PPS: 68 e8 43 83 92 c8 b0 \n");

From c1784006a41ccb19070e9dce3943633ae34cc555 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Thu, 5 Sep 2024 10:51:29 +0200
Subject: [PATCH 20/41] tild: fix parsing of 'tiles_are_sequential' and more
 dump output

---
 libheif/codecs/tild.cc | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc
index d24a3fff4b..6de73cab4a 100644
--- a/libheif/codecs/tild.cc
+++ b/libheif/codecs/tild.cc
@@ -78,10 +78,6 @@ void Box_tilC::derive_box_version()
 
   uint8_t flags = 0;
 
-  if (dimensions_64bit(m_parameters)) {
-    flags |= 0x20;
-  }
-
   switch (m_parameters.offset_field_length) {
     case 32:
       flags |= 0;
@@ -122,6 +118,10 @@ void Box_tilC::derive_box_version()
     flags |= 0x10;
   }
 
+  if (dimensions_64bit(m_parameters)) {
+    flags |= 0x20;
+  }
+
   set_flags(flags);
 }
 
@@ -140,6 +140,7 @@ Error Box_tilC::write(StreamWriter& writer) const
 
   writer.write8(m_parameters.number_of_extra_dimensions);
 
+  // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t)
   writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width);
   writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height);
 
@@ -164,8 +165,13 @@ std::string Box_tilC::dump(Indent& indent) const
   sstr << BoxHeader::dump(indent);
 
   sstr << indent << "version: " << ((int) get_version()) << "\n"
-       << indent << "image size: " << m_parameters.image_width << "x" << m_parameters.image_height << "\n"
-       << indent << "tile size: " << m_parameters.tile_width << "x" << m_parameters.tile_height << "\n";
+       //<< indent << "image size: " << m_parameters.image_width << "x" << m_parameters.image_height << "\n"
+       << indent << "tile size: " << m_parameters.tile_width << "x" << m_parameters.tile_height << "\n"
+       << indent << "compression: " << to_fourcc(m_parameters.compression_type_fourcc) << "\n"
+       << indent << "tiles are sequential: " << (m_parameters.tiles_are_sequential ? "yes" : "no") << "\n"
+       << indent << "offset field length: " << ((int) m_parameters.offset_field_length) << " bits\n"
+       << indent << "size field length: " << ((int) m_parameters.size_field_length) << " bits\n"
+       << indent << "number of extra dimensions: " << ((int) m_parameters.number_of_extra_dimensions) << "\n";
 
   return sstr.str();
 
@@ -219,7 +225,7 @@ Error Box_tilC::parse(BitstreamRange& range)
       break;
   }
 
-  m_parameters.tiles_are_sequential = !!(flags % 0x10);
+  m_parameters.tiles_are_sequential = !!(flags & 0x10);
   bool dimensions_are_64bit = (flags & 0x20);
 
   m_parameters.number_of_extra_dimensions = range.read8();
@@ -545,7 +551,9 @@ ImageItem_Tild::decode_grid_tile(const heif_decoding_options& options, uint32_t
   uint64_t size = m_tild_header.get_tile_size(idx);
 
   Error err = get_file()->append_data_from_iloc(get_id(), data, offset, size);
-  assert(!err.error_code);
+  if (err.error_code) {
+    return err;
+  }
 
   return decode_from_compressed_data(get_compression_format(), options, data);
 }

From d9928fe95aa7ab5378452b0ee56c9c45791762ab Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Thu, 5 Sep 2024 19:37:54 +0200
Subject: [PATCH 21/41] heif_reader_range_request_result: allow 'overreading' a
 range request

---
 libheif/api/libheif/heif.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h
index ecd9c7eb23..82cf0b23d8 100644
--- a/libheif/api/libheif/heif.h
+++ b/libheif/api/libheif/heif.h
@@ -936,7 +936,7 @@ struct heif_reading_options;
 enum heif_reader_grow_status
 {
   heif_reader_grow_status_size_reached,    // requested size has been reached, we can read until this point
-  heif_reader_grow_status_timeout,         // size has not been reached yet, but it may still grow further
+  heif_reader_grow_status_timeout,         // size has not been reached yet, but it may still grow further (deprecated)
   heif_reader_grow_status_size_beyond_eof, // size has not been reached and never will. The file has grown to its full size
   heif_reader_grow_status_error            // an error has occurred
 };
@@ -945,8 +945,11 @@ struct heif_reader_range_request_result
 {
   enum heif_reader_grow_status status; // should not return 'heif_reader_grow_status_timeout'
 
-  // for status == 'heif_reader_grow_status_size_beyond_eof'
-  uint64_t range_end;           // if not the whole file range could be read, this is the end position
+  // Indicates until what position the file has been read.
+  // If we cannot read the whole file range (status == 'heif_reader_grow_status_size_beyond_eof'), this is the actual end position.
+  // On the other hand, it may be that the reader was reading more data than requested. In that case, it should indicate the full size here
+  // and libheif may decide to make use of the additional data (e.g. for filling 'tild' offset tables).
+  uint64_t range_end;
 
   // for status == 'heif_reader_grow_status_error'
   int reader_error_code;        // a reader specific error code

From 8cc6c796ac706bdf99c1f6f82f32b7873bbbf19d Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Fri, 6 Sep 2024 10:35:29 +0200
Subject: [PATCH 22/41] tild: omit writing image size and take it from ispe
 instead

---
 libheif/codecs/tild.cc | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc
index 6de73cab4a..3b3af35743 100644
--- a/libheif/codecs/tild.cc
+++ b/libheif/codecs/tild.cc
@@ -141,8 +141,8 @@ Error Box_tilC::write(StreamWriter& writer) const
   writer.write8(m_parameters.number_of_extra_dimensions);
 
   // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t)
-  writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width);
-  writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height);
+  //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width);
+  //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height);
 
   for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) {
     writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.extra_dimensions[i]);
@@ -240,6 +240,7 @@ Error Box_tilC::parse(BitstreamRange& range)
   }
 #endif
 
+  /*
   m_parameters.image_width = (dimensions_are_64bit ? range.read64() : range.read32());
   m_parameters.image_height = (dimensions_are_64bit ? range.read64() : range.read32());
 
@@ -248,6 +249,7 @@ Error Box_tilC::parse(BitstreamRange& range)
             heif_suberror_Unspecified,
             "'tild' image with zero width or height."};
   }
+*/
 
   for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) {
     uint64_t size = (dimensions_are_64bit ? range.read64() : range.read32());
@@ -435,7 +437,24 @@ Error ImageItem_Tild::on_load_file()
             "Tiled image without 'tilC' property box."};
   }
 
-  m_tild_header.set_parameters(tilC_box->get_parameters());
+  auto ispe_box = heif_file->get_property<Box_ispe>(get_id());
+  if (!ispe_box) {
+    return {heif_error_Invalid_input,
+            heif_suberror_Unspecified,
+            "Tiled image without 'ispe' property box."};
+  }
+
+  heif_tild_image_parameters parameters = tilC_box->get_parameters();
+  parameters.image_width = ispe_box->get_width();
+  parameters.image_height = ispe_box->get_height();
+
+  if (parameters.image_width == 0 || parameters.image_height == 0) {
+    return {heif_error_Invalid_input,
+            heif_suberror_Unspecified,
+            "'tild' image with zero width or height."};
+  }
+
+  m_tild_header.set_parameters(parameters);
 
   err = m_tild_header.read_full_offset_table(heif_file, get_id());
   if (err) {

From aab4c9f879e9055ac28ada33decddf8361edbef3 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Sun, 8 Sep 2024 18:18:12 +1000
Subject: [PATCH 23/41] Windows build fix for C++ version.

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1d34b935a..041f3b8e7e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,7 +41,7 @@ if(NOT MSVC)
   endif ()
 endif()
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 

From 83c0954cd5afe3b9d6493607ea7b2a5bb6e289ab Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 15:30:08 +0200
Subject: [PATCH 24/41] use C++17 [[fallthrough]]

---
 examples/heif_dec.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/heif_dec.cc b/examples/heif_dec.cc
index 431bdbc69a..661e57809d 100644
--- a/examples/heif_dec.cc
+++ b/examples/heif_dec.cc
@@ -261,7 +261,7 @@ int main(int argc, char** argv)
         break;
       case '?':
         std::cerr << "\n";
-        // fallthrough
+        [[fallthrough]];
       case 'h':
         show_help(argv[0]);
         return 0;

From 831532a6722ad68b2ecf884f35718c8fc1c92994 Mon Sep 17 00:00:00 2001
From: Brad Hards <bradh@frogmouth.net>
Date: Sun, 8 Sep 2024 18:19:32 +1000
Subject: [PATCH 25/41] Windows alternatives for unistd and friends.

---
 libheif/box.cc | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/libheif/box.cc b/libheif/box.cc
index 746e9cd720..72feeabac9 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -50,7 +50,13 @@
 #define M_PI 3.14159265358979323846
 #endif
 
-#include <unistd.h> // TODO: Windows
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <unistd.h>
+#else
+#include <fcntl.h>
+#include <io.h>
+#endif
+
 
 Fraction::Fraction(int32_t num, int32_t den)
 {
@@ -155,7 +161,7 @@ bool Fraction::is_valid() const
   return denominator != 0;
 }
 
-uint32_t from_fourcc(const char* string)
+static uint32_t from_fourcc(const char* string)
 {
   return ((string[0] << 24) |
           (string[1] << 16) |
@@ -1035,7 +1041,7 @@ Error Box_ftyp::parse(BitstreamRange& range)
   m_major_brand = range.read32();
   m_minor_version = range.read32();
 
-  if (get_box_size() <= get_header_size() + 8) {
+  if (get_box_size() - 8 <= get_header_size()) {
     // Sanity check.
     return Error(heif_error_Invalid_input,
                  heif_suberror_Invalid_box_size,
@@ -1406,8 +1412,15 @@ void Box_iloc::set_use_tmp_file(bool flag)
 {
   m_use_tmpfile = flag;
   if (flag) {
+#if !defined(_WIN32) && !defined(_WIN64)
     strcpy(m_tmp_filename, "/tmp/libheif-XXXXXX");
     m_tmpfile_fd = mkstemp(m_tmp_filename);
+#else
+    char tmpname[L_tmpnam_s];
+    // TODO: check return value (errno_t)
+    tmpnam_s(tmpname, L_tmpnam_s);
+    _sopen_s(&m_tmpfile_fd, tmpname, _O_CREAT | _O_TEMPORARY | _O_TRUNC | _O_RDWR, _SH_DENYRW, _S_IREAD | _S_IWRITE);
+#endif
   }
 }
 
@@ -1629,7 +1642,11 @@ Error Box_iloc::append_data(heif_item_id item_ID,
   extent.length = data.size();
 
   if (m_use_tmpfile && construction_method==0) {
+#if !defined(_WIN32) && !defined(_WIN64)
     ssize_t cnt = ::write(m_tmpfile_fd, data.data(), data.size());
+#else
+    int cnt = _write(m_tmpfile_fd, data.data(), data.size());
+#endif
     if (cnt < 0) {
       std::stringstream sstr;
       sstr << "Could not write to tmp file: error " << errno;
@@ -1883,7 +1900,11 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer)
 
         if (m_use_tmpfile) {
           std::vector<uint8_t> data(extent.length);
+#if !defined(_WIN32) && !defined(_WIN64)
           ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length);
+#else
+          int cnt = _read(m_tmpfile_fd, data.data(), extent.length);
+#endif
           if (cnt<0) {
             std::stringstream sstr;
             sstr << "Cannot read tmp data file, error " << errno;
@@ -2632,7 +2653,7 @@ Error Box_ipma::parse(BitstreamRange& range)
 
     int assoc_cnt = range.read8();
     for (int k = 0; k < assoc_cnt; k++) {
-      PropertyAssociation association;
+      PropertyAssociation association{};
 
       uint16_t index;
       if (get_flags() & 1) {
@@ -3902,9 +3923,9 @@ Error Box_cmin::write(StreamWriter& writer) const
 }
 
 
-std::array<double,9> mul(const std::array<double,9>& a, const std::array<double,9>& b)
+static std::array<double,9> mul(const std::array<double,9>& a, const std::array<double,9>& b)
 {
-  std::array<double,9> m;
+  std::array<double, 9> m{};
 
   m[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6];
   m[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7];

From 4af34eed99f8716fc6fc23c173ecb237e7d90494 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 15:32:41 +0200
Subject: [PATCH 26/41] fix constness of getopt* implementation for Windows

---
 extra/getopt.h      | 4 ++--
 extra/getopt_long.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/extra/getopt.h b/extra/getopt.h
index b23a4fbeee..f5d3bfaad1 100644
--- a/extra/getopt.h
+++ b/extra/getopt.h
@@ -55,8 +55,8 @@ struct option
 #define required_argument 1
 #define optional_argument 2
 
-int getopt(int, char**, char*);
-int getopt_long(int, char**, char*, struct option*, int*);
+int getopt(int, char**, const char*);
+int getopt_long(int, char**, const char*, struct option*, int*);
 
 #ifdef __cplusplus
 }
diff --git a/extra/getopt_long.c b/extra/getopt_long.c
index 2722ce90ff..a1d5055260 100644
--- a/extra/getopt_long.c
+++ b/extra/getopt_long.c
@@ -153,7 +153,7 @@ getopt2(int nargc, char * nargv, const char *ostr)
  *	Parse argc/argv argument vector.
  */
 int
-getopt_long(int nargc, char ** nargv, char * options, struct option * long_options, int * index)
+getopt_long(int nargc, char ** nargv, const char * options, struct option * long_options, int * index)
 {
 	int retval;
 

From 24a2435058e45ead8946d0e6d52ae0bf33990064 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 15:35:05 +0200
Subject: [PATCH 27/41] safe integer check in ftyp parsing

---
 libheif/box.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libheif/box.cc b/libheif/box.cc
index 72feeabac9..bd86034637 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -1041,7 +1041,8 @@ Error Box_ftyp::parse(BitstreamRange& range)
   m_major_brand = range.read32();
   m_minor_version = range.read32();
 
-  if (get_box_size() - 8 <= get_header_size()) {
+  uint64_t box_size = get_box_size();
+  if (box_size < 8 || box_size - 8 <= get_header_size()) {
     // Sanity check.
     return Error(heif_error_Invalid_input,
                  heif_suberror_Invalid_box_size,

From 7b7ac8e507a82de13e4c38f8d551d069ba88ec44 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 16:28:10 +0200
Subject: [PATCH 28/41] avcC: fix test, write extended avcC fields, error
 handling

---
 libheif/codecs/avc.cc | 159 ++++++++++++++++++++++++++++++------------
 libheif/codecs/avc.h  |   2 +-
 tests/avc_box.cc      |   2 +-
 3 files changed, 118 insertions(+), 45 deletions(-)

diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc
index f175eee0bf..920e3d7ea5 100644
--- a/libheif/codecs/avc.cc
+++ b/libheif/codecs/avc.cc
@@ -27,7 +27,8 @@
 #include "context.h"
 
 
-Error Box_avcC::parse(BitstreamRange &range) {
+Error Box_avcC::parse(BitstreamRange& range)
+{
   m_configuration.configuration_version = range.read8();
   m_configuration.AVCProfileIndication = range.read8();
   m_configuration.profile_compatibility = range.read8();
@@ -52,10 +53,11 @@ Error Box_avcC::parse(BitstreamRange &range) {
     m_pps.push_back(pps);
   }
 
+  // See ISO/IEC 14496-15 2017 Section 5.3.3.1.2
   if ((m_configuration.AVCProfileIndication != 66) &&
       (m_configuration.AVCProfileIndication != 77) &&
       (m_configuration.AVCProfileIndication != 88)) {
-    m_configuration.chroma_format = range.read8() & 0b00000011;
+    m_configuration.chroma_format = (heif_chroma) (range.read8() & 0b00000011);
     m_configuration.bit_depth_luma = 8 + (range.read8() & 0b00000111);
     m_configuration.bit_depth_chroma = 8 + (range.read8() & 0b00000111);
     uint8_t numOfSequenceParameterSetExt = range.read8();
@@ -70,7 +72,8 @@ Error Box_avcC::parse(BitstreamRange &range) {
   return range.get_error();
 }
 
-Error Box_avcC::write(StreamWriter &writer) const {
+Error Box_avcC::write(StreamWriter& writer) const
+{
   size_t box_start = reserve_box_header_space(writer);
 
   writer.write8(m_configuration.configuration_version);
@@ -79,55 +82,125 @@ Error Box_avcC::write(StreamWriter &writer) const {
   writer.write8(m_configuration.AVCLevelIndication);
   uint8_t lengthSizeMinusOneWithReserved = 0b11111100 | ((m_configuration.lengthSize - 1) & 0b11);
   writer.write8(lengthSizeMinusOneWithReserved);
+
+  if (m_sps.size() > 0b00011111) { // the SPS count is stored in the low 5 bits of the next byte
+    return {heif_error_Encoding_error,
+            heif_suberror_Unspecified,
+            "Cannot write more than 31 SPS into avcC box."};
+  }
+
   uint8_t numSpsWithReserved = 0b11100000 | (m_sps.size() & 0b00011111);
   writer.write8(numSpsWithReserved);
-  for (const auto &sps: m_sps) {
+  for (const auto& sps : m_sps) {
+    if (sps.size() > 0xFFFF) {
+      return {heif_error_Encoding_error,
+              heif_suberror_Unspecified,
+              "Cannot write SPS larger than 65535 bytes into avcC box."};
+    }
     writer.write16((uint16_t) sps.size());
     writer.write(sps);
   }
+
+  if (m_pps.size() > 0xFF) {
+    return {heif_error_Encoding_error,
+            heif_suberror_Unspecified,
+            "Cannot write more than 255 PPS into avcC box."};
+  }
+
   writer.write8(m_pps.size() & 0xFF);
-  for (const auto &pps: m_pps) {
+  for (const auto& pps : m_pps) {
+    if (pps.size() > 0xFFFF) {
+      return {heif_error_Encoding_error,
+              heif_suberror_Unspecified,
+              "Cannot write PPS larger than 65535 bytes into avcC box."};
+    }
     writer.write16((uint16_t) pps.size());
     writer.write(pps);
   }
+
+  if ((m_configuration.AVCProfileIndication != 66) &&
+      (m_configuration.AVCProfileIndication != 77) &&
+      (m_configuration.AVCProfileIndication != 88)) {
+    writer.write8((uint8_t) (0b11111100 | (m_configuration.chroma_format & 0b11))); // upper 6 bits are reserved and set to 1
+    writer.write8((uint8_t) (0b11111000 | ((m_configuration.bit_depth_luma - 8) & 0b111))); // upper 5 bits are reserved and set to 1
+    writer.write8((uint8_t) (0b11111000 | ((m_configuration.bit_depth_chroma - 8) & 0b111)));
+
+    if (m_sps_ext.size() > 0xFF) {
+      return {heif_error_Encoding_error,
+              heif_suberror_Unspecified,
+              "Cannot write more than 255 SPS-Ext into avcC box."};
+    }
+
+    writer.write8(m_sps_ext.size() & 0xFF);
+    for (const auto& spsext : m_sps_ext) {
+      if (spsext.size() > 0xFFFF) {
+        return {heif_error_Encoding_error,
+                heif_suberror_Unspecified,
+                "Cannot write SPS-Ext larger than 65535 bytes into avcC box."};
+      }
+      writer.write16((uint16_t) spsext.size());
+      writer.write(spsext);
+    }
+  }
+
   prepend_header(writer, box_start);
 
   return Error::Ok;
 }
 
-std::string Box_avcC::dump(Indent &indent) const {
+std::string Box_avcC::dump(Indent& indent) const
+{
   std::ostringstream sstr;
   sstr << Box::dump(indent);
-  sstr << indent << "configuration_version: " << ((int)m_configuration.configuration_version) << "\n"
-       << indent << "AVCProfileIndication: " << ((int)m_configuration.AVCProfileIndication) << " (" << profileIndicationAsText() << ")\n"
-       << indent << "profile_compatibility: " << ((int)m_configuration.profile_compatibility) << "\n"
-       << indent << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication) << "\n"
-       << indent << "Chroma format: " << ((int)m_configuration.chroma_format) << "\n"
-       << indent << "Bit depth luma: " << ((int)m_configuration.bit_depth_luma) << "\n"
-       << indent << "Bit depth chroma: " << ((int)m_configuration.bit_depth_chroma) << "\n";
-
-  for (const auto &sps : m_sps) {
+  sstr << indent << "configuration_version: " << ((int) m_configuration.configuration_version) << "\n"
+       << indent << "AVCProfileIndication: " << ((int) m_configuration.AVCProfileIndication) << " (" << profileIndicationAsText() << ")\n"
+       << indent << "profile_compatibility: " << ((int) m_configuration.profile_compatibility) << "\n"
+       << indent << "AVCLevelIndication: " << ((int) m_configuration.AVCLevelIndication) << "\n"
+       << indent << "Chroma format: ";
+
+  switch (m_configuration.chroma_format) {
+    case heif_chroma_monochrome:
+      sstr << "4:0:0\n";
+      break;
+    case heif_chroma_420:
+      sstr << "4:2:0\n";
+      break;
+    case heif_chroma_422:
+      sstr << "4:2:2\n";
+      break;
+    case heif_chroma_444:
+      sstr << "4:4:4\n";
+      break;
+    default:
+      sstr << "unsupported\n";
+      break;
+  }
+
+  sstr << indent << "Bit depth luma: " << ((int) m_configuration.bit_depth_luma) << "\n"
+       << indent << "Bit depth chroma: " << ((int) m_configuration.bit_depth_chroma) << "\n";
+
+  for (const auto& sps : m_sps) {
     sstr << indent << "SPS: ";
     for (uint8_t b : sps) {
-      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " ";
+      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " ";
     }
     sstr << "\n";
     sstr << std::dec;
   }
 
-  for (const auto &spsext : m_sps_ext) {
+  for (const auto& spsext : m_sps_ext) {
     sstr << indent << "SPS-EXT: ";
     for (uint8_t b : spsext) {
-      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " ";
+      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " ";
     }
     sstr << "\n";
     sstr << std::dec;
   }
 
-  for (const auto &pps : m_pps) {
+  for (const auto& pps : m_pps) {
     sstr << indent << "PPS: ";
     for (uint8_t b : pps) {
-      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " ";
+      sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " ";
     }
     sstr << "\n";
     sstr << std::dec;
@@ -136,27 +209,28 @@ std::string Box_avcC::dump(Indent &indent) const {
   return sstr.str();
 }
 
-std::string Box_avcC::profileIndicationAsText() const {
+std::string Box_avcC::profileIndicationAsText() const
+{
   // See ISO/IEC 14496-10:2022 Annex A
   switch (m_configuration.AVCProfileIndication) {
-  case 44:
-    return "CALVC 4:4:4";
-  case 66:
-    return "Constrained Baseline";
-  case 77:
-    return "Main";
-  case 88:
-    return "Extended";
-  case 100:
-    return "High variant";
-  case 110:
-    return "High 10";
-  case 122:
-    return "High 4:2:2";
-  case 244:
-    return "High 4:4:4";
-  default:
-    return "Unknown";
+    case 44:
+      return "CAVLC 4:4:4";
+    case 66:
+      return "Constrained Baseline";
+    case 77:
+      return "Main";
+    case 88:
+      return "Extended";
+    case 100:
+      return "High variant";
+    case 110:
+      return "High 10";
+    case 122:
+      return "High 4:2:2";
+    case 244:
+      return "High 4:4:4";
+    default:
+      return "Unknown";
   }
 }
 
@@ -192,11 +266,10 @@ void Box_avcC::get_header_nals(std::vector<uint8_t>& data) const
 }
 
 
-
 Result<ImageItem::CodedImageData> ImageItem_AVC::encode(const std::shared_ptr<HeifPixelImage>& image,
-                                                         struct heif_encoder* encoder,
-                                                         const struct heif_encoding_options& options,
-                                                         enum heif_image_input_class input_class)
+                                                        struct heif_encoder* encoder,
+                                                        const struct heif_encoding_options& options,
+                                                        enum heif_image_input_class input_class)
 {
 #if 0
   CodedImageData codedImage;
diff --git a/libheif/codecs/avc.h b/libheif/codecs/avc.h
index 59e2fb6369..1bbbaddd4e 100644
--- a/libheif/codecs/avc.h
+++ b/libheif/codecs/avc.h
@@ -41,7 +41,7 @@ class Box_avcC : public Box {
     uint8_t profile_compatibility; // constraint set flags
     uint8_t AVCLevelIndication; // level_idc
     uint8_t lengthSize;
-    uint8_t chroma_format;
+    heif_chroma chroma_format = heif_chroma_420; // Note: avcC integer value can be cast to heif_chroma enum
     uint8_t bit_depth_luma = 8;
     uint8_t bit_depth_chroma = 8;
   };
diff --git a/tests/avc_box.cc b/tests/avc_box.cc
index 9e3dc2929e..fad1cd9814 100644
--- a/tests/avc_box.cc
+++ b/tests/avc_box.cc
@@ -68,7 +68,7 @@ TEST_CASE("avcC") {
                         "AVCProfileIndication: 66 (Constrained Baseline)\n"
                         "profile_compatibility: 128\n"
                         "AVCLevelIndication: 30\n"
-                        "Chroma format: 32\n"
+                        "Chroma format: 4:2:0\n"
                         "Bit depth luma: 8\n"
                         "Bit depth chroma: 8\n"
                         "SPS: 67 64 00 28 ac 72 04 40 40 04 1a 10 00 00 03 00 "

From 252ed4a68311104c77f94eb108e8b06371c47206 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 16:32:10 +0200
Subject: [PATCH 29/41] fix windows compilation (#1302)

---
 libheif/box.cc | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/libheif/box.cc b/libheif/box.cc
index bd86034637..3af0e33605 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -1413,14 +1413,18 @@ void Box_iloc::set_use_tmp_file(bool flag)
 {
   m_use_tmpfile = flag;
   if (flag) {
-#if !defined(_WIN32) && !defined(_WIN64)
+#if !defined(_WIN32)
     strcpy(m_tmp_filename, "/tmp/libheif-XXXXXX");
     m_tmpfile_fd = mkstemp(m_tmp_filename);
 #else
+    // TODO Currently unused code. Implement when needed.
+    assert(false);
+#  if 0
     char tmpname[L_tmpnam_s];
     // TODO: check return value (errno_t)
     tmpnam_s(tmpname, L_tmpnam_s);
     _sopen_s(&m_tmpfile_fd, tmpname, _O_CREAT | _O_TEMPORARY | _O_TRUNC | _O_RDWR, _SH_DENYRW, _S_IREAD | _S_IWRITE);
+#  endif
 #endif
   }
 }
@@ -1643,10 +1647,14 @@ Error Box_iloc::append_data(heif_item_id item_ID,
   extent.length = data.size();
 
   if (m_use_tmpfile && construction_method==0) {
-#if !defined(_WIN32) && !defined(_WIN64)
+#if !defined(_WIN32)
     ssize_t cnt = ::write(m_tmpfile_fd, data.data(), data.size());
 #else
+    // TODO Currently unused code. Implement when needed.
+    assert(false);
+#  if 0
     int cnt = _write(m_tmpfile_fd, data.data(), data.size());
+#  endif
 #endif
     if (cnt < 0) {
       std::stringstream sstr;
@@ -1904,7 +1912,11 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer)
 #if !defined(_WIN32) && !defined(_WIN64)
           ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length);
 #else
+          // TODO Currently unused code. Implement when needed.
+          assert(false);
+# if 0
           int cnt = _read(m_tmpfile_fd, data.data(), extent.length);
+# endif
 #endif
           if (cnt<0) {
             std::stringstream sstr;

From 9bd33c015f465365ed535634a786d80ef202624f Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 16:36:13 +0200
Subject: [PATCH 30/41] remove unnecessary check for _WIN64 (#1302)

---
 libheif/box.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libheif/box.cc b/libheif/box.cc
index 3af0e33605..256ee61421 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -50,7 +50,7 @@
 #define M_PI 3.14159265358979323846
 #endif
 
-#if !defined(_WIN32) && !defined(_WIN64)
+#if !defined(_WIN32)
 #include <unistd.h>
 #else
 #include <fcntl.h>
@@ -1909,7 +1909,7 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer)
 
         if (m_use_tmpfile) {
           std::vector<uint8_t> data(extent.length);
-#if !defined(_WIN32) && !defined(_WIN64)
+#if !defined(_WIN32)
           ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length);
 #else
           // TODO Currently unused code. Implement when needed.

From 64c8cae5195ad4f65497845aa3493bec6c86b027 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 16:41:36 +0200
Subject: [PATCH 31/41] fix windows compilation, undefined 'cnt' (#1302)

---
 libheif/box.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libheif/box.cc b/libheif/box.cc
index 256ee61421..94d398f78d 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -1654,6 +1654,8 @@ Error Box_iloc::append_data(heif_item_id item_ID,
     assert(false);
 #  if 0
     int cnt = _write(m_tmpfile_fd, data.data(), data.size());
+#  else
+    int cnt = -1;
 #  endif
 #endif
     if (cnt < 0) {
@@ -1916,6 +1918,8 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer)
           assert(false);
 # if 0
           int cnt = _read(m_tmpfile_fd, data.data(), extent.length);
+# else
+          int cnt = -1;
 # endif
 #endif
           if (cnt<0) {

From 2395082e8a628303d659c5b354d0780767336aad Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Tue, 10 Sep 2024 19:53:46 +0200
Subject: [PATCH 32/41] tild: remove support for 64bit dimensions and cleanup

---
 libheif/api/libheif/heif.h |  6 ++--
 libheif/codecs/tild.cc     | 67 +++++++++-----------------------------
 2 files changed, 19 insertions(+), 54 deletions(-)

diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h
index 82cf0b23d8..855402b74d 100644
--- a/libheif/api/libheif/heif.h
+++ b/libheif/api/libheif/heif.h
@@ -2429,8 +2429,8 @@ struct heif_tild_image_parameters {
 
   // --- version 1
 
-  uint64_t image_width;
-  uint64_t image_height;
+  uint32_t image_width;
+  uint32_t image_height;
 
   uint32_t tile_width;
   uint32_t tile_height;
@@ -2441,7 +2441,7 @@ struct heif_tild_image_parameters {
   uint8_t size_field_length;     // one of:  0, 24, 32, 64
 
   uint8_t number_of_extra_dimensions;  // 0 for normal images, 1 for volumetric (3D), ...
-  uint64_t extra_dimensions[8];        // size of extra dimensions (first 8 dimensions)
+  uint32_t extra_dimensions[8];        // size of extra dimensions (first 8 dimensions)
 
   // boolean flags
   uint8_t tiles_are_sequential;  // TODO: can we derive this automatically
diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc
index 3b3af35743..51ba05fe4b 100644
--- a/libheif/codecs/tild.cc
+++ b/libheif/codecs/tild.cc
@@ -66,12 +66,6 @@ uint64_t nTiles_v(const heif_tild_image_parameters& params)
 }
 
 
-bool dimensions_64bit(const heif_tild_image_parameters& params)
-{
-  return (params.image_width > 0xFFFF || params.image_height > 0xFFFF);
-}
-
-
 void Box_tilC::derive_box_version()
 {
   set_version(1);
@@ -112,16 +106,10 @@ void Box_tilC::derive_box_version()
       assert(false); // TODO: return error
   }
 
-  // printf("> %d %d -> %d\n", m_parameters.offset_field_length, m_parameters.size_field_length, (int)flags);
-
   if (m_parameters.tiles_are_sequential) {
     flags |= 0x10;
   }
 
-  if (dimensions_64bit(m_parameters)) {
-    flags |= 0x20;
-  }
-
   set_flags(flags);
 }
 
@@ -132,26 +120,20 @@ Error Box_tilC::write(StreamWriter& writer) const
 
   size_t box_start = reserve_box_header_space(writer);
 
-  bool dimensions_are_64bit = dimensions_64bit(m_parameters);
-
   if (m_parameters.number_of_extra_dimensions > 8) {
     assert(false); // currently not supported
   }
 
-  writer.write8(m_parameters.number_of_extra_dimensions);
+  writer.write32(m_parameters.tile_width);
+  writer.write32(m_parameters.tile_height);
+  writer.write32(m_parameters.compression_type_fourcc);
 
-  // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t)
-  //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width);
-  //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height);
+  writer.write8(m_parameters.number_of_extra_dimensions);
 
   for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) {
-    writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.extra_dimensions[i]);
+    writer.write32(m_parameters.extra_dimensions[i]);
   }
 
-  writer.write32(m_parameters.tile_width);
-  writer.write32(m_parameters.tile_height);
-  writer.write32(m_parameters.compression_type_fourcc);
-
   prepend_header(writer, box_start);
 
   return Error::Ok;
@@ -226,33 +208,25 @@ Error Box_tilC::parse(BitstreamRange& range)
   }
 
   m_parameters.tiles_are_sequential = !!(flags & 0x10);
-  bool dimensions_are_64bit = (flags & 0x20);
 
-  m_parameters.number_of_extra_dimensions = range.read8();
 
-#if 0
-  if (data.size() < idx + 2 * (dimensions_are_64bit ? 8 : 4)) {
-    return eofError;
-  }
-
-  if (data.size() < idx + (2 + m_parameters.number_of_extra_dimensions) * (dimensions_are_64bit ? 8 : 4) + 3 * 4) {
-    return eofError;
-  }
-#endif
-
-  /*
-  m_parameters.image_width = (dimensions_are_64bit ? range.read64() : range.read32());
-  m_parameters.image_height = (dimensions_are_64bit ? range.read64() : range.read32());
+  m_parameters.tile_width = range.read32();
+  m_parameters.tile_height = range.read32();
+  m_parameters.compression_type_fourcc = range.read32();
 
-  if (m_parameters.image_width == 0 || m_parameters.image_height == 0) {
+  if (m_parameters.tile_width == 0 || m_parameters.tile_height == 0) {
     return {heif_error_Invalid_input,
             heif_suberror_Unspecified,
-            "'tild' image with zero width or height."};
+            "Tile with zero width or height."};
   }
-*/
+
+
+  // --- extra dimensions
+
+  m_parameters.number_of_extra_dimensions = range.read8();
 
   for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) {
-    uint64_t size = (dimensions_are_64bit ? range.read64() : range.read32());
+    uint32_t size = range.read32();
 
     if (size == 0) {
       return {heif_error_Invalid_input,
@@ -268,15 +242,6 @@ Error Box_tilC::parse(BitstreamRange& range)
     }
   }
 
-  m_parameters.tile_width = range.read32();
-  m_parameters.tile_height = range.read32();
-  m_parameters.compression_type_fourcc = range.read32();
-
-  if (m_parameters.tile_width == 0 || m_parameters.tile_height == 0) {
-    return {heif_error_Invalid_input,
-            heif_suberror_Unspecified,
-            "Tile with zero width or height."};
-  }
 
   return range.get_error();
 }

From 9483961432ae65b911bb4208416ec5833ed736a5 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 12:45:57 +0200
Subject: [PATCH 33/41] limit maximum memory allocation (should fix ClusterFuzz
 71389)

---
 libheif/pixelimage.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libheif/pixelimage.cc b/libheif/pixelimage.cc
index 0ec3092c2e..5583e1590b 100644
--- a/libheif/pixelimage.cc
+++ b/libheif/pixelimage.cc
@@ -21,6 +21,7 @@
 
 #include "pixelimage.h"
 #include "common_utils.h"
+#include "security_limits.h"
 
 #include <cassert>
 #include <cstring>
@@ -256,8 +257,12 @@ bool HeifPixelImage::ImagePlane::alloc(uint32_t width, uint32_t height, heif_cha
   stride = m_mem_width * bytes_per_pixel;
   stride = (stride + alignment - 1U) & ~(alignment - 1U);
 
+  if ((MAX_MEMORY_BLOCK_SIZE - (alignment + 1)) / stride < m_mem_height) {
+    return false;
+  }
+
   try {
-    allocated_mem = new uint8_t[m_mem_height * stride + alignment - 1];
+    allocated_mem = new uint8_t[static_cast<size_t>(m_mem_height) * stride + alignment - 1];
     uint8_t* mem_8 = allocated_mem;
 
     // shift beginning of image data to aligned memory position

From 10e455bdd01a468c79bc6a8c1b266467419c5256 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 12:53:13 +0200
Subject: [PATCH 34/41] iden: make sure that referenced image item exists

---
 libheif/codecs/iden.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libheif/codecs/iden.cc b/libheif/codecs/iden.cc
index 26b96ab4f7..f177a7e11d 100644
--- a/libheif/codecs/iden.cc
+++ b/libheif/codecs/iden.cc
@@ -68,6 +68,11 @@ Result<std::shared_ptr<HeifPixelImage>> ImageItem_iden::decode_compressed_image(
   }
 
   std::shared_ptr<const ImageItem> imgitem = get_context()->get_image(reference_image_id);
+  if (!imgitem) {
+    return Error(heif_error_Invalid_input,
+                 heif_suberror_Unspecified,
+                 "'iden' image references unavailable image");
+  }
 
   return imgitem->decode_compressed_image(options, decode_tile_only, tile_x0, tile_y0);
 }

From f50ef3bf2193bec3b316c22e495771869aa3bcb7 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 16:42:01 +0200
Subject: [PATCH 35/41] iovl: detect self-references

---
 libheif/codecs/overlay.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libheif/codecs/overlay.cc b/libheif/codecs/overlay.cc
index 64798350b9..e5e64c4554 100644
--- a/libheif/codecs/overlay.cc
+++ b/libheif/codecs/overlay.cc
@@ -309,8 +309,16 @@ Result<std::shared_ptr<HeifPixelImage>> ImageItem_Overlay::decode_overlay_image(
     return err;
   }
 
-
   for (size_t i = 0; i < m_overlay_image_ids.size(); i++) {
+
+    // detect if 'iovl' is referencing itself
+
+    if (m_overlay_image_ids[i] == get_id()) {
+      return Error{heif_error_Invalid_input,
+                   heif_suberror_Unspecified,
+                   "Self-reference in 'iovl' image item."};
+    }
+
     auto imgItem = get_context()->get_image(m_overlay_image_ids[i]);
     if (!imgItem) {
       return Error(heif_error_Invalid_input, heif_suberror_Nonexisting_item_referenced, "'iovl' image references a non-existing item.");

From 4e9eb8ee6264d860f928dfaa21883f8ddc1edcc8 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 17:33:52 +0200
Subject: [PATCH 36/41] define chroma-420 sample position enum

---
 libheif/pixelimage.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/libheif/pixelimage.h b/libheif/pixelimage.h
index 42433bbbe1..24936e868c 100644
--- a/libheif/pixelimage.h
+++ b/libheif/pixelimage.h
@@ -55,6 +55,20 @@ bool is_integer_multiple_of_chroma_size(uint32_t width,
 // Returns the list of valid heif_chroma values for a given colorspace.
 std::vector<heif_chroma> get_valid_chroma_values_for_colorspace(heif_colorspace colorspace);
 
+// TODO: move to public API when used
+enum heif_chroma420_sample_position {
+  // values 0-5 according to ISO 23091-2 / ITU-T H.273
+  heif_chroma420_sample_position_00_05 = 0,
+  heif_chroma420_sample_position_05_05 = 1,
+  heif_chroma420_sample_position_00_00 = 2,
+  heif_chroma420_sample_position_05_00 = 3,
+  heif_chroma420_sample_position_00_10 = 4,
+  heif_chroma420_sample_position_05_10 = 5,
+
+  // value 6 according to ISO 23001-17
+  heif_chroma420_sample_position_00_00_01_00 = 6
+};
+
 
 class HeifPixelImage : public std::enable_shared_from_this<HeifPixelImage>,
                        public ErrorBuffer

From e962b5919ce990118bfa38aa85796b0a9a72c4a2 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 19:44:36 +0200
Subject: [PATCH 37/41] fix HeifContext::has_alpha() for broken input (fixes
 #1305)

---
 libheif/context.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libheif/context.cc b/libheif/context.cc
index 8a8b96f9db..288e1fff61 100644
--- a/libheif/context.cc
+++ b/libheif/context.cc
@@ -831,7 +831,6 @@ Error HeifContext::interpret_heif_file()
 
 bool HeifContext::has_alpha(heif_item_id ID) const
 {
-
   assert(is_image(ID));
   auto img = m_all_images.find(ID)->second;
 
@@ -843,7 +842,10 @@ bool HeifContext::has_alpha(heif_item_id ID) const
 
   heif_colorspace colorspace;
   heif_chroma chroma;
-  img->get_coded_image_colorspace(&colorspace, &chroma);
+  Error err = img->get_coded_image_colorspace(&colorspace, &chroma);
+  if (err) {
+    return false;
+  }
 
   if (chroma == heif_chroma_interleaved_RGBA ||
       chroma == heif_chroma_interleaved_RRGGBBAA_BE ||

From 4563a2f650ccde659a47e64682e56693c3e18451 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Wed, 11 Sep 2024 19:46:04 +0200
Subject: [PATCH 38/41] move get_tile_size() into ImageItem class

---
 libheif/api/libheif/heif.cc  | 13 +------------
 libheif/codecs/grid.h        |  2 +-
 libheif/codecs/image_item.cc |  7 +++++++
 libheif/codecs/image_item.h  |  2 ++
 libheif/codecs/tild.h        |  2 +-
 libheif/context.cc           |  2 +-
 6 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/libheif/api/libheif/heif.cc b/libheif/api/libheif/heif.cc
index a98a74299d..0c8bfdbf15 100644
--- a/libheif/api/libheif/heif.cc
+++ b/libheif/api/libheif/heif.cc
@@ -909,18 +909,7 @@ struct heif_error heif_image_handle_get_tile_size(const struct heif_image_handle
 
   uint32_t w,h;
 
-  if (std::shared_ptr<ImageItem_Grid> gridItem = std::dynamic_pointer_cast<ImageItem_Grid>(handle->image)) {
-    gridItem->get_tile_size(w,h);
-  }
-  else if (std::shared_ptr<ImageItem_Tild> tildItem = std::dynamic_pointer_cast<ImageItem_Tild>(handle->image)) {
-    tildItem->get_tile_size(w,h);
-  }
-  else {
-    // return whole image size (the image is the only tile)
-
-    w = handle->image->get_width();
-    h = handle->image->get_height();
-  }
+  handle->image->get_tile_size(w,h);
 
   if (tile_width) {
     *tile_width = w;
diff --git a/libheif/codecs/grid.h b/libheif/codecs/grid.h
index 1d84cac286..dab6404a0f 100644
--- a/libheif/codecs/grid.h
+++ b/libheif/codecs/grid.h
@@ -112,7 +112,7 @@ class ImageItem_Grid : public ImageItem
 
   heif_image_tiling get_heif_image_tiling() const;
 
-  void get_tile_size(uint32_t& w, uint32_t& h) const;
+  void get_tile_size(uint32_t& w, uint32_t& h) const override;
 
 private:
   ImageGrid m_grid_spec;
diff --git a/libheif/codecs/image_item.cc b/libheif/codecs/image_item.cc
index fa969a0175..b44d384809 100644
--- a/libheif/codecs/image_item.cc
+++ b/libheif/codecs/image_item.cc
@@ -403,6 +403,13 @@ uint32_t ImageItem::get_ispe_height() const
 }
 
 
+void ImageItem::get_tile_size(uint32_t& w, uint32_t& h) const
+{
+  w = get_width();
+  h = get_height();
+}
+
+
 Error ImageItem::get_coded_image_colorspace(heif_colorspace* out_colorspace, heif_chroma* out_chroma) const
 {
   heif_item_id id;
diff --git a/libheif/codecs/image_item.h b/libheif/codecs/image_item.h
index 0bd8069165..d505409173 100644
--- a/libheif/codecs/image_item.h
+++ b/libheif/codecs/image_item.h
@@ -128,6 +128,8 @@ class ImageItem : public ErrorBuffer
     m_height = h;
   }
 
+  virtual void get_tile_size(uint32_t& w, uint32_t& h) const;
+
   Error get_coded_image_colorspace(heif_colorspace* out_colorspace, heif_chroma* out_chroma) const;
 
   virtual void process_before_write() { }
diff --git a/libheif/codecs/tild.h b/libheif/codecs/tild.h
index 955fa69671..38f377bb3a 100644
--- a/libheif/codecs/tild.h
+++ b/libheif/codecs/tild.h
@@ -159,7 +159,7 @@ class ImageItem_Tild : public ImageItem
 
   heif_image_tiling get_heif_image_tiling() const;
 
-  void get_tile_size(uint32_t& w, uint32_t& h) const;
+  void get_tile_size(uint32_t& w, uint32_t& h) const override;
 
 private:
   TildHeader m_tild_header;
diff --git a/libheif/context.cc b/libheif/context.cc
index 288e1fff61..f7c769902d 100644
--- a/libheif/context.cc
+++ b/libheif/context.cc
@@ -864,7 +864,7 @@ bool HeifContext::has_alpha(heif_item_id ID) const
     }
 
     ImageGrid grid;
-    Error err = grid.parse(grid_data);
+    err = grid.parse(grid_data);
     if (err) {
       return false;
     }

From 487318c59f3693b04ac6590f40726934d2a19775 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Thu, 12 Sep 2024 20:02:31 +0200
Subject: [PATCH 39/41] url-box: parse 'data-in-same-file' flag

---
 libheif/box.cc | 8 +++++++-
 libheif/box.h  | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libheif/box.cc b/libheif/box.cc
index 94d398f78d..2660611057 100644
--- a/libheif/box.cc
+++ b/libheif/box.cc
@@ -3726,7 +3726,13 @@ Error Box_url::parse(BitstreamRange& range)
     return unsupported_version_error("url");
   }
 
-  m_location = range.read_string();
+  if (get_flags() & 1) {
+    // data in same file
+    m_location.clear();
+  }
+  else {
+    m_location = range.read_string();
+  }
 
   return range.get_error();
 }
diff --git a/libheif/box.h b/libheif/box.h
index a3f037fa20..4807837cbf 100644
--- a/libheif/box.h
+++ b/libheif/box.h
@@ -1045,6 +1045,8 @@ class Box_url : public FullBox
 public:
   std::string dump(Indent&) const override;
 
+  bool is_same_file() const { return m_location.empty(); }
+
 protected:
   Error parse(BitstreamRange& range) override;
 

From 3a043d97ce65773eef7473e26f89f71cb86a1808 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Fri, 13 Sep 2024 00:12:59 +0200
Subject: [PATCH 40/41] add option to set plugin install directory
 independently from search path (#1307)

---
 CMakeLists.txt                 | 12 ++++++++++--
 libheif/plugins/CMakeLists.txt |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 041f3b8e7e..d5ac5c66a8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -66,11 +66,19 @@ LIST (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
 # --- codec plugins
 
 option(ENABLE_PLUGIN_LOADING "Support loading of plugins" ON)
-set(PLUGIN_DIRECTORY "${CMAKE_INSTALL_FULL_LIBDIR}/libheif" CACHE STRING "Plugin install directory")
+set(PLUGIN_DIRECTORY "${CMAKE_INSTALL_FULL_LIBDIR}/libheif" CACHE STRING "Plugin directory")
+set(PLUGIN_INSTALL_DIRECTORY "" CACHE STRING "Plugin install directory (leaving it empty will use PLUGIN_DIRECTORY)")
 
 if (ENABLE_PLUGIN_LOADING)
     set(PLUGIN_LOADING_SUPPORTED_AND_ENABLED TRUE)
-    install(DIRECTORY DESTINATION ${PLUGIN_DIRECTORY} DIRECTORY_PERMISSIONS
+
+    if (PLUGIN_INSTALL_DIRECTORY STREQUAL "")
+        set(COMPUTED_PLUGIN_INSTALL_DIRECTORY ${PLUGIN_DIRECTORY})
+    else ()
+        set(COMPUTED_PLUGIN_INSTALL_DIRECTORY ${PLUGIN_INSTALL_DIRECTORY})
+    endif ()
+
+    install(DIRECTORY DESTINATION ${COMPUTED_PLUGIN_INSTALL_DIRECTORY} DIRECTORY_PERMISSIONS
         OWNER_WRITE OWNER_READ OWNER_EXECUTE
         GROUP_READ GROUP_EXECUTE
         WORLD_READ WORLD_EXECUTE)
diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt
index 58418d9983..18e4bdb158 100644
--- a/libheif/plugins/CMakeLists.txt
+++ b/libheif/plugins/CMakeLists.txt
@@ -22,7 +22,7 @@ macro(plugin_compilation name varName foundName optionName defineName)
             target_link_libraries(heif-${name} PRIVATE ${${varName}_LIBRARIES} heif)
 
             install(TARGETS heif-${name}
-                    LIBRARY DESTINATION ${PLUGIN_DIRECTORY}
+                    LIBRARY DESTINATION ${COMPUTED_PLUGIN_INSTALL_DIRECTORY}
                     )
         else ()
             message("Compiling '" ${name} "' as built-in backend")

From 3608adf4c39325848087dd912700df5c7c0cd9d7 Mon Sep 17 00:00:00 2001
From: Dirk Farin <dirk.farin@gmail.com>
Date: Fri, 13 Sep 2024 11:55:31 +0200
Subject: [PATCH 41/41] nvdec: show gfxcard name in plugin description

---
 libheif/plugins/decoder_nvdec.cc | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc
index 478430089f..c78851c361 100644
--- a/libheif/plugins/decoder_nvdec.cc
+++ b/libheif/plugins/decoder_nvdec.cc
@@ -48,17 +48,31 @@ static char plugin_name[MAX_PLUGIN_NAME_LENGTH];
 
 static const char *nvdec_plugin_name()
 {
-    snprintf(plugin_name, MAX_PLUGIN_NAME_LENGTH, "NVIDIA Video Decoder (Hardware)");
-
-    // make sure that the string is null-terminated
-    plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0;
-
     return plugin_name;
 }
 
 static void nvdec_init_plugin()
 {
     cuInit(0);
+
+    CUdevice cuDevice = 0;
+    CUresult result;
+    result = cuDeviceGet(&cuDevice, 0);
+    if (result != CUDA_SUCCESS)
+    {
+      return;
+    }
+
+    char szDeviceName[50];
+    result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice);
+    if (result != CUDA_SUCCESS) {
+      return;
+    }
+
+    snprintf(plugin_name, MAX_PLUGIN_NAME_LENGTH, "NVIDIA Video Decoder (%s)", szDeviceName);
+
+    // make sure that the string is null-terminated
+    plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0;
 }
 
 static void nvdec_deinit_plugin()