From 6e823804786506d0f87edb77b0f20f4fdbd2d471 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 08:40:48 +1000 Subject: [PATCH 01/41] minor encoder reporting cleanup --- CMakeLists.txt | 17 +++++++++-------- examples/heif_enc.cc | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c008b84a8..c1d34b935a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -164,7 +164,7 @@ endif () # openh264 decoder plugin_option(OpenH264_DECODER "OpenH264 decoder" ON OFF) -plugin_option(OpenH264_ENCODER "OpenH264 encoder" ON OFF) +# plugin_option(OpenH264_ENCODER "OpenH264 encoder" ON OFF) if (WITH_OpenH264_ENCODER OR WITH_OpenH264_DECODER) find_package(OpenH264) @@ -173,9 +173,9 @@ if (WITH_OpenH264_ENCODER OR WITH_OpenH264_DECODER) if (OpenH264_FOUND AND WITH_OpenH264_DECODER) set(OpenH264_DECODER_FOUND TRUE) endif() - if (OpenH264_FOUND AND WITH_OpenH264_ENCODER) - set(OpenH264_ENCODER_FOUND TRUE) - endif() +# if (OpenH264_FOUND AND WITH_OpenH264_ENCODER) +# set(OpenH264_ENCODER_FOUND TRUE) +# endif() endif() @@ -268,7 +268,7 @@ plugin_compilation_info(RAV1E RAV1E "Rav1e AV1 encoder") plugin_compilation_info(JPEG_DECODER JPEG "JPEG decoder") plugin_compilation_info(JPEG_ENCODER JPEG "JPEG encoder") plugin_compilation_info(OpenH264_DECODER OpenH264_DECODER "OpenH264 decoder") -plugin_compilation_info(OpenH264_ENCODER OpenH264_ENCODER "OpenH264 encoder") +# plugin_compilation_info(OpenH264_ENCODER OpenH264_ENCODER "OpenH264 encoder") plugin_compilation_info(OpenJPEG_DECODER OpenJPEG "OpenJPEG J2K decoder") plugin_compilation_info(OpenJPEG_ENCODER OpenJPEG "OpenJPEG J2K encoder") # plugin_compilation_info(OPENJPH_DECODER OPENJPH "OpenJPH HT-J2K decoder") @@ -351,14 +351,15 @@ endif() message("\n=== Supported formats ===") message("format decoding encoding") -format_compilation_info("HEIC" SUPPORTS_HEIC_DECODING SUPPORTS_HEIC_ENCODING) -format_compilation_info("AVIF" SUPPORTS_AVIF_DECODING 
SUPPORTS_AVIF_ENCODING) -format_compilation_info("VVC" SUPPORTS_VVC_DECODING SUPPORTS_VVC_ENCODING) format_compilation_info("AVC" SUPPORTS_AVC_DECODING SUPPORTS_AVC_ENCODING) +format_compilation_info("AVIF" SUPPORTS_AVIF_DECODING SUPPORTS_AVIF_ENCODING) +format_compilation_info("HEIC" SUPPORTS_HEIC_DECODING SUPPORTS_HEIC_ENCODING) format_compilation_info("JPEG" SUPPORTS_JPEG_DECODING SUPPORTS_JPEG_ENCODING) format_compilation_info("JPEG2000" SUPPORTS_J2K_DECODING SUPPORTS_J2K_ENCODING) format_compilation_info("JPEG2000-HT" SUPPORTS_J2K_HT_DECODING SUPPORTS_J2K_HT_ENCODING) format_compilation_info("Uncompressed" SUPPORTS_UNCOMPRESSED_DECODING SUPPORTS_UNCOMPRESSED_ENCODING) +format_compilation_info("VVC" SUPPORTS_VVC_DECODING SUPPORTS_VVC_ENCODING) + message("") # --- Libsharpyuv color space transforms diff --git a/examples/heif_enc.cc b/examples/heif_enc.cc index 1329d58965..bef60e9dde 100644 --- a/examples/heif_enc.cc +++ b/examples/heif_enc.cc @@ -373,6 +373,9 @@ static const char* get_compression_format_name(heif_compression_format format) case heif_compression_AV1: return "AV1"; break; + case heif_compression_AVC: + return "AVC"; + break; case heif_compression_VVC: return "VVC"; break; @@ -399,19 +402,20 @@ static const char* get_compression_format_name(heif_compression_format format) static void show_list_of_all_encoders() { - for (auto compression_format : {heif_compression_HEVC, heif_compression_AV1, heif_compression_VVC, heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K + for (auto compression_format : {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC, heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K #if WITH_UNCOMPRESSED_CODEC , heif_compression_uncompressed #endif +, heif_compression_VVC }) { switch (compression_format) { + case heif_compression_AVC: + std::cout << "AVC"; + break; case heif_compression_AV1: std::cout << "AVIF"; break; - case heif_compression_VVC: - std::cout << "VVIC"; - 
break; case heif_compression_HEVC: std::cout << "HEIC"; break; @@ -422,11 +426,14 @@ static void show_list_of_all_encoders() std::cout << "JPEG 2000"; break; case heif_compression_HTJ2K: - std::cout << "HT-J2K"; + std::cout << "JPEG 2000 (HT)"; break; case heif_compression_uncompressed: std::cout << "Uncompressed"; break; + case heif_compression_VVC: + std::cout << "VVIC"; + break; default: assert(false); } From 956ba0914a8c3617f3a600e7e2eea3461ea8caea Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 09:56:47 +1000 Subject: [PATCH 02/41] nvdec: initial merge --- CMakeLists.txt | 15 + cmake/modules/FindNVDEC.cmake | 16 + libheif/api/libheif/heif.h | 7 + libheif/api/libheif/heif_plugin.h | 48 +- libheif/codecs/image_item.cc | 10 +- libheif/plugin_registry.cc | 8 + libheif/plugins/CMakeLists.txt | 8 + libheif/plugins/NvDecoder.cpp | 548 +++++++++++++ libheif/plugins/NvDecoder.h | 191 +++++ libheif/plugins/cuviddec.h | 1188 +++++++++++++++++++++++++++++ libheif/plugins/decoder_nvdec.cc | 363 +++++++++ libheif/plugins/decoder_nvdec.h | 34 + libheif/plugins/nvcuvid.h | 553 ++++++++++++++ 13 files changed, 2967 insertions(+), 22 deletions(-) create mode 100644 cmake/modules/FindNVDEC.cmake create mode 100644 libheif/plugins/NvDecoder.cpp create mode 100644 libheif/plugins/NvDecoder.h create mode 100644 libheif/plugins/cuviddec.h create mode 100644 libheif/plugins/decoder_nvdec.cc create mode 100644 libheif/plugins/decoder_nvdec.h create mode 100644 libheif/plugins/nvcuvid.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c008b84a8..2fa668cc48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,6 +248,13 @@ if (WITH_OPENJPH_ENCODER OR WITH_OPENJPH_DECODER) find_package(OPENJPH) endif() +# nvidia hardware decode + +plugin_option(NV_DECODER "NVIDIA Hardware AVC/AV1/HEVC/JPEG decoder" OFF OFF) +if (WITH_NV_DECODER) + find_package(NVDEC) +endif() + # uncompressed option(WITH_UNCOMPRESSED_CODEC " Support internal ISO/IEC 23001-17 uncompressed codec 
(experimental) " OFF) @@ -276,6 +283,8 @@ plugin_compilation_info(OPENJPH_ENCODER OPENJPH "OpenJPH HT-J2K encoder") plugin_compilation_info(UVG266_ENCODER UVG266 "uvg266 VVC enc. (experimental)") plugin_compilation_info(VVENC vvenc "vvenc VVC enc. (experimental)") plugin_compilation_info(VVDEC vvdec "vvdec VVC dec. (experimental)") +plugin_compilation_info(NV_DECODER NVDEC "NVIDIA hardware decoder") + # --- show summary which formats are supported @@ -343,6 +352,12 @@ endif() if (OpenH264_ENCODER_FOUND) set(SUPPORTS_AVC_ENCODING TRUE) endif() +if (NVDEC_FOUND) + set(SUPPORTS_HEIC_DECODING TRUE) + set(SUPPORTS_AVC_DECODING TRUE) + set(SUPPORTS_JPEG_DECODING TRUE) + set(SUPPORTS_AVIF_DECODING TRUE) +endif() if (WITH_UNCOMPRESSED_CODEC) set(SUPPORTS_UNCOMPRESSED_DECODING TRUE) diff --git a/cmake/modules/FindNVDEC.cmake b/cmake/modules/FindNVDEC.cmake new file mode 100644 index 0000000000..8425c0af5e --- /dev/null +++ b/cmake/modules/FindNVDEC.cmake @@ -0,0 +1,16 @@ +include(LibFindMacros) + +find_library(NVDEC_LIBRARY + NAMES libnvcuvid nvcuvid +) + +find_package(CUDAToolkit REQUIRED) + +set(NVDEC_PROCESS_LIBS NVDEC_LIBRARY) +libfind_process(NVDEC) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NVDEC + REQUIRED_VARS + NVDEC_LIBRARY +) diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h index ecd9c7eb23..39998fe576 100644 --- a/libheif/api/libheif/heif.h +++ b/libheif/api/libheif/heif.h @@ -856,6 +856,13 @@ typedef uint32_t heif_brand2; */ #define heif_brand2_miaf heif_fourcc('m','i','a','f') +/** + * AVC (H.264) image (`avci`) brand. + * + * See ISO/IEC 23008-12:2022 Annex E.4 + */ +#define heif_brand2_avci heif_fourcc('a','v','c','i') + /** * Single picture file brand. 
* diff --git a/libheif/api/libheif/heif_plugin.h b/libheif/api/libheif/heif_plugin.h index 3a438bfc94..08c771ef6d 100644 --- a/libheif/api/libheif/heif_plugin.h +++ b/libheif/api/libheif/heif_plugin.h @@ -34,14 +34,16 @@ extern "C" { // API versions table // -// release decoder encoder enc.params -// ----------------------------------------- -// 1.0 1 N/A N/A -// 1.1 1 1 1 -// 1.4 1 1 2 -// 1.8 1 2 2 -// 1.13 2 3 2 -// 1.15 3 3 2 +// release decoder dec.config encoder enc.params +// ----------------------------------------------------- +// 1.0 1 N/A N/A N/A +// 1.1 1 N/A 1 1 +// 1.4 1 N/A 1 2 +// 1.8 1 N/A 2 2 +// 1.13 2 N/A 3 2 +// 1.15 3 N/A 3 2 +// 1.19 4 1 3 2 + // ==================================================================================================== @@ -50,6 +52,14 @@ extern "C" { // added as plugins. A plugin has to implement the functions specified in heif_decoder_plugin // and the plugin has to be registered to the libheif library using heif_register_decoder(). +struct heif_decoder_configuration +{ + int version; // current version: 1 + + // --- version 1 fields --- + heif_compression_format compression_format; +}; + struct heif_decoder_plugin { // API version supported by this plugin (see table above for supported versions) @@ -88,27 +98,23 @@ struct heif_decoder_plugin struct heif_error (* decode_image)(void* decoder, struct heif_image** out_img); - // --- version 2 functions will follow below ... --- + // --- version 2 functions --- void (*set_strict_decoding)(void* decoder, int flag); - // If not NULL, this can provide a specialized function to convert YCbCr to sRGB, because - // only the codec itself knows how to interpret the chroma samples and their locations. - /* - struct heif_error (*convert_YCbCr_to_sRGB)(void* decoder, - struct heif_image* in_YCbCr_img, - struct heif_image** out_sRGB_img); - */ + // --- version 3 functions --- - // Reset decoder, such that we can feed in new data for another image. 
- // void (*reset_image)(void* decoder); + const char* id_name; - // --- version 3 functions will follow below ... --- - const char* id_name; + // --- version 4 functions --- - // --- version 4 functions will follow below ... --- + // Create a new decoder context for decoding an image + struct heif_error (* new_decoder2)(void** decoder, const heif_decoder_configuration* decoder_config); + + + // --- version 5 functions will follow below ... --- }; diff --git a/libheif/codecs/image_item.cc b/libheif/codecs/image_item.cc index fa969a0175..7c09667e88 100644 --- a/libheif/codecs/image_item.cc +++ b/libheif/codecs/image_item.cc @@ -1030,7 +1030,15 @@ Result> ImageItem::decode_from_compressed_data(h // --- decode image with the plugin void* decoder; - struct heif_error err = decoder_plugin->new_decoder(&decoder); + + struct heif_error err; + if (decoder_plugin->plugin_api_version >= 4) { + heif_decoder_configuration decoder_configuration {.version = 1, .compression_format = compression_format}; + err = decoder_plugin->new_decoder2(&decoder, &decoder_configuration); + } else { + err = decoder_plugin->new_decoder(&decoder); + } + if (err.code != heif_error_Ok) { return Error(err.code, err.subcode, err.message); } diff --git a/libheif/plugin_registry.cc b/libheif/plugin_registry.cc index 9bc196f467..83a41aeeca 100644 --- a/libheif/plugin_registry.cc +++ b/libheif/plugin_registry.cc @@ -104,6 +104,10 @@ #include "plugins/encoder_openjph.h" #endif +#if HAVE_NV_DECODER +#include "plugins/decoder_nvdec.h" +#endif + std::set s_decoder_plugins; std::multiset, @@ -211,6 +215,10 @@ void register_default_plugins() register_decoder(get_decoder_plugin_openh264()); #endif +#if HAVE_NV_DECODER + register_decoder(get_decoder_plugin_nvdec()); +#endif + #if WITH_UNCOMPRESSED_CODEC register_encoder(get_encoder_plugin_uncompressed()); #endif diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt index 58418d9983..49f6f47066 100644 --- a/libheif/plugins/CMakeLists.txt 
+++ b/libheif/plugins/CMakeLists.txt @@ -112,6 +112,14 @@ set(OpenH264_DECODER_sources decoder_openh264.cc decoder_openh264.h) set(OpenH264_DECODER_extra_plugin_sources) plugin_compilation(openh264dec OpenH264 OpenH264_DECODER_FOUND OpenH264_DECODER OpenH264_DECODER) +set(NV_DECODER_sources decoder_nvdec.cc decoder_nvdec.h NvDecoder.cpp NvDecoder.h) +set(NV_DECODER_extra_plugin_sources) +plugin_compilation(nvdec NVDEC NVDEC_FOUND NV_DECODER NV_DECODER) +if(WITH_NV_DECODER) + target_link_libraries(heif PRIVATE CUDA::cuda_driver) +endif() + + target_sources(heif PRIVATE encoder_mask.h encoder_mask.cc diff --git a/libheif/plugins/NvDecoder.cpp b/libheif/plugins/NvDecoder.cpp new file mode 100644 index 0000000000..44fd9be176 --- /dev/null +++ b/libheif/plugins/NvDecoder.cpp @@ -0,0 +1,548 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2023 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// for std::cout and friends +#include + +// for ceil() +#include + +// for memset, memcpy +#include + +// TODO: remove this once we dump the errorLog +#include + +#include "libheif/heif_plugin.h" + +#include "NvDecoder.h" + +/** +* @brief Exception class for error reporting from the decode API. +*/ +class NVDECException : public std::exception +{ +public: + NVDECException(const std::string& errorStr, const CUresult errorCode) + : m_errorString(errorStr), m_errorCode(errorCode) {} + + virtual ~NVDECException() throw() {} + virtual const char* what() const throw() { return m_errorString.c_str(); } + CUresult getErrorCode() const { return m_errorCode; } + const std::string& getErrorString() const { return m_errorString; } + static NVDECException makeNVDECException(const std::string& errorStr, const CUresult errorCode, + const std::string& functionName, const std::string& fileName, int lineNo); +private: + std::string m_errorString; + CUresult m_errorCode; +}; + +inline NVDECException NVDECException::makeNVDECException(const std::string& errorStr, const CUresult errorCode, const std::string& functionName, + const std::string& fileName, int lineNo) +{ + std::ostringstream errorLog; + errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl; + NVDECException exception(errorLog.str(), errorCode); + return exception; +} + +#define NVDEC_THROW_ERROR( errorStr, errorCode ) \ + do \ + { \ + throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } while (0) + + +#define NVDEC_API_CALL( cuvidAPI ) \ + do \ + { \ + CUresult errorCode = cuvidAPI; \ + if( errorCode != CUDA_SUCCESS) \ + { \ + std::ostringstream 
errorLog; \ + errorLog << #cuvidAPI << " returned error " << errorCode; \ + throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } while (0) + + +#define CUDA_DRVAPI_CALL( call ) \ + do \ + { \ + CUresult err__ = call; \ + if (err__ != CUDA_SUCCESS) \ + { \ + const char *szErrName = NULL; \ + cuGetErrorName(err__, &szErrName); \ + std::ostringstream errorLog; \ + errorLog << "CUDA driver API error " << szErrName ; \ + throw NVDECException::makeNVDECException(errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } \ + while (0) + + +#ifdef __cuda_cuda_h__ +inline bool check(CUresult e, int iLine, const char *szFile) { + if (e != CUDA_SUCCESS) { + const char *szErrName = NULL; + cuGetErrorName(e, &szErrName); + // LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile; + std::cout << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile << std::endl; + return false; + } + return true; +} +#endif + + +#define ck(call) check(call, __LINE__, __FILE__) + + +/** +* @brief Template class to facilitate color space conversion +*/ +template +class YuvConverter { +public: + YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) { + pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)]; + } + ~YuvConverter() { + delete[] pQuad; + } + void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) { + if (nPitch == 0) { + nPitch = nWidth; + } + + // sizes of source surface plane + int nSizePlaneY = nPitch * nHeight; + int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2); + int nSizePlaneV = nSizePlaneU; + + T *puv = pFrame + nSizePlaneY; + if (nPitch == nWidth) { + memcpy(pQuad, puv, nSizePlaneU * sizeof(T)); + } else { + for (int i = 0; i < (nHeight + 1) / 2; i++) { + memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T)); + } + } + T *pv = puv + nSizePlaneU; + for 
(int y = 0; y < (nHeight + 1) / 2; y++) { + for (int x = 0; x < (nWidth + 1) / 2; x++) { + puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x]; + puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x]; + } + } + } + void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) { + if (nPitch == 0) { + nPitch = nWidth; + } + + // sizes of source surface plane + int nSizePlaneY = nPitch * nHeight; + int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2); + int nSizePlaneV = nSizePlaneU; + + T *puv = pFrame + nSizePlaneY, + *pu = puv, + *pv = puv + nSizePlaneU; + + // split chroma from interleave to planar + for (int y = 0; y < (nHeight + 1) / 2; y++) { + for (int x = 0; x < (nWidth + 1) / 2; x++) { + pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2]; + pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1]; + } + } + if (nPitch == nWidth) { + memcpy(pv, pQuad, nSizePlaneV * sizeof(T)); + } else { + for (int i = 0; i < (nHeight + 1) / 2; i++) { + memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T)); + } + } + } + +private: + T *pQuad; + int nWidth, nHeight; +}; + + +void ConvertSemiplanarToPlanar(uint8_t *pHostFrame, int nWidth, int nHeight, int nBitDepth) { + if (nBitDepth == 8) { + // nv12->iyuv + YuvConverter converter8(nWidth, nHeight); + converter8.UVInterleavedToPlanar(pHostFrame); + } else { + // p016->yuv420p16 + YuvConverter converter16(nWidth, nHeight); + converter16.UVInterleavedToPlanar((uint16_t *)pHostFrame); + } +} + +static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat) +{ + float factor = 0.5; + switch (eSurfaceFormat) + { + case cudaVideoSurfaceFormat_NV12: + case cudaVideoSurfaceFormat_P016: + factor = 0.5; + break; + case cudaVideoSurfaceFormat_YUV444: + case cudaVideoSurfaceFormat_YUV444_16Bit: + factor = 1.0; + break; + } + + return factor; +} + +static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat) +{ + int numPlane = 1; + switch 
(eSurfaceFormat) + { + case cudaVideoSurfaceFormat_NV12: + case cudaVideoSurfaceFormat_P016: + numPlane = 1; + break; + case cudaVideoSurfaceFormat_YUV444: + case cudaVideoSurfaceFormat_YUV444_16Bit: + numPlane = 2; + break; + } + + return numPlane; +} + + +/* Called when the parser encounters sequence header for AV1 SVC content +* return value interpretation: +* < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10: bDispAllLayer, bit 11-30: reserved, must be set 0) +*/ +int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) +{ + if (pOPInfo->codec == cudaVideoCodec_AV1) + { + if (pOPInfo->av1.operating_points_cnt > 1) + { + // clip has SVC enabled + if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt) + m_nOperatingPoint = 0; + + printf("AV1 SVC clip: operating point count %d ", pOPInfo->av1.operating_points_cnt); + printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n", m_nOperatingPoint, pOPInfo->av1.operating_points_idc[m_nOperatingPoint], m_bDispAllLayers); + return (m_nOperatingPoint | (m_bDispAllLayers << 10)); + } + } + return -1; +} + +/* Return value from HandleVideoSequence() are interpreted as : +* 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser) +*/ +int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) +{ + int nDecodeSurface = pVideoFormat->min_num_decode_surfaces; + + CUVIDDECODECAPS decodecaps; + memset(&decodecaps, 0, sizeof(decodecaps)); + + decodecaps.eCodecType = pVideoFormat->codec; + decodecaps.eChromaFormat = pVideoFormat->chroma_format; + decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext)); + NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + + if(!decodecaps.bIsSupported){ + NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + if 
((pVideoFormat->coded_width > decodecaps.nMaxWidth) || + (pVideoFormat->coded_height > decodecaps.nMaxHeight)){ + + std::ostringstream errorString; + errorString << std::endl + << "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height << std::endl + << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl + << "Resolution not supported on this GPU"; + + const std::string cErr = errorString.str(); + NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + if ((pVideoFormat->coded_width>>4)*(pVideoFormat->coded_height>>4) > decodecaps.nMaxMBCount){ + + std::ostringstream errorString; + errorString << std::endl + << "MBCount : " << (pVideoFormat->coded_width >> 4)*(pVideoFormat->coded_height >> 4) << std::endl + << "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl + << "MBCount not supported on this GPU"; + + const std::string cErr = errorString.str(); + NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + m_ctx->eCodec = pVideoFormat->codec; + cudaVideoChromaFormat eChromaFormat = pVideoFormat->chroma_format; + m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1; + + // Set the output surface format same as chroma format + if ((eChromaFormat == cudaVideoChromaFormat_420) || (eChromaFormat == cudaVideoChromaFormat_Monochrome)) { + m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; + } else if (eChromaFormat == cudaVideoChromaFormat_444) { + m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444; + } else if (eChromaFormat == cudaVideoChromaFormat_422) { + m_eOutputFormat = cudaVideoSurfaceFormat_NV12; // no 4:2:2 output format supported yet so make 420 default + } + + // Check if output format supported. 
If not, check falback options + if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) + { + if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)) + m_eOutputFormat = cudaVideoSurfaceFormat_NV12; + else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016)) + m_eOutputFormat = cudaVideoSurfaceFormat_P016; + else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444)) + m_eOutputFormat = cudaVideoSurfaceFormat_YUV444; + else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit)) + m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit; + else + NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED); + } + + CUVIDDECODECREATEINFO videoDecodeCreateInfo = { 0 }; + videoDecodeCreateInfo.CodecType = pVideoFormat->codec; + videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format; + videoDecodeCreateInfo.OutputFormat = m_eOutputFormat; + videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + if (pVideoFormat->progressive_sequence) + videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; + else + videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive; + videoDecodeCreateInfo.ulNumOutputSurfaces = 2; + // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware + videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; + videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface; + videoDecodeCreateInfo.vidLock = m_ctx->ctxLock; + videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width; + videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height; + + unsigned int maxHeight = 0; + unsigned int maxWidth = 0; + // AV1 has max width/height of sequence in sequence header + if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0) + { + CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat; + maxWidth = 
vidFormatEx->av1.max_width; + maxHeight = vidFormatEx->av1.max_height; + } + if (maxWidth < pVideoFormat->coded_width) { + maxWidth = pVideoFormat->coded_width; + } + if (maxHeight < pVideoFormat->coded_height) { + maxHeight = pVideoFormat->coded_height; + } + videoDecodeCreateInfo.ulMaxWidth = maxWidth; + videoDecodeCreateInfo.ulMaxHeight = maxHeight; + + m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left; + m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top; + videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width; + videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height; + + m_nChromaHeight = (int)(ceil((float)m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat))); + m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat); + m_nSurfaceHeight = (int) videoDecodeCreateInfo.ulTargetHeight; + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext)); + NVDEC_API_CALL(cuvidCreateDecoder(&(m_ctx->hDecoder), &videoDecodeCreateInfo)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + return nDecodeSurface; +} + + +/* Return value from HandlePictureDecode() are interpreted as: +* 0: fail, >=1: succeeded +*/ +int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) { + if (!(m_ctx->hDecoder)) + { + NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED); + return false; + } + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext)); + NVDEC_API_CALL(cuvidDecodePicture(m_ctx->hDecoder, pPicParams)); + if ((!pPicParams->field_pic_flag) || (pPicParams->second_field)) + { + CUVIDPARSERDISPINFO dispInfo; + memset(&dispInfo, 0, sizeof(dispInfo)); + dispInfo.picture_index = pPicParams->CurrPicIdx; + dispInfo.progressive_frame = !pPicParams->field_pic_flag; + dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1; + HandlePictureDisplay(&dispInfo); + } + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + return 1; +} + +/* Return value from HandlePictureDisplay() are interpreted as: +* 0: 
fail, >=1: succeeded
+*/
+int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
+    CUVIDPROCPARAMS videoProcessingParameters = {};
+    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
+    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
+    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
+    videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
+    videoProcessingParameters.output_stream = m_ctx->cuvidStream;
+
+    CUdeviceptr dpSrcFrame = 0;
+    unsigned int nSrcPitch = 0;
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_ctx->cuContext));
+    NVDEC_API_CALL(cuvidMapVideoFrame(m_ctx->hDecoder, pDispInfo->picture_index, &dpSrcFrame,
+        &nSrcPitch, &videoProcessingParameters));
+
+    CUVIDGETDECODESTATUS DecodeStatus;
+    memset(&DecodeStatus, 0, sizeof(DecodeStatus));
+    CUresult result = cuvidGetDecodeStatus(m_ctx->hDecoder, pDispInfo->picture_index, &DecodeStatus);
+    if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed))
+    {
+        printf("Decode Error occurred for picture.\n");
+    }
+    // NOTE(review): a buffer left over from an earlier picture is overwritten here without being freed.
+    dstFrame = new uint8_t[GetFrameSize()]; // array allocation: released with delete[] in ~NvDecoder()
+
+    // Copy luma plane
+    CUDA_MEMCPY2D m = { 0 };
+    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    m.srcDevice = dpSrcFrame;
+    m.srcPitch = nSrcPitch;
+    m.dstMemoryType = CU_MEMORYTYPE_HOST;
+    m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame);
+    m.dstPitch = GetWidth() * m_nBPP;
+    m.WidthInBytes = GetWidth() * m_nBPP;
+    m.Height = m_nLumaHeight;
+    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+
+    // Copy chroma plane
+    // NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning height
+    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
+    m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame + m.dstPitch * m_nLumaHeight);
+    m.Height = m_nChromaHeight;
+    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+
+    if (m_nNumChromaPlanes == 2)
+    {
+        m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
+        m.dstDevice = (CUdeviceptr)(m.dstHost = dstFrame + m.dstPitch * m_nLumaHeight * 2);
+        m.Height = m_nChromaHeight;
+        CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_ctx->cuvidStream));
+    }
+    CUDA_DRVAPI_CALL(cuStreamSynchronize(m_ctx->cuvidStream)); // wait for the async copies before unmapping
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+
+    NVDEC_API_CALL(cuvidUnmapVideoFrame(m_ctx->hDecoder, dpSrcFrame));
+    return 1;
+}
+// Stores the caller-provided context pointer; the constructor itself makes no CUDA/CUVID calls.
+NvDecoder::NvDecoder(nvdec_context * ctx) : m_ctx(ctx)
+{
+}
+// Creates the CUVID parser in m_ctx->hParser with this object as callback user data; returns heif_error_ok on success.
+heif_error NvDecoder::initVideoParser()
+{
+    CUVIDPARSERPARAMS videoParserParameters = {};
+    videoParserParameters.CodecType = m_ctx->eCodec;
+    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
+    videoParserParameters.ulClockRate = 1000;
+    videoParserParameters.ulMaxDisplayDelay = 0;
+    videoParserParameters.pUserData = this; // TODO: make this ctx once all the members are gone
+    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
+    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
+    videoParserParameters.pfnDisplayPicture = NULL;
+    videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
+    videoParserParameters.pfnGetSEIMsg = NULL;
+    CUresult errorCode = cuvidCreateVideoParser(&(m_ctx->hParser), &videoParserParameters);
+    if (errorCode != CUDA_SUCCESS) {
+        struct heif_error err = {heif_error_Decoder_plugin_error,
+                                 heif_suberror_Plugin_loading_error,
+                                 "could not create CUVID video parser"};
+        return err;
+    }
+    return heif_error_ok;
+}
+// Destroys the parser and decoder referenced by m_ctx and frees the host frame buffer.
+NvDecoder::~NvDecoder() {
+
+    if (m_ctx->hParser) {
+        cuvidDestroyVideoParser(m_ctx->hParser);
+    }
+    cuCtxPushCurrent(m_ctx->cuContext);
+    if (m_ctx->hDecoder) {
+        cuvidDestroyDecoder(m_ctx->hDecoder);
+    }
+    // dstFrame is allocated with new uint8_t[] in HandlePictureDisplay(), so the array form of delete is required.
+    delete[] dstFrame;
+
+    cuCtxPopCurrent(NULL);
+
+    cuvidCtxLockDestroy(m_ctx->ctxLock);
+}
+// Hands pData to the parser as one end-of-stream packet; throws NVDECException on parser failure, otherwise returns 1.
+int NvDecoder::Decode(const uint8_t *pData, size_t nSize)
+{
+    CUVIDSOURCEDATAPACKET packet = { 0 };
+    packet.payload = pData;
+    packet.payload_size = nSize;
+    packet.flags = CUVID_PKT_ENDOFSTREAM;
+    packet.timestamp = 0;
+    NVDEC_API_CALL(cuvidParseVideoData(m_ctx->hParser, &packet));
+
+    return 1;
+}
+// Converts the stored frame from semi-planar to planar chroma in place and returns the same buffer (still owned by this object).
+uint8_t* NvDecoder::GetFrame()
+{
+    // convert result to heif pixel image
+    ConvertSemiplanarToPlanar(dstFrame, GetWidth(), GetHeight(), m_nBitDepthMinus8 + 8);
+    // NOTE(review): the conversion runs on every call, so a second call re-converts already-planar data.
+    return dstFrame;
+}
+
diff --git a/libheif/plugins/NvDecoder.h b/libheif/plugins/NvDecoder.h
new file mode 100644
index 0000000000..945759bfc0
--- /dev/null
+++ b/libheif/plugins/NvDecoder.h
@@ -0,0 +1,191 @@
+/*
+ * This copyright notice applies to this header file only:
+ *
+ * Copyright (c) 2010-2023 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the software, and to permit persons to whom the
+ * software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include "libheif/heif.h" +#include "nvcuvid.h" + +struct nvdec_context +{ + std::vector data; + int strict = 0; + cudaVideoCodec eCodec = cudaVideoCodec_NumCodecs; + CUcontext cuContext = NULL; + CUvideoctxlock ctxLock = NULL; + CUstream cuvidStream = 0; + CUvideoparser hParser = NULL; + CUvideodecoder hDecoder = NULL; +}; + + +/** +* @brief Base class for decoder interface. +*/ +class NvDecoder { + +public: + /** + * @brief This function is used to initialize the decoder session. + * Application must call this function to initialize the decoder, before + * starting to decode any frames. + */ + NvDecoder(nvdec_context *ctx); + ~NvDecoder(); + + /** + * @brief This function is used to get the output frame width. + * NV12/P016 output format width is 2 byte aligned because of U and V interleave + */ + int GetWidth() { assert(m_nWidth); return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016) + ? (m_nWidth + 1) & ~1 : m_nWidth; } + + /** + * @brief This function is used to get the actual decode width + */ + int GetDecodeWidth() { assert(m_nWidth); return m_nWidth; } + + /** + * @brief This function is used to get the output frame height (Luma height). + */ + int GetHeight() { assert(m_nLumaHeight); return m_nLumaHeight; } + + /** + * @brief This function is used to get the current chroma height. + */ + int GetChromaHeight() { assert(m_nChromaHeight); return m_nChromaHeight; } + + /** + * @brief This function is used to get the number of chroma planes. 
+ */ + int GetNumChromaPlanes() { assert(m_nNumChromaPlanes); return m_nNumChromaPlanes; } + + /** + * @brief This function is used to get the current frame size based on pixel format. + */ + int GetFrameSize() { return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP; } + + /** + * @brief This function is used to get the current frame Luma plane size. + */ + int GetLumaPlaneSize() { return GetWidth() * m_nLumaHeight * m_nBPP; } + + /** + * @brief This function is used to get the current frame chroma plane size. + */ + int GetChromaPlaneSize() { return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; } + + /** + * @brief This function is used to get the bit depth associated with the pixel format. + */ + int GetBitDepth() { return m_nBitDepthMinus8 + 8; } + + /** + * @brief This function is used to get the bytes used per pixel. + */ + int GetBPP() { return m_nBPP; } + + /** + * @brief This function decodes a frame and returns the number of frames that are available for + * display. All frames that are available for display should be read before making a subsequent decode call. + * @param pData - pointer to the data buffer that is to be decoded + * @param nSize - size of the data buffer in bytes + */ + int Decode(const uint8_t *pData, size_t nSize); + + /** + * @brief This function runs ConvertSemiplanarToPlanar on the internal frame buffer and returns that buffer. + * The buffer is released by the destructor, so the caller must not free it. 
+ */ + uint8_t* GetFrame(); + + /** + * @brief This function allows app to set operating point for AV1 SVC clips + * @param opPoint - operating point of an AV1 scalable bitstream + * @param bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream + */ + void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) { m_nOperatingPoint = opPoint; m_bDispAllLayers = bDispAllLayers; } + + heif_error initVideoParser(); +private: + + /** + * @brief Callback function to be registered for getting a callback when decoding of sequence starts + */ + static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat); } + + /** + * @brief Callback function to be registered for getting a callback when a picture is ready to be decoded + */ + static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams); } + + /** + * @brief Callback function to be registered for getting a callback to get operating point when an AV1 SVC sequence header starts. + */ + static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) { return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo); } + + /** + * @brief This function gets called when a sequence is ready to be decoded. The function also gets called + when there is a format change + */ + int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat); + + /** + * @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this function + * to decode the picture + */ + int HandlePictureDecode(CUVIDPICPARAMS *pPicParams); + + /** + * @brief This function gets called after a picture is decoded and available for display. 
Frames are fetched and stored in + internal buffer + */ + int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo); + + /** + * @brief This function gets called when an AV1 sequence encounters more than one operating point + */ + int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo); + +private: + // dimension of the output + unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0; + unsigned int m_nNumChromaPlanes = 0; + // height of the mapped surface + int m_nSurfaceHeight = 0; + cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12; + int m_nBitDepthMinus8 = 0; + int m_nBPP = 1; + uint8_t * dstFrame = nullptr; // owned frame buffer, freed in ~NvDecoder(); null until one is allocated + + unsigned int m_nOperatingPoint = 0; + bool m_bDispAllLayers = false; + nvdec_context *m_ctx; +}; diff --git a/libheif/plugins/cuviddec.h b/libheif/plugins/cuviddec.h new file mode 100644 index 0000000000..4a680375db --- /dev/null +++ b/libheif/plugins/cuviddec.h @@ -0,0 +1,1188 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2024 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*****************************************************************************************************/ +//! \file cuviddec.h +//! NVDECODE API provides video decoding interface to NVIDIA GPU devices. +//! This file contains constants, structure definitions and function prototypes used for decoding. +/*****************************************************************************************************/ + +#if !defined(__CUDA_VIDEO_H__) +#define __CUDA_VIDEO_H__ + +#ifndef __cuda_cuda_h__ +#include +#endif // __cuda_cuda_h__ + +#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020)) +#define __CUVID_DEVPTR64 +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +typedef void *CUvideodecoder; +typedef struct _CUcontextlock_st *CUvideoctxlock; + +/*********************************************************************************/ +//! \enum cudaVideoCodec +//! Video codec enums +//! 
These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures +/*********************************************************************************/ +typedef enum cudaVideoCodec_enum { + cudaVideoCodec_MPEG1=0, /**< MPEG1 */ + cudaVideoCodec_MPEG2, /**< MPEG2 */ + cudaVideoCodec_MPEG4, /**< MPEG4 */ + cudaVideoCodec_VC1, /**< VC1 */ + cudaVideoCodec_H264, /**< H264 */ + cudaVideoCodec_JPEG, /**< JPEG */ + cudaVideoCodec_H264_SVC, /**< H264-SVC */ + cudaVideoCodec_H264_MVC, /**< H264-MVC */ + cudaVideoCodec_HEVC, /**< HEVC */ + cudaVideoCodec_VP8, /**< VP8 */ + cudaVideoCodec_VP9, /**< VP9 */ + cudaVideoCodec_AV1, /**< AV1 */ + cudaVideoCodec_NumCodecs, /**< Max codecs */ + // Uncompressed YUV + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */ + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,V,U (4:2:0) */ + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,UV (4:2:0) */ + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< YUYV/YUY2 (4:2:2) */ + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< UYVY (4:2:2) */ +} cudaVideoCodec; + +/*********************************************************************************/ +//! \enum cudaVideoSurfaceFormat +//! Video surface format enums used for output format of decoded output +//! These enums are used in CUVIDDECODECREATEINFO structure +/*********************************************************************************/ +typedef enum cudaVideoSurfaceFormat_enum { + cudaVideoSurfaceFormat_NV12=0, /**< Semi-Planar YUV [Y plane followed by interleaved UV plane] */ + cudaVideoSurfaceFormat_P016=1, /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. 
+ Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + cudaVideoSurfaceFormat_YUV444=2, /**< Planar YUV [Y plane followed by U and V planes] */ + cudaVideoSurfaceFormat_YUV444_16Bit=3, /**< 16 bit Planar YUV [Y plane followed by U and V planes]. + Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ +} cudaVideoSurfaceFormat; + +/******************************************************************************************************************/ +//! \enum cudaVideoDeinterlaceMode +//! Deinterlacing mode enums +//! These enums are used in CUVIDDECODECREATEINFO structure +//! Use cudaVideoDeinterlaceMode_Weave for progressive content and for content that doesn't need deinterlacing +//! cudaVideoDeinterlaceMode_Adaptive needs more video memory than other DImodes +/******************************************************************************************************************/ +typedef enum cudaVideoDeinterlaceMode_enum { + cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no deinterlacing) */ + cudaVideoDeinterlaceMode_Bob, /**< Drop one field */ + cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */ +} cudaVideoDeinterlaceMode; + +/**************************************************************************************************************/ +//! \enum cudaVideoChromaFormat +//! Chroma format enums +//! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures +/**************************************************************************************************************/ +typedef enum cudaVideoChromaFormat_enum { + cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */ + cudaVideoChromaFormat_420, /**< YUV 4:2:0 */ + cudaVideoChromaFormat_422, /**< YUV 4:2:2 */ + cudaVideoChromaFormat_444 /**< YUV 4:4:4 */ +} cudaVideoChromaFormat; + +/*************************************************************************************************************/ +//! \enum cudaVideoCreateFlags +//! 
Decoder flag enums to select preferred decode path +//! cudaVideoCreate_Default and cudaVideoCreate_PreferCUVID are most optimized, use these whenever possible +/*************************************************************************************************************/ +typedef enum cudaVideoCreateFlags_enum { + cudaVideoCreate_Default = 0x00, /**< Default operation mode: use dedicated video engines */ + cudaVideoCreate_PreferCUDA = 0x01, /**< Use CUDA-based decoder (requires valid vidLock object for multi-threading) */ + cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if possible (requires D3D9 interop) */ + cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines directly */ +} cudaVideoCreateFlags; + + +/*************************************************************************/ +//! \enum cuvidDecodeStatus +//! Decode status enums +//! These enums are used in CUVIDGETDECODESTATUS structure +/*************************************************************************/ +typedef enum cuvidDecodeStatus_enum +{ + cuvidDecodeStatus_Invalid = 0, // Decode status is not valid + cuvidDecodeStatus_InProgress = 1, // Decode is in progress + cuvidDecodeStatus_Success = 2, // Decode is completed without any errors + // 3 to 7 enums are reserved for future use + cuvidDecodeStatus_Error = 8, // Decode is completed with an error (error is not concealed) + cuvidDecodeStatus_Error_Concealed = 9, // Decode is completed with an error and error is concealed +} cuvidDecodeStatus; + +/**************************************************************************************************************/ +//! \struct CUVIDDECODECAPS; +//! 
This structure is used in cuvidGetDecoderCaps API +/**************************************************************************************************************/ +typedef struct _CUVIDDECODECAPS +{ + cudaVideoCodec eCodecType; /**< IN: cudaVideoCodec_XXX */ + cudaVideoChromaFormat eChromaFormat; /**< IN: cudaVideoChromaFormat_XXX */ + unsigned int nBitDepthMinus8; /**< IN: The Value "BitDepth minus 8" */ + unsigned int reserved1[3]; /**< Reserved for future use - set to zero */ + + unsigned char bIsSupported; /**< OUT: 1 if codec supported, 0 if not supported */ + unsigned char nNumNVDECs; /**< OUT: Number of NVDECs that can support IN params */ + unsigned short nOutputFormatMask; /**< OUT: each bit represents corresponding cudaVideoSurfaceFormat enum */ + unsigned int nMaxWidth; /**< OUT: Max supported coded width in pixels */ + unsigned int nMaxHeight; /**< OUT: Max supported coded height in pixels */ + unsigned int nMaxMBCount; /**< OUT: Max supported macroblock count + CodedWidth*CodedHeight/256 must be <= nMaxMBCount */ + unsigned short nMinWidth; /**< OUT: Min supported coded width in pixels */ + unsigned short nMinHeight; /**< OUT: Min supported coded height in pixels */ + unsigned char bIsHistogramSupported; /**< OUT: 1 if Y component histogram output is supported, 0 if not + Note: histogram is computed on original picture data before + any post-processing like scaling, cropping, etc. is applied */ + unsigned char nCounterBitDepth; /**< OUT: histogram counter bit depth */ + unsigned short nMaxHistogramBins; /**< OUT: Max number of histogram bins */ + unsigned int reserved3[10]; /**< Reserved for future use - set to zero */ +} CUVIDDECODECAPS; + +/**************************************************************************************************************/ +//! \struct CUVIDDECODECREATEINFO +//! 
This structure is used in cuvidCreateDecoder API +/**************************************************************************************************************/ +typedef struct _CUVIDDECODECREATEINFO +{ + unsigned long ulWidth; /**< IN: Coded sequence width in pixels */ + unsigned long ulHeight; /**< IN: Coded sequence height in pixels */ + unsigned long ulNumDecodeSurfaces; /**< IN: Maximum number of internal decode surfaces */ + cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */ + cudaVideoChromaFormat ChromaFormat; /**< IN: cudaVideoChromaFormat_XXX */ + unsigned long ulCreationFlags; /**< IN: Decoder creation flags (cudaVideoCreateFlags_XXX) */ + unsigned long bitDepthMinus8; /**< IN: The value "BitDepth minus 8" */ + unsigned long ulIntraDecodeOnly; /**< IN: Set 1 only if video has all intra frames (default value is 0). This will + optimize video memory for Intra frames only decoding. The support is limited + to specific codecs - H264, HEVC, VP9, the flag will be ignored for codecs which + are not supported. However decoding might fail if the flag is enabled in case + of supported codecs for regular bit streams having P and/or B frames. 
*/ + unsigned long ulMaxWidth; /**< IN: Coded sequence max width in pixels used with reconfigure Decoder */ + unsigned long ulMaxHeight; /**< IN: Coded sequence max height in pixels used with reconfigure Decoder */ + unsigned long Reserved1; /**< Reserved for future use - set to zero */ + /** + * IN: area of the frame that should be displayed + */ + struct { + short left; + short top; + short right; + short bottom; + } display_area; + + cudaVideoSurfaceFormat OutputFormat; /**< IN: cudaVideoSurfaceFormat_XXX */ + cudaVideoDeinterlaceMode DeinterlaceMode; /**< IN: cudaVideoDeinterlaceMode_XXX */ + unsigned long ulTargetWidth; /**< IN: Post-processed output width (Should be aligned to 2) */ + unsigned long ulTargetHeight; /**< IN: Post-processed output height (Should be aligned to 2) */ + unsigned long ulNumOutputSurfaces; /**< IN: Maximum number of output surfaces simultaneously mapped */ + CUvideoctxlock vidLock; /**< IN: If non-NULL, context lock used for synchronizing ownership of + the cuda context. Needed for cudaVideoCreate_PreferCUDA decode */ + /** + * IN: target rectangle in the output frame (for aspect ratio conversion) + * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used + */ + struct { + short left; + short top; + short right; + short bottom; + } target_rect; + + unsigned long enableHistogram; /**< IN: enable histogram output, if supported */ + unsigned long Reserved2[4]; /**< Reserved for future use - set to zero */ +} CUVIDDECODECREATEINFO; + +/*********************************************************/ +//! \struct CUVIDH264DPBENTRY +//! H.264 DPB entry +//! 
This structure is used in CUVIDH264PICPARAMS structure +/*********************************************************/ +typedef struct _CUVIDH264DPBENTRY +{ + int PicIdx; /**< picture index of reference frame */ + int FrameIdx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ + int is_long_term; /**< 0=short term reference, 1=long term reference */ + int not_existing; /**< non-existing reference frame (corresponding PicIdx should be set to -1) */ + int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */ + int FieldOrderCnt[2]; /**< field order count of top and bottom fields */ +} CUVIDH264DPBENTRY; + +/************************************************************/ +//! \struct CUVIDH264MVCEXT +//! H.264 MVC picture parameters ext +//! This structure is used in CUVIDH264PICPARAMS structure +/************************************************************/ +typedef struct _CUVIDH264MVCEXT +{ + int num_views_minus1; /**< Max number of coded views minus 1 in video : Range - 0 to 1023 */ + int view_id; /**< view identifier */ + unsigned char inter_view_flag; /**< 1 if used for inter-view prediction, 0 if not */ + unsigned char num_inter_view_refs_l0; /**< number of inter-view ref pics in RefPicList0 */ + unsigned char num_inter_view_refs_l1; /**< number of inter-view ref pics in RefPicList1 */ + unsigned char MVCReserved8Bits; /**< Reserved bits */ + int InterViewRefsL0[16]; /**< view id of the i-th view component for inter-view prediction in RefPicList0 */ + int InterViewRefsL1[16]; /**< view id of the i-th view component for inter-view prediction in RefPicList1 */ +} CUVIDH264MVCEXT; + +/*********************************************************/ +//! \struct CUVIDH264SVCEXT +//! H.264 SVC picture parameters ext +//! 
This structure is used in CUVIDH264PICPARAMS structure +/*********************************************************/ +typedef struct _CUVIDH264SVCEXT +{ + unsigned char profile_idc; + unsigned char level_idc; + unsigned char DQId; + unsigned char DQIdMax; + unsigned char disable_inter_layer_deblocking_filter_idc; + unsigned char ref_layer_chroma_phase_y_plus1; + signed char inter_layer_slice_alpha_c0_offset_div2; + signed char inter_layer_slice_beta_offset_div2; + + unsigned short DPBEntryValidFlag; + unsigned char inter_layer_deblocking_filter_control_present_flag; + unsigned char extended_spatial_scalability_idc; + unsigned char adaptive_tcoeff_level_prediction_flag; + unsigned char slice_header_restriction_flag; + unsigned char chroma_phase_x_plus1_flag; + unsigned char chroma_phase_y_plus1; + + unsigned char tcoeff_level_prediction_flag; + unsigned char constrained_intra_resampling_flag; + unsigned char ref_layer_chroma_phase_x_plus1_flag; + unsigned char store_ref_base_pic_flag; + unsigned char Reserved8BitsA; + unsigned char Reserved8BitsB; + + short scaled_ref_layer_left_offset; + short scaled_ref_layer_top_offset; + short scaled_ref_layer_right_offset; + short scaled_ref_layer_bottom_offset; + unsigned short Reserved16Bits; + struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. + Linked list ends at the target layer. */ + int bRefBaseLayer; /**< whether to store ref base pic */ +} CUVIDH264SVCEXT; + +/******************************************************/ +//! \struct CUVIDH264PICPARAMS +//! H.264 picture parameters +//! 
This structure is used in CUVIDPICPARAMS structure +/******************************************************/ +typedef struct _CUVIDH264PICPARAMS +{ + // SPS + int log2_max_frame_num_minus4; + int pic_order_cnt_type; + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int direct_8x8_inference_flag; + int num_ref_frames; // NOTE: shall meet level 4.1 restrictions + unsigned char residual_colour_transform_flag; + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported) + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported) + unsigned char qpprime_y_zero_transform_bypass_flag; + // PPS + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + int weighted_pred_flag; + int weighted_bipred_idc; + int pic_init_qp_minus26; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + int MbaffFrameFlag; + int constrained_intra_pred_flag; + int chroma_qp_index_offset; + int second_chroma_qp_index_offset; + int ref_pic_flag; + int frame_num; + int CurrFieldOrderCnt[2]; + // DPB + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB + // Quantization Matrices (raster-order) + unsigned char WeightScale4x4[6][16]; + unsigned char WeightScale8x8[2][64]; + // FMO/ASO + unsigned char fmo_aso_enable; + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + signed char pic_init_qs_minus26; + unsigned int slice_group_change_rate_minus1; + union + { + unsigned long long slice_group_map_addr; + const unsigned char *pMb2SliceGroupMap; + } fmo; + unsigned int Reserved[12]; + // SVC/MVC + union + { + CUVIDH264MVCEXT mvcext; + CUVIDH264SVCEXT svcext; + }; +} CUVIDH264PICPARAMS; + + +/********************************************************/ +//! \struct CUVIDMPEG2PICPARAMS +//! MPEG-2 picture parameters +//! 
This structure is used in CUVIDPICPARAMS structure +/********************************************************/ +typedef struct _CUVIDMPEG2PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + int picture_coding_type; + int full_pel_forward_vector; + int full_pel_backward_vector; + int f_code[2][2]; + int intra_dc_precision; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int top_field_first; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; +} CUVIDMPEG2PICPARAMS; + +// MPEG-4 has VOP types instead of Picture types +#define I_VOP 0 +#define P_VOP 1 +#define B_VOP 2 +#define S_VOP 3 + +/*******************************************************/ +//! \struct CUVIDMPEG4PICPARAMS +//! MPEG-4 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/*******************************************************/ +typedef struct _CUVIDMPEG4PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + // VOL + int video_object_layer_width; + int video_object_layer_height; + int vop_time_increment_bitcount; + int top_field_first; + int resync_marker_disable; + int quant_type; + int quarter_sample; + int short_video_header; + int divx_flags; + // VOP + int vop_coding_type; + int vop_coded; + int vop_rounding_type; + int alternate_vertical_scan_flag; + int interlaced; + int vop_fcode_forward; + int vop_fcode_backward; + int trd[2]; + int trb[2]; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; + int gmc_enabled; +} CUVIDMPEG4PICPARAMS; + +/********************************************************/ +//! \struct CUVIDVC1PICPARAMS +//! 
VC1 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/********************************************************/ +typedef struct _CUVIDVC1PICPARAMS +{ + int ForwardRefIdx; /**< Picture index of forward reference (P/B-frames) */ + int BackwardRefIdx; /**< Picture index of backward reference (B-frames) */ + int FrameWidth; /**< Actual frame width */ + int FrameHeight; /**< Actual frame height */ + // PICTURE + int intra_pic_flag; /**< Set to 1 for I,BI frames */ + int ref_pic_flag; /**< Set to 1 for I,P frames */ + int progressive_fcm; /**< Progressive frame */ + // SEQUENCE + int profile; + int postprocflag; + int pulldown; + int interlace; + int tfcntrflag; + int finterpflag; + int psf; + int multires; + int syncmarker; + int rangered; + int maxbframes; + // ENTRYPOINT + int panscan_flag; + int refdist_flag; + int extended_mv; + int dquant; + int vstransform; + int loopfilter; + int fastuvmc; + int overlap; + int quantizer; + int extended_dmv; + int range_mapy_flag; + int range_mapy; + int range_mapuv_flag; + int range_mapuv; + int rangeredfrm; // range reduction state +} CUVIDVC1PICPARAMS; + +/***********************************************************/ +//! \struct CUVIDJPEGPICPARAMS +//! JPEG picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/***********************************************************/ +typedef struct _CUVIDJPEGPICPARAMS +{ + int Reserved; +} CUVIDJPEGPICPARAMS; + + +/*******************************************************/ +//! \struct CUVIDHEVCPICPARAMS +//! HEVC picture parameters +//! 
This structure is used in CUVIDPICPARAMS structure +/*******************************************************/ +typedef struct _CUVIDHEVCPICPARAMS +{ + // sps + int pic_width_in_luma_samples; + int pic_height_in_luma_samples; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_transform_block_size_minus2; + unsigned char log2_diff_max_min_transform_block_size; + unsigned char pcm_enabled_flag; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char pcm_sample_bit_depth_luma_minus1; + + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char pcm_loop_filter_disabled_flag; + unsigned char strong_intra_smoothing_enabled_flag; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char amp_enabled_flag; + unsigned char separate_colour_plane_flag; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + + unsigned char num_short_term_ref_pic_sets; + unsigned char long_term_ref_pics_present_flag; + unsigned char num_long_term_ref_pics_sps; + unsigned char sps_temporal_mvp_enabled_flag; + unsigned char sample_adaptive_offset_enabled_flag; + unsigned char scaling_list_enable_flag; + unsigned char IrapPicFlag; + unsigned char IdrPicFlag; + + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + //sps/pps extension fields + unsigned char log2_max_transform_skip_block_size_minus2; + unsigned char log2_sao_offset_scale_luma; + unsigned char log2_sao_offset_scale_chroma; + unsigned char high_precision_offsets_enabled_flag; + unsigned char reserved1[10]; + + // pps + unsigned char dependent_slice_segments_enabled_flag; + unsigned char slice_segment_header_extension_present_flag; + unsigned char sign_data_hiding_enabled_flag; + unsigned char cu_qp_delta_enabled_flag; + unsigned char diff_cu_qp_delta_depth; + signed char 
init_qp_minus26; + signed char pps_cb_qp_offset; + signed char pps_cr_qp_offset; + + unsigned char constrained_intra_pred_flag; + unsigned char weighted_pred_flag; + unsigned char weighted_bipred_flag; + unsigned char transform_skip_enabled_flag; + unsigned char transquant_bypass_enabled_flag; + unsigned char entropy_coding_sync_enabled_flag; + unsigned char log2_parallel_merge_level_minus2; + unsigned char num_extra_slice_header_bits; + + unsigned char loop_filter_across_tiles_enabled_flag; + unsigned char loop_filter_across_slices_enabled_flag; + unsigned char output_flag_present_flag; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + unsigned char lists_modification_present_flag; + unsigned char cabac_init_present_flag; + unsigned char pps_slice_chroma_qp_offsets_present_flag; + + unsigned char deblocking_filter_override_enabled_flag; + unsigned char pps_deblocking_filter_disabled_flag; + signed char pps_beta_offset_div2; + signed char pps_tc_offset_div2; + unsigned char tiles_enabled_flag; + unsigned char uniform_spacing_flag; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + + unsigned short column_width_minus1[21]; + unsigned short row_height_minus1[21]; + + // sps and pps extension HEVC-main 444 + unsigned char sps_range_extension_flag; + unsigned char transform_skip_rotation_enabled_flag; + unsigned char transform_skip_context_enabled_flag; + unsigned char implicit_rdpcm_enabled_flag; + + unsigned char explicit_rdpcm_enabled_flag; + unsigned char extended_precision_processing_flag; + unsigned char intra_smoothing_disabled_flag; + unsigned char persistent_rice_adaptation_enabled_flag; + + unsigned char cabac_bypass_alignment_enabled_flag; + unsigned char pps_range_extension_flag; + unsigned char cross_component_prediction_enabled_flag; + unsigned char chroma_qp_offset_list_enabled_flag; + + unsigned char diff_cu_chroma_qp_offset_depth; + unsigned char 
chroma_qp_offset_list_len_minus1; + signed char cb_qp_offset_list[6]; + + signed char cr_qp_offset_list[6]; + unsigned char reserved2[2]; + + unsigned int reserved3[8]; + + // RefPicSets + int NumBitsForShortTermRPSInSlice; + int NumDeltaPocsOfRefRpsIdx; + int NumPocTotalCurr; + int NumPocStCurrBefore; + int NumPocStCurrAfter; + int NumPocLtCurr; + int CurrPicOrderCntVal; + int RefPicIdx[16]; // [refpic] Indices of valid reference pictures (-1 if unused for reference) + int PicOrderCntVal[16]; // [refpic] + unsigned char IsLongTerm[16]; // [refpic] 0=not a long-term reference, 1=long-term reference + unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15) + unsigned char RefPicSetStCurrAfter[8]; // [0..NumPocStCurrAfter-1] -> refpic (0..15) + unsigned char RefPicSetLtCurr[8]; // [0..NumPocLtCurr-1] -> refpic (0..15) + unsigned char RefPicSetInterLayer0[8]; + unsigned char RefPicSetInterLayer1[8]; + unsigned int reserved4[12]; + + // scaling lists (diag order) + unsigned char ScalingList4x4[6][16]; // [matrixId][i] + unsigned char ScalingList8x8[6][64]; // [matrixId][i] + unsigned char ScalingList16x16[6][64]; // [matrixId][i] + unsigned char ScalingList32x32[2][64]; // [matrixId][i] + unsigned char ScalingListDCCoeff16x16[6]; // [matrixId] + unsigned char ScalingListDCCoeff32x32[2]; // [matrixId] +} CUVIDHEVCPICPARAMS; + + +/***********************************************************/ +//! \struct CUVIDVP8PICPARAMS +//! VP8 picture parameters +//! 
//!   This structure is used in CUVIDPICPARAMS structure
/***********************************************************/
typedef struct _CUVIDVP8PICPARAMS
{
    int width;
    int height;
    unsigned int first_partition_size;

    // Frame indexes
    unsigned char LastRefIdx;
    unsigned char GoldenRefIdx;
    unsigned char AltRefIdx;

    // Frame tag: the same byte is reachable as individual bit-fields
    // (vp8_frame_tag) or as one raw value (wFrameTagFlags).
    union {
        struct {
            unsigned char frame_type : 1;                   /**< 0 = KEYFRAME, 1 = INTERFRAME */
            unsigned char version : 3;
            unsigned char show_frame : 1;
            unsigned char update_mb_segmentation_data : 1;  /**< Must be 0 if segmentation is not enabled */
            unsigned char Reserved2Bits : 2;
        } vp8_frame_tag;
        unsigned char wFrameTagFlags;
    };

    unsigned char Reserved1[4];
    unsigned int Reserved2[3];
} CUVIDVP8PICPARAMS;

/***********************************************************/
//! \struct CUVIDVP9PICPARAMS
//! VP9 picture parameters
//! This structure is used in CUVIDPICPARAMS structure
/***********************************************************/
typedef struct _CUVIDVP9PICPARAMS
{
    unsigned int width;
    unsigned int height;

    // Frame indices
    unsigned char LastRefIdx;
    unsigned char GoldenRefIdx;
    unsigned char AltRefIdx;
    unsigned char colorSpace;

    // 16 bits of frame-level flags
    unsigned short profile : 3;
    unsigned short frameContextIdx : 2;
    unsigned short frameType : 1;
    unsigned short showFrame : 1;
    unsigned short errorResilient : 1;
    unsigned short frameParallelDecoding : 1;
    unsigned short subSamplingX : 1;
    unsigned short subSamplingY : 1;
    unsigned short intraOnly : 1;
    unsigned short allow_high_precision_mv : 1;
    unsigned short refreshEntropyProbs : 1;
    unsigned short reserved2Bits : 2;

    unsigned short reserved16Bits;

    unsigned char refFrameSignBias[4];

    unsigned char bitDepthMinus8Luma;
    unsigned char bitDepthMinus8Chroma;
    unsigned char loopFilterLevel;
    unsigned char loopFilterSharpness;

    unsigned char modeRefLfEnabled;
    unsigned char log2_tile_columns;
    unsigned char log2_tile_rows;

    // 8 bits of segmentation flags
    unsigned char segmentEnabled : 1;
    unsigned char segmentMapUpdate : 1;
    unsigned char segmentMapTemporalUpdate : 1;
    unsigned char segmentFeatureMode : 1;
    unsigned char reserved4Bits : 4;


    unsigned char segmentFeatureEnable[8][4];
    short segmentFeatureData[8][4];
    unsigned char mb_segment_tree_probs[7];
    unsigned char segment_pred_probs[3];
    unsigned char reservedSegment16Bits[2];

    int qpYAc;
    int qpYDc;
    int qpChDc;
    int qpChAc;

    unsigned int activeRefIdx[3];
    unsigned int resetFrameContext;
    unsigned int mcomp_filter_type;
    unsigned int mbRefLfDelta[4];
    unsigned int mbModeLfDelta[2];
    unsigned int frameTagSize;
    unsigned int offsetToDctParts;
    unsigned int reserved128Bits[4];

} CUVIDVP9PICPARAMS;

/***********************************************************/
//! \struct CUVIDAV1PICPARAMS
//! AV1 picture parameters
//! This structure is used in CUVIDPICPARAMS structure
/***********************************************************/
typedef struct _CUVIDAV1PICPARAMS
{
    unsigned int width;         // coded width, if superres enabled then it is upscaled width
    unsigned int height;        // coded height
    unsigned int frame_offset;  // defined as order_hint in AV1 specification
    int decodePicIdx;           // decoded output pic index, if film grain enabled, it will keep decoded (without film grain) output
                                // It can be used as reference frame for future frames

    // sequence header
    unsigned int profile : 3;                     // 0 = profile0, 1 = profile1, 2 = profile2
    unsigned int use_128x128_superblock : 1;      // superblock size 0:64x64, 1: 128x128
    unsigned int subsampling_x : 1;               // (subsampling_x, _y) 1,1 = 420, 1,0 = 422, 0,0 = 444
    unsigned int subsampling_y : 1;
    unsigned int mono_chrome : 1;                 // for monochrome content, mono_chrome = 1 and (subsampling_x, _y) should be 1,1
    unsigned int bit_depth_minus8 : 4;            // bit depth minus 8
    unsigned int enable_filter_intra : 1;         // tool enable in seq level, 0 : disable 1: frame header control
    unsigned int enable_intra_edge_filter : 1;    // intra edge filtering process, 0 : disable 1: enabled
    unsigned int enable_interintra_compound : 1;  // interintra, 0 : not present 1: present
    unsigned int enable_masked_compound : 1;      // 1: mode info for inter blocks may contain the syntax element compound_type.
                                                  // 0: syntax element compound_type will not be present
    unsigned int enable_dual_filter : 1;          // vertical and horiz filter selection, 1: enable and 0: disable
    unsigned int enable_order_hint : 1;           // order hint, and related tools, 1: enable and 0: disable
    unsigned int order_hint_bits_minus1 : 3;      // is used to compute OrderHintBits
    unsigned int enable_jnt_comp : 1;             // joint compound modes, 1: enable and 0: disable
    unsigned int enable_superres : 1;             // superres in seq level, 0 : disable 1: frame level control
    unsigned int enable_cdef : 1;                 // cdef filtering in seq level, 0 : disable 1: frame level control
    unsigned int enable_restoration : 1;          // loop restoration filtering in seq level, 0 : disable 1: frame level control
    unsigned int enable_fgs : 1;                  // defined as film_grain_params_present in AV1 specification
    unsigned int reserved0_7bits : 7;             // reserved bits; must be set to 0

    // frame header
    unsigned int frame_type : 2 ;                   // 0:Key frame, 1:Inter frame, 2:intra only, 3:s-frame
    unsigned int show_frame : 1 ;                   // show_frame = 1 implies that frame should be immediately output once decoded
    unsigned int disable_cdf_update : 1;            // CDF update during symbol decoding, 1: disabled, 0: enabled
    unsigned int allow_screen_content_tools : 1;    // 1: intra blocks may use palette encoding, 0: palette encoding is never used
    unsigned int force_integer_mv : 1;              // 1: motion vectors will always be integers, 0: can contain fractional bits
    unsigned int coded_denom : 3;                   // coded_denom of the superres scale as specified in AV1 specification
    unsigned int allow_intrabc : 1;                 // 1: intra block copy may be used, 0: intra block copy is not allowed
    unsigned int allow_high_precision_mv : 1;       // 1/8 precision mv enable
    unsigned int interp_filter : 3;                 // interpolation filter. Refer to section 6.8.9 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned int switchable_motion_mode : 1;        // defined as is_motion_mode_switchable in AV1 specification
    unsigned int use_ref_frame_mvs : 1;             // 1: current frame can use the previous frame mv information, 0: will not use.
    unsigned int disable_frame_end_update_cdf : 1;  // 1: indicates that the end of frame CDF update is disabled
    unsigned int delta_q_present : 1;               // quantizer index delta values are present in the block level
    unsigned int delta_q_res : 2;                   // left shift which should be applied to decoded quantizer index delta values
    unsigned int using_qmatrix : 1;                 // 1: quantizer matrix will be used to compute quantizers
    unsigned int coded_lossless : 1;                // 1: all segments use lossless coding
    unsigned int use_superres : 1;                  // 1: superres enabled for frame
    unsigned int tx_mode : 2;                       // 0: ONLY4x4,1:LARGEST,2:SELECT
    unsigned int reference_mode : 1;                // 0: SINGLE, 1: SELECT
    unsigned int allow_warped_motion : 1;           // 1: allow_warped_motion may be present, 0: allow_warped_motion will not be present
    unsigned int reduced_tx_set : 1;                // 1: frame is restricted to subset of the full set of transform types, 0: no such restriction
    unsigned int skip_mode : 1;                     // 1: most of the mode info is skipped, 0: mode info is not skipped
    unsigned int reserved1_3bits : 3;               // reserved bits; must be set to 0

    // tiling info
    unsigned int num_tile_cols : 8;            // number of tiles across the frame., max is 64
    unsigned int num_tile_rows : 8;            // number of tiles down the frame., max is 64
    unsigned int context_update_tile_id : 16;  // specifies which tile to use for the CDF update
    unsigned short tile_widths[64];            // Width of each column in superblocks
    unsigned short tile_heights[64];           // height of each row in superblocks

    // CDEF - refer to section 6.10.14 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned char cdef_damping_minus_3 : 2;  // controls the amount of damping in the deringing filter
    unsigned char cdef_bits : 2;             // the number of bits needed to specify which CDEF filter to apply
    unsigned char reserved2_4bits : 4;       // reserved bits; must be set to 0
    unsigned char cdef_y_strength[8];        // 0-3 bits: y_pri_strength, 4-7 bits y_sec_strength
    unsigned char cdef_uv_strength[8];       // 0-3 bits: uv_pri_strength, 4-7 bits uv_sec_strength

    // SkipModeFrames
    unsigned char SkipModeFrame0 : 4;  // specifies the frames to use for compound prediction when skip_mode is equal to 1.
    unsigned char SkipModeFrame1 : 4;

    // qp information - refer to section 6.8.11 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned char base_qindex;  // indicates the base frame qindex. Defined as base_q_idx in AV1 specification
    char qp_y_dc_delta_q;       // indicates the Y DC quantizer relative to base_q_idx. Defined as DeltaQYDc in AV1 specification
    char qp_u_dc_delta_q;       // indicates the U DC quantizer relative to base_q_idx. Defined as DeltaQUDc in AV1 specification
    char qp_v_dc_delta_q;       // indicates the V DC quantizer relative to base_q_idx. Defined as DeltaQVDc in AV1 specification
    char qp_u_ac_delta_q;       // indicates the U AC quantizer relative to base_q_idx. Defined as DeltaQUAc in AV1 specification
    char qp_v_ac_delta_q;       // indicates the V AC quantizer relative to base_q_idx. Defined as DeltaQVAc in AV1 specification
    unsigned char qm_y;         // specifies the level in the quantizer matrix that should be used for luma plane decoding
    unsigned char qm_u;         // specifies the level in the quantizer matrix that should be used for chroma U plane decoding
    unsigned char qm_v;         // specifies the level in the quantizer matrix that should be used for chroma V plane decoding

    // segmentation - refer to section 6.8.13 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned char segmentation_enabled : 1;          // 1 indicates that this frame makes use of the segmentation tool
    unsigned char segmentation_update_map : 1;       // 1 indicates that the segmentation map are updated during the decoding of this frame
    unsigned char segmentation_update_data : 1;      // 1 indicates that new parameters are about to be specified for each segment
    unsigned char segmentation_temporal_update : 1;  // 1 indicates that the updates to the segmentation map are coded relative to the existing segmentation map
    unsigned char reserved3_4bits : 4;               // reserved bits; must be set to 0
    short segmentation_feature_data[8][8];           // specifies the feature data for a segment feature
    unsigned char segmentation_feature_mask[8];      // indicates that the corresponding feature is unused or feature value is coded

    // loopfilter - refer to section 6.8.10 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned char loop_filter_level[2];           // contains loop filter strength values
    unsigned char loop_filter_level_u;            // loop filter strength value of U plane
    unsigned char loop_filter_level_v;            // loop filter strength value of V plane
    unsigned char loop_filter_sharpness;          // indicates the sharpness level
    char loop_filter_ref_deltas[8];               // contains the adjustment needed for the filter level based on the chosen reference frame
    char loop_filter_mode_deltas[2];              // contains the adjustment needed for the filter level based on the chosen mode
    unsigned char loop_filter_delta_enabled : 1;  // indicates that the filter level depends on the mode and reference frame used to predict a block
    unsigned char loop_filter_delta_update : 1;   // indicates that additional syntax elements are present that specify which mode and
                                                  // reference frame deltas are to be updated
    unsigned char delta_lf_present : 1;           // specifies whether loop filter delta values are present in the block level
    unsigned char delta_lf_res : 2;               // specifies the left shift to apply to the decoded loop filter values
    unsigned char delta_lf_multi : 1;             // separate loop filter deltas for Hy,Vy,U,V edges
    unsigned char reserved4_2bits : 2;            // reserved bits; must be set to 0

    // restoration - refer to section 6.10.15 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned char lr_unit_size[3];  // specifies the size of loop restoration units: 0: 32, 1: 64, 2: 128, 3: 256
    unsigned char lr_type[3] ;      // used to compute FrameRestorationType

    // reference frames
    unsigned char primary_ref_frame;  // specifies which reference frame contains the CDF values and other state that should be
                                      // loaded at the start of the frame
    unsigned char ref_frame_map[8];   // frames in dpb that can be used as reference for current or future frames

    unsigned char temporal_layer_id : 4;  // temporal layer id
    unsigned char spatial_layer_id : 4;   // spatial layer id

    unsigned char reserved5_32bits[4];  // reserved bits; must be set to 0

    // ref frame list
    struct
    {
        unsigned int width;
        unsigned int height;
        unsigned char index;
        unsigned char reserved24Bits[3];  // reserved bits; must be set to 0
    } ref_frame[7];                       // frames used as reference frame for current frame.

    // global motion
    struct {
        unsigned char invalid : 1;
        unsigned char wmtype : 2;         // defined as GmType in AV1 specification
        unsigned char reserved5Bits : 5;  // reserved bits; must be set to 0
        char reserved24Bits[3];           // reserved bits; must be set to 0
        int wmmat[6];                     // defined as gm_params[] in AV1 specification
    } global_motion[7];                   // global motion params for reference frames

    // film grain params - refer to section 6.8.20 of the AV1 specification Version 1.0.0 with Errata 1
    unsigned short apply_grain : 1;
    unsigned short overlap_flag : 1;
    unsigned short scaling_shift_minus8 : 2;
    unsigned short chroma_scaling_from_luma : 1;
    unsigned short ar_coeff_lag : 2;
    unsigned short ar_coeff_shift_minus6 : 2;
    unsigned short grain_scale_shift : 2;
    unsigned short clip_to_restricted_range : 1;
    unsigned short reserved6_4bits : 4;  // reserved bits; must be set to 0
    unsigned char num_y_points;
    unsigned char scaling_points_y[14][2];
    unsigned char num_cb_points;
    unsigned char scaling_points_cb[10][2];
    unsigned char num_cr_points;
    unsigned char scaling_points_cr[10][2];
    unsigned char reserved7_8bits;       // reserved bits; must be set to 0
    unsigned short random_seed;
    short ar_coeffs_y[24];
    short ar_coeffs_cb[25];
    short ar_coeffs_cr[25];
    unsigned char cb_mult;
    unsigned char cb_luma_mult;
    short cb_offset;
    unsigned char cr_mult;
    unsigned char cr_luma_mult;
    short cr_offset;

    int reserved[7];  // reserved bits; must be set to 0
} CUVIDAV1PICPARAMS;

/******************************************************************************************/
//! \struct CUVIDPICPARAMS
//! Picture parameters for decoding
//! This structure is used in cuvidDecodePicture API
IN for cuvidDecodePicture +/******************************************************************************************/ +typedef struct _CUVIDPICPARAMS +{ + int PicWidthInMbs; /**< IN: Coded frame size in macroblocks */ + int FrameHeightInMbs; /**< IN: Coded frame height in macroblocks */ + int CurrPicIdx; /**< IN: Output index of the current picture */ + int field_pic_flag; /**< IN: 0=frame picture, 1=field picture */ + int bottom_field_flag; /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ + int second_field; /**< IN: Second field of a complementary field pair */ + // Bitstream data + unsigned int nBitstreamDataLen; /**< IN: Number of bytes in bitstream data buffer */ + const unsigned char *pBitstreamData; /**< IN: Ptr to bitstream data for this picture (slice-layer) */ + unsigned int nNumSlices; /**< IN: Number of slices in this picture */ + const unsigned int *pSliceDataOffsets; /**< IN: nNumSlices entries, contains offset of each slice within + the bitstream data buffer */ + int ref_pic_flag; /**< IN: This picture is a reference picture */ + int intra_pic_flag; /**< IN: This picture is entirely intra coded */ + unsigned int Reserved[30]; /**< Reserved for future use */ + // IN: Codec-specific data + union { + CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */ + CUVIDH264PICPARAMS h264; + CUVIDVC1PICPARAMS vc1; + CUVIDMPEG4PICPARAMS mpeg4; + CUVIDJPEGPICPARAMS jpeg; + CUVIDHEVCPICPARAMS hevc; + CUVIDVP8PICPARAMS vp8; + CUVIDVP9PICPARAMS vp9; + CUVIDAV1PICPARAMS av1; + unsigned int CodecReserved[1024]; + } CodecSpecific; +} CUVIDPICPARAMS; + + +/******************************************************/ +//! \struct CUVIDPROCPARAMS +//! Picture parameters for postprocessing +//! 
This structure is used in cuvidMapVideoFrame API +/******************************************************/ +typedef struct _CUVIDPROCPARAMS +{ + int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ + int second_field; /**< IN: Output the second field (ignored if deinterlace mode is Weave) */ + int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */ + int unpaired_field; /**< IN: Input only contains one field (2nd field is invalid) */ + // The fields below are used for raw YUV input + unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ + unsigned int reserved_zero; /**< Reserved (set to zero) */ + unsigned long long raw_input_dptr; /**< IN: Input CUdeviceptr for raw YUV extensions */ + unsigned int raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately) */ + unsigned int raw_input_format; /**< IN: Input YUV format (cudaVideoCodec_enum) */ + unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions */ + unsigned int raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately) */ + unsigned int Reserved1; /**< Reserved for future use (set to zero) */ + CUstream output_stream; /**< IN: stream object used by cuvidMapVideoFrame */ + unsigned int Reserved[46]; /**< Reserved for future use (set to zero) */ + unsigned long long *histogram_dptr; /**< OUT: Output CUdeviceptr for histogram extensions */ + void *Reserved2[1]; /**< Reserved for future use (set to zero) */ +} CUVIDPROCPARAMS; + +/*********************************************************************************************************/ +//! \struct CUVIDGETDECODESTATUS +//! Struct for reporting decode status. +//! This structure is used in cuvidGetDecodeStatus API. 
+/*********************************************************************************************************/ +typedef struct _CUVIDGETDECODESTATUS +{ + cuvidDecodeStatus decodeStatus; + unsigned int reserved[31]; + void *pReserved[8]; +} CUVIDGETDECODESTATUS; + +/****************************************************/ +//! \struct CUVIDRECONFIGUREDECODERINFO +//! Struct for decoder reset +//! This structure is used in cuvidReconfigureDecoder() API +/****************************************************/ +typedef struct _CUVIDRECONFIGUREDECODERINFO +{ + unsigned int ulWidth; /**< IN: Coded sequence width in pixels, MUST be < = ulMaxWidth defined at CUVIDDECODECREATEINFO */ + unsigned int ulHeight; /**< IN: Coded sequence height in pixels, MUST be < = ulMaxHeight defined at CUVIDDECODECREATEINFO */ + unsigned int ulTargetWidth; /**< IN: Post processed output width */ + unsigned int ulTargetHeight; /**< IN: Post Processed output height */ + unsigned int ulNumDecodeSurfaces; /**< IN: Maximum number of internal decode surfaces */ + unsigned int reserved1[12]; /**< Reserved for future use. Set to Zero */ + /** + * IN: Area of frame to be displayed. Use-case : Source Cropping + */ + struct { + short left; + short top; + short right; + short bottom; + } display_area; + /** + * IN: Target Rectangle in the OutputFrame. Use-case : Aspect ratio Conversion + */ + struct { + short left; + short top; + short right; + short bottom; + } target_rect; + unsigned int reserved2[11]; /**< Reserved for future use. Set to Zero */ +} CUVIDRECONFIGUREDECODERINFO; + + +/***********************************************************************************************************/ +//! VIDEO_DECODER +//! +//! In order to minimize decode latencies, there should be always at least 2 pictures in the decode +//! queue at any time, in order to make sure that all decode engines are always busy. +//! +//! Overall data flow: +//! - cuvidGetDecoderCaps(...) +//! - cuvidCreateDecoder(...) +//! 
//!  - For each picture:
//!    + cuvidDecodePicture(N)
//!    + cuvidMapVideoFrame(N-4)
//!    + do some processing in cuda
//!    + cuvidUnmapVideoFrame(N-4)
//!    + cuvidDecodePicture(N+1)
//!    + cuvidMapVideoFrame(N-3)
//!    + ...
//!  - cuvidDestroyDecoder(...)
//!
//! NOTE:
//!  - When the cuda context is created from a D3D device, the D3D device must also be created
//!    with the D3DCREATE_MULTITHREADED flag.
//!  - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
//!  - cuvidDecodePicture may block the calling thread if there are too many pictures pending
//!    in the decode queue
/***********************************************************************************************************/


/**********************************************************************************************************************/
//! \fn CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc)
//! Queries decode capabilities of NVDEC-HW based on CodecType, ChromaFormat and BitDepthMinus8 parameters.
//! 1. Application fills IN parameters CodecType, ChromaFormat and BitDepthMinus8 of CUVIDDECODECAPS structure
//! 2. On calling cuvidGetDecoderCaps, driver fills OUT parameters if the IN parameters are supported
//!    If IN parameters passed to the driver are not supported by NVDEC-HW, then all OUT params are set to 0.
//! E.g. on Geforce GTX 960:
//!   App fills - eCodecType = cudaVideoCodec_H264; eChromaFormat = cudaVideoChromaFormat_420; nBitDepthMinus8 = 0;
//!   Given IN parameters are supported, hence driver fills: bIsSupported = 1; nMinWidth = 48; nMinHeight = 16;
//!   nMaxWidth = 4096; nMaxHeight = 4096; nMaxMBCount = 65536;
CodedWidth*CodedHeight/256 must be less than or equal to nMaxMBCount +/**********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci) +//! Create the decoder object based on pdci. A handle to the created decoder is returned +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder) +//! Destroy the decoder object +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams) +//! Decode a single picture (field or frame) +//! Kicks off HW decoding +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); + +/************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx); +//! Get the decode status for frame corresponding to nPicIdx +//! API is supported for Maxwell and above generation GPUs. +//! 
API is currently supported for HEVC, H264 and JPEG codecs. +//! API returns CUDA_ERROR_NOT_SUPPORTED error code for unsupported GPU or codec. +/************************************************************************************************************/ +extern CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx, CUVIDGETDECODESTATUS* pDecodeStatus); + +/*********************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams) +//! Used to reuse single decoder for multiple clips. Currently supports resolution change, resize params, display area +//! params, target area params change for same codec. Must be called during CUVIDPARSERPARAMS::pfnSequenceCallback +/*********************************************************************************************************/ +extern CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams); + + +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) +/************************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, +//! unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); +//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated +//! pitch of the video frame +/************************************************************************************************************************/ +extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, + unsigned int *pDevPtr, unsigned int *pPitch, + CUVIDPROCPARAMS *pVPP); + +/*****************************************************************************************************/ +//! 
\fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr) +//! Unmap a previously mapped video frame +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); +#endif + +/****************************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, +//! unsigned int * pPitch, CUVIDPROCPARAMS *pVPP); +//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated +//! pitch of the video frame +/****************************************************************************************************************************/ +extern CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, + unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); + +/**************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); +//! Unmap a previously mapped video frame +/**************************************************************************************************/ +extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); + +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) +#define cuvidMapVideoFrame cuvidMapVideoFrame64 +#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64 +#endif + + + +/********************************************************************************************************************/ +//! +//! Context-locking: to facilitate multi-threaded implementations, the following 4 functions +//! provide a simple mutex-style host synchronization. 
If a non-NULL context is specified +//! in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given +//! context before making any cuda calls. +//! A multi-threaded application could create a lock associated with a context handle so that +//! multiple threads can safely share the same cuda context: +//! - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context +//! that can be passed to cuvidCtxLockCreate. +//! - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section. +//! +//! NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video +//! decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls). +/********************************************************************************************************************/ + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx) +//! This API is used to create CtxLock object +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck) +//! This API is used to free CtxLock object +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck); + +/********************************************************************************************************************/ +//! 
\fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags) +//! This API is used to acquire ctxlock +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags) +//! This API is used to release ctxlock +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); + +/**********************************************************************************************/ + + +#if defined(__cplusplus) +} +// Auto-lock helper for C++ applications +class CCtxAutoLock +{ +private: + CUvideoctxlock m_ctx; +public: + CCtxAutoLock(CUvideoctxlock ctx):m_ctx(ctx) { cuvidCtxLock(m_ctx,0); } + ~CCtxAutoLock() { cuvidCtxUnlock(m_ctx,0); } +}; +#endif /* __cplusplus */ + +#endif // __CUDA_VIDEO_H__ + diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc new file mode 100644 index 0000000000..e5387dd5cc --- /dev/null +++ b/libheif/plugins/decoder_nvdec.cc @@ -0,0 +1,363 @@ +/* + * NVIDIA Decoder. + * Copyright (c) 2023 Brad Hards + * + * This file is part of libheif. + * + * libheif is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * libheif is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with libheif. If not, see . + */ + +#include "libheif/heif.h" +#include "libheif/heif_plugin.h" +#include "common_utils.h" +#include "decoder_nvdec.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nvcuvid.h" +#include "cuviddec.h" +#include "NvDecoder.h" +#include "nalu_utils.h" +#include + +static const int NVDEC_PLUGIN_PRIORITY = 120; + +#define MAX_PLUGIN_NAME_LENGTH 80 + +static char plugin_name[MAX_PLUGIN_NAME_LENGTH]; + +static const char *nvdec_plugin_name() +{ + snprintf(plugin_name, MAX_PLUGIN_NAME_LENGTH, "NVIDIA Video Decoder (Hardware)"); + + // make sure that the string is null-terminated + plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0; + + return plugin_name; +} + +static void nvdec_init_plugin() +{ + cuInit(0); +} + +static void nvdec_deinit_plugin() +{ +} + +static int nvdec_does_support_format(enum heif_compression_format format) +{ + // We have to check the hardware capabilities + CUcontext cuContext = NULL; + CUdevice cuDevice = 0; + + CUresult result; + result = cuDeviceGet(&cuDevice, 0); + if (result != CUDA_SUCCESS) + { + return 0; + } +#if 0 + char szDeviceName[80]; + result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice); + if (result != CUDA_SUCCESS) { + return 0; + } + std::cout << "GPU in use: " << szDeviceName << std::endl; +#endif + result = cuCtxCreate(&cuContext, 0, cuDevice); + if (result != CUDA_SUCCESS) + { + return 0; + } + + CUVIDDECODECAPS decodeCaps = {}; + decodeCaps.eChromaFormat = cudaVideoChromaFormat_420; + decodeCaps.nBitDepthMinus8 = 0; + if (format == heif_compression_JPEG) + { + decodeCaps.eCodecType = cudaVideoCodec_JPEG; + } + else if (format == heif_compression_HEVC) + { + decodeCaps.eCodecType = cudaVideoCodec_HEVC; + } + else if (format == heif_compression_AVC) + { + 
decodeCaps.eCodecType = cudaVideoCodec_H264; + } + else if (format == heif_compression_AV1) + { + decodeCaps.eCodecType = cudaVideoCodec_AV1; + } + else + { + cuCtxDestroy(cuContext); + return 0; + } + result = cuvidGetDecoderCaps(&decodeCaps); + if (result != CUDA_SUCCESS) + { + cuCtxDestroy(cuContext); + return 0; + } + cuCtxDestroy(cuContext); + return decodeCaps.bIsSupported ? NVDEC_PLUGIN_PRIORITY : 0; +} + +struct heif_error nvdec_new_decoder(void **decoder) +{ + struct nvdec_context *ctx = new nvdec_context(); + ctx->strict = false; + ctx->eCodec = cudaVideoCodec_HEVC; + *decoder = ctx; + + return heif_error_ok; +} + +void nvdec_free_decoder(void *decoder) +{ + struct nvdec_context *ctx = (nvdec_context *)decoder; + + if (!ctx) + { + return; + } + + delete ctx; +} + +struct heif_error nvdec_push_data(void *decoder, const void *frame_data, size_t frame_size) +{ + struct nvdec_context *ctx = (struct nvdec_context *)decoder; + + const uint8_t *input_data = (const uint8_t *)frame_data; + + ctx->data.insert(ctx->data.end(), input_data, input_data + frame_size); + + return heif_error_ok; +} + + +struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) +{ + struct nvdec_context *ctx = (struct nvdec_context *)decoder; + + heif_error err; + NalMap nalus; +// TODO +#if 0 + if (ctx->eCodec == cudaVideoCodec_HEVC) { + err = nalus.parseHevcNalu(ctx->data.data(), ctx->data.size()); + if (err.code != heif_error_Ok) { + return err; + } + if ((!nalus.NUTs_are_valid()) || (!nalus.IDR_is_valid())) { + if (!nalus.NUTs_are_valid()) { + printf("NUTs not valid"); + } + if (!nalus.IDR_is_valid()) { + printf("IDR not valid"); + } + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_End_of_data, + "Unexpected end of data"}; + return err; + } + } + if (ctx->eCodec == cudaVideoCodec_H264) { + err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size()); + if (err.code != heif_error_Ok) { + return err; + } + } +#endif + CUdevice cuDevice = 
0; + + CUresult result; + result = cuDeviceGet(&cuDevice, 0); + if (result != CUDA_SUCCESS) + { + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_Plugin_loading_error, + "could not get CUDA device"}; + return err; + } + result = cuCtxCreate(&(ctx->cuContext), 0, cuDevice); + if (result != CUDA_SUCCESS) + { + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_Plugin_loading_error, + "could not get CUDA context"}; + return err; + } + result = cuvidCtxLockCreate(&(ctx->ctxLock), ctx->cuContext); + if (result != CUDA_SUCCESS) { + cuCtxDestroy(ctx->cuContext); + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_Plugin_loading_error, + "could not create CUDA context lock"}; + return err; + } + result = cuStreamCreate(&(ctx->cuvidStream), CU_STREAM_DEFAULT); + if (result != CUDA_SUCCESS) { + const char *szErrName = NULL; + cuGetErrorName(result, &szErrName); + std::ostringstream errMsg; + errMsg << "could not create CUDA stream " << szErrName; + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_Plugin_loading_error, + errMsg.str().c_str()}; + cuvidCtxLockDestroy(ctx->ctxLock); + cuCtxDestroy(ctx->cuContext); + return err; + } + + NvDecoder dec(ctx); + err = dec.initVideoParser(); + if (err.code != heif_error_Ok) { + cuvidCtxLockDestroy(ctx->ctxLock); + cuCtxDestroy(ctx->cuContext); + return err; + } + + int nFrameReturned; +// TODO +#if 0 + if (ctx->eCodec == cudaVideoCodec_HEVC) { + uint8_t *hevc_data; + size_t avc_data_size; + nalus.buildWithStartCodesHEVC(&hevc_data, &avc_data_size); + nFrameReturned = dec.Decode(hevc_data, avc_data_size); + } else if (ctx->eCodec == cudaVideoCodec_H264) { + uint8_t *avc_data; + size_t avc_data_size; + nalus.buildWithStartCodesAVC(&avc_data, &avc_data_size); + nFrameReturned = dec.Decode(avc_data, avc_data_size); + printf("nFrameReturned: %d\n", nFrameReturned); + } else +#endif +// TODO: else closure + nFrameReturned = 
dec.Decode(ctx->data.data(), ctx->data.size()); + + if (nFrameReturned > 0) { + uint8_t *pFrame = dec.GetFrame(); + + struct heif_image *heif_img = nullptr; + // dummy entry for chroma + err = heif_image_create(dec.GetWidth(), dec.GetHeight(), + heif_colorspace_YCbCr, + heif_chroma_420, + &heif_img); + if (err.code != heif_error_Ok) + { + assert(heif_img == nullptr); + return err; + } + heif_image_add_plane(heif_img, heif_channel_Y, dec.GetWidth(), dec.GetHeight(), dec.GetBitDepth()); + heif_image_add_plane(heif_img, heif_channel_Cb, dec.GetWidth() / 2, dec.GetChromaHeight(), dec.GetBitDepth()); + heif_image_add_plane(heif_img, heif_channel_Cr, dec.GetWidth() / 2, dec.GetChromaHeight(), dec.GetBitDepth()); + + int strideY; + uint8_t *Y = heif_image_get_plane(heif_img, heif_channel_Y, &strideY); + for (int r = 0; r < dec.GetHeight(); r++) { + memcpy(Y + r * strideY, pFrame, dec.GetWidth() * dec.GetBPP()); + pFrame += dec.GetWidth() * dec.GetBPP(); + } + int strideCb; + uint8_t *Cb = heif_image_get_plane(heif_img, heif_channel_Cb, &strideCb); + for (int r = 0; r < dec.GetChromaHeight(); r++) { + memcpy(Cb + r * strideCb, pFrame, (dec.GetWidth() / 2) * dec.GetBPP()); + pFrame += (dec.GetWidth() / 2) * dec.GetBPP(); + } + int strideCr; + uint8_t *Cr = heif_image_get_plane(heif_img, heif_channel_Cr, &strideCr); + for (int r = 0; r < dec.GetChromaHeight(); r++) { + memcpy(Cr + r * strideCr, pFrame, (dec.GetWidth() / 2) * dec.GetBPP()); + pFrame += (dec.GetWidth() / 2) * dec.GetBPP(); + } + *out_img = heif_img; + } + return heif_error_ok; +} + +void nvdec_set_strict_decoding(void *decoder, int strict) +{ + struct nvdec_context *ctx = (struct nvdec_context *)decoder; + ctx->strict = strict; +} + +struct heif_error nvdec_new_decoder2(void **decoder, const heif_decoder_configuration *config) +{ + struct nvdec_context *ctx = new nvdec_context(); + ctx->strict = false; + switch (config->compression_format) { + case heif_compression_AV1: + ctx->eCodec = cudaVideoCodec_AV1; + 
break; + case heif_compression_AVC: + ctx->eCodec = cudaVideoCodec_H264; + break; + case heif_compression_HEVC: + ctx->eCodec = cudaVideoCodec_HEVC; + break; + case heif_compression_JPEG: + ctx->eCodec = cudaVideoCodec_JPEG; + break; + default: + delete ctx; + struct heif_error err = {heif_error_Decoder_plugin_error, + heif_suberror_Plugin_loading_error, + "unsupported compression format"}; + return err; + } + *decoder = ctx; + + return heif_error_ok; +} + +static const struct heif_decoder_plugin decoder_nvdec +{ + 4, + nvdec_plugin_name, + nvdec_init_plugin, + nvdec_deinit_plugin, + nvdec_does_support_format, + nvdec_new_decoder, + nvdec_free_decoder, + nvdec_push_data, + nvdec_decode_image, + nvdec_set_strict_decoding, + "NVDEC", + nvdec_new_decoder2 +}; + +const struct heif_decoder_plugin *get_decoder_plugin_nvdec() +{ + return &decoder_nvdec; +} + +#if PLUGIN_NVDEC +heif_plugin_info plugin_info{ + 1, + heif_plugin_type_decoder, + &decoder_nvdec}; +#endif diff --git a/libheif/plugins/decoder_nvdec.h b/libheif/plugins/decoder_nvdec.h new file mode 100644 index 0000000000..79840fddde --- /dev/null +++ b/libheif/plugins/decoder_nvdec.h @@ -0,0 +1,34 @@ +/* + * NVIDIA Decoder. + * Copyright (c) 2023 Brad Hards + * + * This file is part of libheif. + * + * libheif is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * libheif is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with libheif. If not, see . 
+ */ + +#ifndef LIBHEIF_HEIF_DECODER_NVDEC_H +#define LIBHEIF_HEIF_DECODER_NVDEC_H + +#include "common_utils.h" + +const struct heif_decoder_plugin* get_decoder_plugin_nvdec(); + +#if PLUGIN_NVDEC +extern "C" { +MAYBE_UNUSED LIBHEIF_API extern heif_plugin_info plugin_info; +} +#endif + +#endif diff --git a/libheif/plugins/nvcuvid.h b/libheif/plugins/nvcuvid.h new file mode 100644 index 0000000000..c548a22666 --- /dev/null +++ b/libheif/plugins/nvcuvid.h @@ -0,0 +1,553 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2024 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/********************************************************************************************************************/ +//! \file nvcuvid.h +//! NVDECODE API provides video decoding interface to NVIDIA GPU devices. +//! \date 2015-2024 +//! 
This file contains the interface constants, structure definitions and function prototypes. +/********************************************************************************************************************/ + +#if !defined(__NVCUVID_H__) +#define __NVCUVID_H__ + +#include "cuviddec.h" + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +#define MAX_CLOCK_TS 3 + +/***********************************************/ +//! +//! High-level helper APIs for video sources +//! +/***********************************************/ + +typedef void *CUvideosource; +typedef void *CUvideoparser; +typedef long long CUvideotimestamp; + + +/************************************************************************/ +//! \enum cudaVideoState +//! Video source state enums +//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs +/************************************************************************/ +typedef enum { + cudaVideoState_Error = -1, /**< Error state (invalid source) */ + cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */ + cudaVideoState_Started = 1 /**< Source is running and delivering data */ +} cudaVideoState; + +/************************************************************************/ +//! \enum cudaAudioCodec +//! Audio compression enums +//! Used in CUAUDIOFORMAT structure +/************************************************************************/ +typedef enum { + cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ + cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ + cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ + cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ + cudaAudioCodec_LPCM, /**< PCM Audio */ + cudaAudioCodec_AAC, /**< AAC Audio */ +} cudaAudioCodec; + +/************************************************************************/ +//! \ingroup STRUCTS +//! \struct TIMECODESET +//! 
Used to store Time code set extracted from H264 and HEVC codecs +/************************************************************************/ +typedef struct _TIMECODESET +{ + unsigned int time_offset_value; + unsigned short n_frames; + unsigned char clock_timestamp_flag; + unsigned char units_field_based_flag; + unsigned char counting_type; + unsigned char full_timestamp_flag; + unsigned char discontinuity_flag; + unsigned char cnt_dropped_flag; + unsigned char seconds_value; + unsigned char minutes_value; + unsigned char hours_value; + unsigned char seconds_flag; + unsigned char minutes_flag; + unsigned char hours_flag; + unsigned char time_offset_length; + unsigned char reserved; +} TIMECODESET; + +/************************************************************************/ +//! \ingroup STRUCTS +//! \struct TIMECODE +//! Used to extract Time code in H264 and HEVC codecs +/************************************************************************/ +typedef struct _TIMECODE +{ + TIMECODESET time_code_set[MAX_CLOCK_TS]; + unsigned char num_clock_ts; +} TIMECODE; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct SEIMASTERINGDISPLAYINFO +//! Used to extract mastering display color volume SEI in H264 and HEVC codecs +/**********************************************************************************/ +typedef struct _SEIMASTERINGDISPLAYINFO +{ + unsigned short display_primaries_x[3]; + unsigned short display_primaries_y[3]; + unsigned short white_point_x; + unsigned short white_point_y; + unsigned int max_display_mastering_luminance; + unsigned int min_display_mastering_luminance; +} SEIMASTERINGDISPLAYINFO; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct SEICONTENTLIGHTLEVELINFO +//! 
Used to extract content light level info SEI in H264 and HEVC codecs +/**********************************************************************************/ +typedef struct _SEICONTENTLIGHTLEVELINFO +{ + unsigned short max_content_light_level; + unsigned short max_pic_average_light_level; + unsigned int reserved; +} SEICONTENTLIGHTLEVELINFO; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct TIMECODEMPEG2 +//! Used to extract Time code in MPEG2 codec +/**********************************************************************************/ +typedef struct _TIMECODEMPEG2 +{ + unsigned char drop_frame_flag; + unsigned char time_code_hours; + unsigned char time_code_minutes; + unsigned char marker_bit; + unsigned char time_code_seconds; + unsigned char time_code_pictures; +} TIMECODEMPEG2; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct SEIALTERNATIVETRANSFERCHARACTERISTICS +//! Used to extract alternative transfer characteristics SEI in H264 and HEVC codecs +/**********************************************************************************/ +typedef struct _SEIALTERNATIVETRANSFERCHARACTERISTICS +{ + unsigned char preferred_transfer_characteristics; +} SEIALTERNATIVETRANSFERCHARACTERISTICS; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUSEIMESSAGE; +//! Used in CUVIDSEIMESSAGEINFO structure +/**********************************************************************************/ +typedef struct _CUSEIMESSAGE +{ + unsigned char sei_message_type; /**< OUT: SEI Message Type */ + unsigned char reserved[3]; + unsigned int sei_message_size; /**< OUT: SEI Message Size */ +} CUSEIMESSAGE; + +/************************************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDEOFORMAT +//! Video format +//! 
Used in cuvidGetSourceVideoFormat API +/************************************************************************************************/ +typedef struct +{ + cudaVideoCodec codec; /**< OUT: Compression format */ + /** + * OUT: frame rate = numerator / denominator (for example: 30000/1001) + */ + struct { + /**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */ + unsigned int numerator; + /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */ + unsigned int denominator; + } frame_rate; + unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */ + unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ + unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ + unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct + decoding. The client can send this value in ulNumDecodeSurfaces + (in CUVIDDECODECREATEINFO structure). + This guarantees correct functionality and optimal video memory + usage but not necessarily the best performance, which depends on + the design of the overall application. The optimal number of + decode surfaces (in terms of performance and memory utilization) + should be decided by experimentation for each application, but it + cannot go below min_num_decode_surfaces. + If this value is used for ulNumDecodeSurfaces then it must be + returned to parser during sequence callback. 
*/ + unsigned int coded_width; /**< OUT: coded frame width in pixels */ + unsigned int coded_height; /**< OUT: coded frame height in pixels */ + /** + * area of the frame that should be displayed + * typical example: + * coded_width = 1920, coded_height = 1088 + * display_area = { 0,0,1920,1080 } + */ + struct { + int left; /**< OUT: left position of display rect */ + int top; /**< OUT: top position of display rect */ + int right; /**< OUT: right position of display rect */ + int bottom; /**< OUT: bottom position of display rect */ + } display_area; + cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */ + unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ + /** + * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc) + */ + struct { + int x; + int y; + } display_aspect_ratio; + /** + * Video Signal Description + * Refer section E.2.1 (VUI parameters semantics) of H264 spec file + */ + struct { + unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */ + unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */ + unsigned char reserved_zero_bits : 4; /**< Reserved bits */ + unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */ + unsigned char transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */ + unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */ + } video_signal_description; + unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */ +} CUVIDEOFORMAT; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDOPERATINGPOINTINFO +//! 
Operating point information of scalable bitstream +/****************************************************************/ +typedef struct +{ + cudaVideoCodec codec; + union + { + struct + { + unsigned char operating_points_cnt; + unsigned char reserved24_bits[3]; + unsigned short operating_points_idc[32]; + } av1; + unsigned char CodecReserved[1024]; + }; +} CUVIDOPERATINGPOINTINFO; + +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDSEIMESSAGEINFO +//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg +/**********************************************************************************/ +typedef struct _CUVIDSEIMESSAGEINFO +{ + void *pSEIData; /**< OUT: SEI Message Data */ + CUSEIMESSAGE *pSEIMessage; /**< OUT: SEI Message Info */ + unsigned int sei_message_count; /**< OUT: SEI Message Count */ + unsigned int picIdx; /**< OUT: SEI Message Pic Index */ +} CUVIDSEIMESSAGEINFO; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDAV1SEQHDR +//! AV1 specific sequence header information +/****************************************************************/ +typedef struct { + unsigned int max_width; + unsigned int max_height; + unsigned char reserved[1016]; +} CUVIDAV1SEQHDR; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDEOFORMATEX +//! Video format including raw sequence header information +//! Used in cuvidGetSourceVideoFormat API +/****************************************************************/ +typedef struct +{ + CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */ + union { + CUVIDAV1SEQHDR av1; + unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ + }; +} CUVIDEOFORMATEX; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUAUDIOFORMAT +//! Audio formats +//! 
Used in cuvidGetSourceAudioFormat API +/****************************************************************/ +typedef struct +{ + cudaAudioCodec codec; /**< OUT: Compression format */ + unsigned int channels; /**< OUT: number of audio channels */ + unsigned int samplespersec; /**< OUT: sampling frequency */ + unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */ + unsigned int reserved1; /**< Reserved for future use */ + unsigned int reserved2; /**< Reserved for future use */ +} CUAUDIOFORMAT; + + +/***************************************************************/ +//! \enum CUvideopacketflags +//! Data packet flags +//! Used in CUVIDSOURCEDATAPACKET structure +/***************************************************************/ +typedef enum { + CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ + CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ + CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */ + CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */ + CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy) + display callback will be invoked with null value of CUVIDPARSERDISPINFO which + should be interpreted as end of the stream. */ +} CUvideopacketflags; + +/*****************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDSOURCEDATAPACKET +//! Data Packet +//! Used in cuvidParseVideoData API +//! 
IN for cuvidParseVideoData +/*****************************************************************************/ +typedef struct _CUVIDSOURCEDATAPACKET +{ + unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */ + unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */ + const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */ + CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if + CUVID_PKT_TIMESTAMP flag is set */ +} CUVIDSOURCEDATAPACKET; + +// Callback for packet delivery +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); + +/**************************************************************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDSOURCEPARAMS +//! Describes parameters needed in cuvidCreateVideoSource API +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/**************************************************************************************************************************/ +typedef struct _CUVIDSOURCEPARAMS +{ + unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */ + unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ + unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ + unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */ + void *pUserData; /**< IN: User private data passed in to the data handlers */ + PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */ + PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. 
*/ + void *pvReserved2[8]; /**< Reserved for future use - set to NULL */ +} CUVIDSOURCEPARAMS; + + +/**********************************************/ +//! \ingroup ENUMS +//! \enum CUvideosourceformat_flags +//! CUvideosourceformat_flags +//! Used in cuvidGetSourceVideoFormat API +/**********************************************/ +typedef enum { + CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */ +} CUvideosourceformat_flags; + +#if !defined(__APPLE__) +/***************************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams) +//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks: +//! pfnVideoDataHandler() and pfnAudioDataHandler() +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/***************************************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); + +/***************************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) +//! 
Create video source +/***************************************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); + +/********************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) +//! Destroy video source +/********************************************************************/ +CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj); + +/******************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state) +//! Set video source state to: +//! cudaVideoState_Started - to signal the source to run and deliver data +//! cudaVideoState_Stopped - to stop the source from delivering the data +//! cudaVideoState_Error - invalid source +/******************************************************************************************/ +CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); + +/******************************************************************************************/ +//! \ingroup FUNCTS +//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) +//! Get video source state +//! Returns: +//! cudaVideoState_Started - if Source is running and delivering data +//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream +//! cudaVideoState_Error - if Source is in error state +/******************************************************************************************/ +cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj); + +/******************************************************************************************************************/ +//! \ingroup FUNCTS +//! 
\fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags) +//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement +/******************************************************************************************************************/ +CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); + +/**************************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags) +//! Get audio source format +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/**************************************************************************************************************************/ +CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); + +#endif +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDPARSERDISPINFO +//! 
Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture +/**********************************************************************************/ +typedef struct _CUVIDPARSERDISPINFO +{ + int picture_index; /**< OUT: Index of the current picture */ + int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */ + int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */ + int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, + -1=unpaired field) */ + CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */ +} CUVIDPARSERDISPINFO; + +/***********************************************************************************************************************/ +//! Parser callbacks +//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture +//! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS +//! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by +//! cuvidParseVideoData() to the application. +//! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is +//! -1 or invalid operating point. +//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces +//! while creating parser) +//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded +//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded +//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved) +//! 
PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded +/***********************************************************************************************************************/ +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); +typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*); +typedef int (CUDAAPI *PFNVIDSEIMSGCALLBACK) (void *, CUVIDSEIMESSAGEINFO *); + +/**************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDPARSERPARAMS +//! Used in cuvidCreateVideoParser API +/**************************************/ +typedef struct _CUVIDPARSERPARAMS +{ + cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */ + unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */ + unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */ + unsigned int ulErrorThreshold; /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always + IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */ + unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display) + 0=no delay (recommended values: 2..4) */ + unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ + unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ + unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */ + void *pUserData; /**< IN: User data for callbacks */ + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */ + PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */ + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */ + 
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1 + scalable bitstream */ + PFNVIDSEIMSGCALLBACK pfnGetSEIMsg; /**< IN: Called when all SEI messages are parsed for particular frame */ + void *pvReserved2[5]; /**< Reserved for future use - set to NULL */ + CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */ +} CUVIDPARSERPARAMS; + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams) +//! Create video parser object and initialize +/************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket) +//! Parse the video data from source data packet in pPacket +//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and +//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding +//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when +//! the decoder encounters a video format change +//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame +/************************************************************************************************/ +CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj) +//! 
Destroy the video parser +/************************************************************************************************/ +CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj); + +/**********************************************************************************************/ + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + +#endif // __NVCUVID_H__ + + From 8c7cbe0f75942fd3e374ce8f72c7231b6580f0a4 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 10:10:45 +1000 Subject: [PATCH 03/41] nvdec: enable HEVC support --- libheif/plugins/decoder_nvdec.cc | 116 +++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 22 deletions(-) diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc index e5387dd5cc..2e8af06399 100644 --- a/libheif/plugins/decoder_nvdec.cc +++ b/libheif/plugins/decoder_nvdec.cc @@ -75,7 +75,7 @@ static int nvdec_does_support_format(enum heif_compression_format format) { return 0; } -#if 0 +#if 1 char szDeviceName[80]; result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice); if (result != CUDA_SUCCESS) { @@ -163,26 +163,14 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) heif_error err; NalMap nalus; -// TODO -#if 0 if (ctx->eCodec == cudaVideoCodec_HEVC) { err = nalus.parseHevcNalu(ctx->data.data(), ctx->data.size()); if (err.code != heif_error_Ok) { return err; } - if ((!nalus.NUTs_are_valid()) || (!nalus.IDR_is_valid())) { - if (!nalus.NUTs_are_valid()) { - printf("NUTs not valid"); - } - if (!nalus.IDR_is_valid()) { - printf("IDR not valid"); - } - struct heif_error err = {heif_error_Decoder_plugin_error, - heif_suberror_End_of_data, - "Unexpected end of data"}; - return err; - } } +// TODO +#if 0 if (ctx->eCodec == cudaVideoCodec_H264) { err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size()); if (err.code != heif_error_Ok) { @@ -240,13 +228,97 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) 
} int nFrameReturned; -// TODO -#if 0 if (ctx->eCodec == cudaVideoCodec_HEVC) { uint8_t *hevc_data; - size_t avc_data_size; - nalus.buildWithStartCodesHEVC(&hevc_data, &avc_data_size); - nFrameReturned = dec.Decode(hevc_data, avc_data_size); + size_t hevc_data_size; + { + int heif_idrpic_size; + int heif_vps_size; + int heif_sps_size; + int heif_pps_size; + const unsigned char* heif_vps_data; + const unsigned char* heif_sps_data; + const unsigned char* heif_pps_data; + const unsigned char* heif_idrpic_data; + + if ((nalus.count(NAL_UNIT_VPS_NUT) > 0) && (nalus.count(NAL_UNIT_SPS_NUT) > 0) && (nalus.count(NAL_UNIT_PPS_NUT) > 0)) + { + heif_vps_size = nalus.size(NAL_UNIT_VPS_NUT); + heif_vps_data = nalus.data(NAL_UNIT_VPS_NUT); + + heif_sps_size = nalus.size(NAL_UNIT_SPS_NUT); + heif_sps_data = nalus.data(NAL_UNIT_SPS_NUT); + + heif_pps_size = nalus.size(NAL_UNIT_PPS_NUT); + heif_pps_data = nalus.data(NAL_UNIT_PPS_NUT); + } + else + { + struct heif_error err = { heif_error_Decoder_plugin_error, + heif_suberror_End_of_data, + "Unexpected end of data" }; + return err; + } + + if ((nalus.count(NAL_UNIT_IDR_W_RADL) > 0) || (nalus.count(NAL_UNIT_IDR_N_LP) > 0)) + { + if (nalus.count(NAL_UNIT_IDR_W_RADL) > 0) + { + heif_idrpic_data = nalus.data(NAL_UNIT_IDR_W_RADL); + heif_idrpic_size = nalus.size(NAL_UNIT_IDR_W_RADL); + } + else + { + heif_idrpic_data = nalus.data(NAL_UNIT_IDR_N_LP); + heif_idrpic_size = nalus.size(NAL_UNIT_IDR_N_LP); + } + } + else + { + struct heif_error err = { heif_error_Decoder_plugin_error, + heif_suberror_End_of_data, + "Unexpected end of data" }; + return err; + } + + const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 }; + int hevc_AnnexB_StartCode_size = 4; + + hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size; + hevc_data = (uint8_t*)malloc(hevc_data_size); + + //Copy hevc pps data + uint8_t* hevc_data_ptr = hevc_data; + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, 
hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size); + hevc_data_ptr += heif_vps_size; + + //Copy hevc sps data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size); + hevc_data_ptr += heif_sps_size; + + //Copy hevc pps data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size); + hevc_data_ptr += heif_pps_size; + + //Copy hevc idrpic data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size); + + // decoder->NalMap not needed anymore + nalus.clear(); + } + + nFrameReturned = dec.Decode(hevc_data, hevc_data_size); + } else { +// TODO +#if 0 } else if (ctx->eCodec == cudaVideoCodec_H264) { uint8_t *avc_data; size_t avc_data_size; @@ -255,8 +327,8 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) printf("nFrameReturned: %d\n", nFrameReturned); } else #endif -// TODO: else closure - nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size()); + nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size()); + } if (nFrameReturned > 0) { uint8_t *pFrame = dec.GetFrame(); From 6540c04084550659a60fdeacf66213d84e7296ee Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 11:11:39 +1000 Subject: [PATCH 04/41] nvdec: clean up HEVC --- libheif/plugins/decoder_ffmpeg.cc | 90 +++---------------------------- libheif/plugins/decoder_nvdec.cc | 88 ++---------------------------- libheif/plugins/nalu_utils.cc | 88 ++++++++++++++++++++++++++++++ libheif/plugins/nalu_utils.h | 2 + 4 files changed, 101 insertions(+), 167 deletions(-) diff --git a/libheif/plugins/decoder_ffmpeg.cc 
b/libheif/plugins/decoder_ffmpeg.cc index c18502d086..5019d25358 100644 --- a/libheif/plugins/decoder_ffmpeg.cc +++ b/libheif/plugins/decoder_ffmpeg.cc @@ -264,90 +264,12 @@ static struct heif_error ffmpeg_v1_decode_image(void* decoder_raw, { struct ffmpeg_decoder* decoder = (struct ffmpeg_decoder*) decoder_raw; - int heif_idrpic_size; - int heif_vps_size; - int heif_sps_size; - int heif_pps_size; - const unsigned char* heif_vps_data; - const unsigned char* heif_sps_data; - const unsigned char* heif_pps_data; - const unsigned char* heif_idrpic_data; - - if ((decoder->nalMap.count(NAL_UNIT_VPS_NUT) > 0) - && (decoder->nalMap.count(NAL_UNIT_SPS_NUT) > 0) - && (decoder->nalMap.count(NAL_UNIT_PPS_NUT) > 0) - ) - { - heif_vps_size = decoder->nalMap.size(NAL_UNIT_VPS_NUT); - heif_vps_data = decoder->nalMap.data(NAL_UNIT_VPS_NUT); - - heif_sps_size = decoder->nalMap.size(NAL_UNIT_SPS_NUT); - heif_sps_data = decoder->nalMap.data(NAL_UNIT_SPS_NUT); - - heif_pps_size = decoder->nalMap.size(NAL_UNIT_PPS_NUT); - heif_pps_data = decoder->nalMap.data(NAL_UNIT_PPS_NUT); + uint8_t *hevc_data; + size_t hevc_data_size; + heif_error err = decoder->nalMap.buildWithStartCodesHevc(&hevc_data, &hevc_data_size, AV_INPUT_BUFFER_PADDING_SIZE); + if (err.code != heif_error_Ok) { + return err; } - else - { - struct heif_error err = { heif_error_Decoder_plugin_error, - heif_suberror_End_of_data, - "Unexpected end of data" }; - return err; - } - - if ((decoder->nalMap.count(NAL_UNIT_IDR_W_RADL) > 0) || (decoder->nalMap.count(NAL_UNIT_IDR_N_LP) > 0)) - { - if (decoder->nalMap.count(NAL_UNIT_IDR_W_RADL) > 0) - { - heif_idrpic_data = decoder->nalMap.data(NAL_UNIT_IDR_W_RADL); - heif_idrpic_size = decoder->nalMap.size(NAL_UNIT_IDR_W_RADL); - } - else - { - heif_idrpic_data = decoder->nalMap.data(NAL_UNIT_IDR_N_LP); - heif_idrpic_size = decoder->nalMap.size(NAL_UNIT_IDR_N_LP); - } - } - else - { - struct heif_error err = { heif_error_Decoder_plugin_error, - heif_suberror_End_of_data, - "Unexpected 
end of data" }; - return err; - } - - const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 }; - int hevc_AnnexB_StartCode_size = 4; - - size_t hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size; - uint8_t* hevc_data = (uint8_t*)malloc(hevc_data_size + AV_INPUT_BUFFER_PADDING_SIZE); - - //Copy hevc pps data - uint8_t* hevc_data_ptr = hevc_data; - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size); - hevc_data_ptr += heif_vps_size; - - //Copy hevc sps data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size); - hevc_data_ptr += heif_sps_size; - - //Copy hevc pps data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size); - hevc_data_ptr += heif_pps_size; - - //Copy hevc idrpic data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size); - - // decoder->NalMap not needed anymore - decoder->nalMap.clear(); const AVCodec* hevc_codec = NULL; AVCodecParserContext* hevc_parser = NULL; @@ -358,7 +280,7 @@ static struct heif_error ffmpeg_v1_decode_image(void* decoder_raw, struct heif_color_profile_nclx* nclx = NULL; int ret = 0; - struct heif_error err = heif_error_success; + err = heif_error_success; uint8_t* parse_hevc_data = NULL; int parse_hevc_data_size = 0; diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc index 2e8af06399..a297f98c54 100644 --- a/libheif/plugins/decoder_nvdec.cc +++ b/libheif/plugins/decoder_nvdec.cc @@ -231,90 +231,12 @@ struct heif_error 
nvdec_decode_image(void *decoder, struct heif_image **out_img) if (ctx->eCodec == cudaVideoCodec_HEVC) { uint8_t *hevc_data; size_t hevc_data_size; - { - int heif_idrpic_size; - int heif_vps_size; - int heif_sps_size; - int heif_pps_size; - const unsigned char* heif_vps_data; - const unsigned char* heif_sps_data; - const unsigned char* heif_pps_data; - const unsigned char* heif_idrpic_data; - - if ((nalus.count(NAL_UNIT_VPS_NUT) > 0) && (nalus.count(NAL_UNIT_SPS_NUT) > 0) && (nalus.count(NAL_UNIT_PPS_NUT) > 0)) - { - heif_vps_size = nalus.size(NAL_UNIT_VPS_NUT); - heif_vps_data = nalus.data(NAL_UNIT_VPS_NUT); - - heif_sps_size = nalus.size(NAL_UNIT_SPS_NUT); - heif_sps_data = nalus.data(NAL_UNIT_SPS_NUT); - - heif_pps_size = nalus.size(NAL_UNIT_PPS_NUT); - heif_pps_data = nalus.data(NAL_UNIT_PPS_NUT); - } - else - { - struct heif_error err = { heif_error_Decoder_plugin_error, - heif_suberror_End_of_data, - "Unexpected end of data" }; - return err; - } - - if ((nalus.count(NAL_UNIT_IDR_W_RADL) > 0) || (nalus.count(NAL_UNIT_IDR_N_LP) > 0)) - { - if (nalus.count(NAL_UNIT_IDR_W_RADL) > 0) - { - heif_idrpic_data = nalus.data(NAL_UNIT_IDR_W_RADL); - heif_idrpic_size = nalus.size(NAL_UNIT_IDR_W_RADL); - } - else - { - heif_idrpic_data = nalus.data(NAL_UNIT_IDR_N_LP); - heif_idrpic_size = nalus.size(NAL_UNIT_IDR_N_LP); - } - } - else - { - struct heif_error err = { heif_error_Decoder_plugin_error, - heif_suberror_End_of_data, - "Unexpected end of data" }; - return err; - } - - const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 }; - int hevc_AnnexB_StartCode_size = 4; - - hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size; - hevc_data = (uint8_t*)malloc(hevc_data_size); - - //Copy hevc pps data - uint8_t* hevc_data_ptr = hevc_data; - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_vps_data, 
heif_vps_size); - hevc_data_ptr += heif_vps_size; - - //Copy hevc sps data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size); - hevc_data_ptr += heif_sps_size; - - //Copy hevc pps data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size); - hevc_data_ptr += heif_pps_size; - - //Copy hevc idrpic data - memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); - hevc_data_ptr += hevc_AnnexB_StartCode_size; - memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size); - - // decoder->NalMap not needed anymore - nalus.clear(); + err = nalus.buildWithStartCodesHevc(&hevc_data, &hevc_data_size, 0); + if (err.code != heif_error_Ok) { + cuvidCtxLockDestroy(ctx->ctxLock); + cuCtxDestroy(ctx->cuContext); + return err; } - nFrameReturned = dec.Decode(hevc_data, hevc_data_size); } else { // TODO diff --git a/libheif/plugins/nalu_utils.cc b/libheif/plugins/nalu_utils.cc index f4f3731d60..a668ca2c04 100644 --- a/libheif/plugins/nalu_utils.cc +++ b/libheif/plugins/nalu_utils.cc @@ -22,6 +22,8 @@ #include #include #include "nalu_utils.h" +#include +#include NalUnit::NalUnit() { @@ -94,4 +96,90 @@ const heif_error NalMap::parseHevcNalu(const uint8_t *cdata, size_t size) return heif_error_success; } +heif_error NalMap::buildWithStartCodesHevc(uint8_t **hevc_data, size_t *hevc_data_size, size_t additional_pad_size) +{ + int heif_idrpic_size; + int heif_vps_size; + int heif_sps_size; + int heif_pps_size; + const unsigned char* heif_vps_data; + const unsigned char* heif_sps_data; + const unsigned char* heif_pps_data; + const unsigned char* heif_idrpic_data; + + if ((count(NAL_UNIT_VPS_NUT) > 0) && (count(NAL_UNIT_SPS_NUT) > 0) && (count(NAL_UNIT_PPS_NUT) > 0)) + { + heif_vps_size = size(NAL_UNIT_VPS_NUT); + heif_vps_data = 
data(NAL_UNIT_VPS_NUT); + + heif_sps_size = size(NAL_UNIT_SPS_NUT); + heif_sps_data = data(NAL_UNIT_SPS_NUT); + + heif_pps_size = size(NAL_UNIT_PPS_NUT); + heif_pps_data = data(NAL_UNIT_PPS_NUT); + } + else + { + struct heif_error err = { heif_error_Decoder_plugin_error, + heif_suberror_End_of_data, + "Unexpected end of data" }; + return err; + } + + if ((count(NAL_UNIT_IDR_W_RADL) > 0) || (count(NAL_UNIT_IDR_N_LP) > 0)) + { + if (count(NAL_UNIT_IDR_W_RADL) > 0) + { + heif_idrpic_data = data(NAL_UNIT_IDR_W_RADL); + heif_idrpic_size = size(NAL_UNIT_IDR_W_RADL); + } + else + { + heif_idrpic_data = data(NAL_UNIT_IDR_N_LP); + heif_idrpic_size = size(NAL_UNIT_IDR_N_LP); + } + } + else + { + struct heif_error err = { heif_error_Decoder_plugin_error, + heif_suberror_End_of_data, + "Unexpected end of data" }; + return err; + } + + const char hevc_AnnexB_StartCode[] = { 0x00, 0x00, 0x00, 0x01 }; + int hevc_AnnexB_StartCode_size = 4; + + *hevc_data_size = heif_vps_size + heif_sps_size + heif_pps_size + heif_idrpic_size + 4 * hevc_AnnexB_StartCode_size; + *hevc_data = (uint8_t*)malloc(*hevc_data_size + additional_pad_size); + + //Copy hevc pps data + uint8_t* hevc_data_ptr = *hevc_data; + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_vps_data, heif_vps_size); + hevc_data_ptr += heif_vps_size; + + //Copy hevc sps data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_sps_data, heif_sps_size); + hevc_data_ptr += heif_sps_size; + + //Copy hevc pps data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_pps_data, heif_pps_size); + hevc_data_ptr += heif_pps_size; + + //Copy hevc idrpic data + memcpy(hevc_data_ptr, hevc_AnnexB_StartCode, hevc_AnnexB_StartCode_size); + 
hevc_data_ptr += hevc_AnnexB_StartCode_size; + memcpy(hevc_data_ptr, heif_idrpic_data, heif_idrpic_size); + + map.clear(); + + return heif_error_success; +} + void NalMap::clear() { map.clear(); } \ No newline at end of file diff --git a/libheif/plugins/nalu_utils.h b/libheif/plugins/nalu_utils.h index 7c4b86f7ad..721fa003f7 100644 --- a/libheif/plugins/nalu_utils.h +++ b/libheif/plugins/nalu_utils.h @@ -55,6 +55,8 @@ class NalMap const heif_error parseHevcNalu(const uint8_t *cdata, size_t size); + heif_error buildWithStartCodesHevc(uint8_t **data, size_t *size, size_t additional_pad_size); + void clear(); private: std::map> map; From 346bea602756b7d9e99695b7666b09b8375b7a29 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 11:25:53 +1000 Subject: [PATCH 05/41] nvdec: add AVC support --- libheif/plugins/decoder_nvdec.cc | 72 ++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc index a297f98c54..478430089f 100644 --- a/libheif/plugins/decoder_nvdec.cc +++ b/libheif/plugins/decoder_nvdec.cc @@ -38,6 +38,8 @@ #include "nalu_utils.h" #include +static heif_error kError_EOF = {heif_error_Decoder_plugin_error, heif_suberror_End_of_data, "Insufficient input data"}; + static const int NVDEC_PLUGIN_PRIORITY = 120; #define MAX_PLUGIN_NAME_LENGTH 80 @@ -169,15 +171,6 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) return err; } } -// TODO -#if 0 - if (ctx->eCodec == cudaVideoCodec_H264) { - err = nalus.parseNALU_AVC(ctx->data.data(), ctx->data.size()); - if (err.code != heif_error_Ok) { - return err; - } - } -#endif CUdevice cuDevice = 0; CUresult result; @@ -238,17 +231,60 @@ struct heif_error nvdec_decode_image(void *decoder, struct heif_image **out_img) return err; } nFrameReturned = dec.Decode(hevc_data, hevc_data_size); - } else { -// TODO -#if 0 } else if (ctx->eCodec == cudaVideoCodec_H264) { - uint8_t 
*avc_data; - size_t avc_data_size; - nalus.buildWithStartCodesAVC(&avc_data, &avc_data_size); - nFrameReturned = dec.Decode(avc_data, avc_data_size); + // TODO: ideally we'd share this code with the OpenH264 decoder + const std::vector& indata = ctx->data; + std::vector scdata; + + size_t idx = 0; + while (idx < indata.size()) { + if (indata.size() - 4 < idx) { + return kError_EOF; + } + + uint32_t size = ((indata[idx] << 24) | (indata[idx + 1] << 16) | (indata[idx + 2] << 8) | indata[idx + 3]); + idx += 4; + + if (indata.size() < size || indata.size() - size < idx) { + return kError_EOF; + } + + scdata.push_back(0); + scdata.push_back(0); + scdata.push_back(1); + + // check for need of start code emulation prevention + bool do_start_code_emulation_check = true; + while (do_start_code_emulation_check && size >= 3) { + bool found_start_code_emulation = false; + for (size_t i = 0; i < size - 3; i++) { + if (indata[idx + 0] == 0 && indata[idx + 1] == 0 && (indata[idx + 2] >= 0 && indata[idx + 2] <= 3)) { + scdata.push_back(0); + scdata.push_back(0); + scdata.push_back(3); + scdata.insert(scdata.end(), &indata[idx + 2], &indata[idx + i + 2]); + idx += i + 2; + size -= (uint32_t)(i + 2); + found_start_code_emulation = true; + break; + } + } + + do_start_code_emulation_check = found_start_code_emulation; + } + + assert(size > 0); + scdata.insert(scdata.end(), &indata[idx], &indata[idx + size]); + + idx += size; + } + + if (idx != indata.size()) { + return kError_EOF; + } + nFrameReturned = dec.Decode(scdata.data(), scdata.size()); printf("nFrameReturned: %d\n", nFrameReturned); - } else -#endif + } else { nFrameReturned = dec.Decode(ctx->data.data(), ctx->data.size()); } From d57eebf12f7ce78e2b7b6d505063c842b32d4387 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 11:54:27 +1000 Subject: [PATCH 06/41] nvidia: add to CI --- .github/workflows/build.yml | 1 + scripts/install-ci-linux.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml index b4417fbb58..1eb7ed5174 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,6 +16,7 @@ jobs: - { NAME: "graphics", WITH_GRAPHICS: 1 } - { NAME: "x265", WITH_X265: 1 } - { NAME: "x265 / graphics", WITH_GRAPHICS: 1, WITH_X265: 1 } + - { NAME: "nvidia / graphics", WITH_GRAPHICS: 1, WITH_NVIDIA: 1 } - { NAME: "libde265 (1) / graphics", WITH_GRAPHICS: 1, WITH_LIBDE265: 1 } - { NAME: "libde265 (2) / graphics", WITH_GRAPHICS: 1, WITH_LIBDE265: 2 } - { NAME: "libde265 (1) / x265 / graphics", WITH_GRAPHICS: 1, WITH_X265: 1, WITH_LIBDE265: 1 } diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh index c2f6509683..7376b4bb90 100755 --- a/scripts/install-ci-linux.sh +++ b/scripts/install-ci-linux.sh @@ -111,6 +111,12 @@ if [ ! -z "$WITH_GRAPHICS" ]; then " fi +if [ ! -z "$WITH_NVIDIA" ]; then + INSTALL_PACKAGES="$INSTALL_PACKAGES \ + libnvidia-decode \ + " +fi + if [ ! -z "$WITH_UNCOMPRESSED_CODEC" ]; then INSTALL_PACKAGES="$INSTALL_PACKAGES \ libbrotli-dev \ From b4134f1226080d6c21385ec0d1c8ba625ae1bac6 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 13:12:13 +1000 Subject: [PATCH 07/41] nvdec: additional CI fixes --- scripts/install-ci-linux.sh | 2 +- scripts/run-ci.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh index 7376b4bb90..5d8d66dd3d 100755 --- a/scripts/install-ci-linux.sh +++ b/scripts/install-ci-linux.sh @@ -113,7 +113,7 @@ fi if [ ! -z "$WITH_NVIDIA" ]; then INSTALL_PACKAGES="$INSTALL_PACKAGES \ - libnvidia-decode \ + libnvidia-decode-535 \ " fi diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh index b2a5b267e5..50569587af 100755 --- a/scripts/run-ci.sh +++ b/scripts/run-ci.sh @@ -105,6 +105,10 @@ WITH_HEIF_DECODER= if [ ! -z "$WITH_LIBDE265" ] ; then WITH_HEIF_DECODER=1 fi +WITH_NVIDIA_DECODER= +if [ ! 
-z "$WITH_NVIDIA" ] ; then + WITH_NVIDIA_DECODER=1 +fi WITH_AVIF_ENCODER= WITH_HEIF_ENCODER= # Need decoded images before encoding. From 5cd9fc3dcc43bd3ea77f0d8d72864a0b1dee43ff Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 13:19:14 +1000 Subject: [PATCH 08/41] ci: fix nvdec option --- scripts/run-ci.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh index 50569587af..1657678291 100755 --- a/scripts/run-ci.sh +++ b/scripts/run-ci.sh @@ -105,9 +105,9 @@ WITH_HEIF_DECODER= if [ ! -z "$WITH_LIBDE265" ] ; then WITH_HEIF_DECODER=1 fi -WITH_NVIDIA_DECODER= +WITH_NV_DECODER= if [ ! -z "$WITH_NVIDIA" ] ; then - WITH_NVIDIA_DECODER=1 + WITH_NV_DECODER=1 fi WITH_AVIF_ENCODER= WITH_HEIF_ENCODER= From 41c129ac3e43ac2b59bdbe617d1991508ceade0e Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 13:50:36 +1000 Subject: [PATCH 09/41] nvidia: add to cmake presets --- CMakePresets.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CMakePresets.json b/CMakePresets.json index 947c37ec8d..8f2bc5e421 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -43,6 +43,10 @@ "WITH_OpenJPEG_ENCODER_PLUGIN" : "OFF", "WITH_FFMPEG_DECODER" : "ON", "WITH_FFMPEG_DECODER_PLUGIN" : "OFF", + "WITH_OpenH264_DECODER" : "ON", + "WITH_OpenH264_DECODER_PLUGIN" : "OFF", + "WITH_NV_DECODER" : "ON", + "WITH_NV_DECODER_PLUGIN" : "OFF", "WITH_REDUCED_VISIBILITY" : "OFF", "WITH_HEADER_COMPRESSION" : "ON", @@ -89,6 +93,10 @@ "WITH_OPENJPH_ENCODER" : "ON", "WITH_FFMPEG_DECODER" : "ON", "WITH_FFMPEG_DECODER_PLUGIN" : "ON", + "WITH_OpenH264_DECODER" : "ON", + "WITH_OpenH264_DECODER_PLUGIN" : "ON", + "WITH_NV_DECODER" : "ON", + "WITH_NV_DECODER_PLUGIN" : "ON", "WITH_REDUCED_VISIBILITY" : "ON", "WITH_HEADER_COMPRESSION" : "ON", From ec2bb0bc14b6bd1c277b0ffba273015adbb3b2ce Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 13:43:50 +1000 Subject: [PATCH 10/41] avc: additional avcC box parsing --- 
libheif/codecs/avc.cc | 15 ++++++++++++--- libheif/codecs/avc.h | 9 +++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc index 18218e979e..82cdcc8afb 100644 --- a/libheif/codecs/avc.cc +++ b/libheif/codecs/avc.cc @@ -55,7 +55,16 @@ Error Box_avcC::parse(BitstreamRange &range) { if ((m_configuration.AVCProfileIndication != 66) && (m_configuration.AVCProfileIndication != 77) && (m_configuration.AVCProfileIndication != 88)) { - // TODO: we don't support this yet + m_configuration.chroma_format = range.read8() & 0b00000011; + m_configuration.bit_depth_luma = 8 + (range.read8() & 0b00000111); + m_configuration.bit_depth_chroma = 8 + (range.read8() & 0b00000111); + uint8_t numOfSequenceParameterSetExt = range.read8(); + for (int i = 0; i < numOfSequenceParameterSetExt; i++) { + uint16_t sequenceParameterSetExtLength = range.read16(); + std::vector sps_ext(sequenceParameterSetExtLength); + range.read(sps_ext.data(), sps_ext.size()); + m_sps_ext.push_back(sps_ext); + } } return range.get_error(); @@ -279,7 +288,7 @@ int ImageItem_AVC::get_luma_bits_per_pixel() const { auto avcC_box = get_file()->get_property(get_id()); if (avcC_box) { - return 8; // TODO avcC_box->get_configuration().bit_depth_luma; + return avcC_box->get_configuration().bit_depth_luma; } return -1; @@ -290,7 +299,7 @@ int ImageItem_AVC::get_chroma_bits_per_pixel() const { auto avcC_box = get_file()->get_property(get_id()); if (avcC_box) { - return 8; // TODO avcC_box->get_configuration().bit_depth_chroma; + return avcC_box->get_configuration().bit_depth_chroma; } return -1; diff --git a/libheif/codecs/avc.h b/libheif/codecs/avc.h index 0233e7efe6..59e2fb6369 100644 --- a/libheif/codecs/avc.h +++ b/libheif/codecs/avc.h @@ -41,6 +41,9 @@ class Box_avcC : public Box { uint8_t profile_compatibility; // constraint set flags uint8_t AVCLevelIndication; // level_idc uint8_t lengthSize; + uint8_t chroma_format; + uint8_t bit_depth_luma = 8; + 
uint8_t bit_depth_chroma = 8; }; void set_configuration(const configuration& config) @@ -63,6 +66,11 @@ class Box_avcC : public Box { return m_pps; } + const std::vector< std::vector > getSequenceParameterSetExt() const + { + return m_sps_ext; + } + void get_header_nals(std::vector& data) const; std::string dump(Indent &) const override; @@ -78,6 +86,7 @@ class Box_avcC : public Box { configuration m_configuration; std::vector< std::vector > m_sps; std::vector< std::vector > m_pps; + std::vector< std::vector > m_sps_ext; }; From b8f8ffb151cc2f58f76aa3650e781121bc842d2d Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 13:56:37 +1000 Subject: [PATCH 11/41] nvdec: additional CI packages --- scripts/install-ci-linux.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh index 5d8d66dd3d..b652aa4b2b 100755 --- a/scripts/install-ci-linux.sh +++ b/scripts/install-ci-linux.sh @@ -113,6 +113,8 @@ fi if [ ! -z "$WITH_NVIDIA" ]; then INSTALL_PACKAGES="$INSTALL_PACKAGES \ + nvidia-cuda-dev \ + nvidia-cuda-toolkit \ libnvidia-decode-535 \ " fi From e2768b4e736a7ee27442e6291f54318077f89c53 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 14:04:26 +1000 Subject: [PATCH 12/41] nvdec: remove mandatory requirement for CUDA --- cmake/modules/FindNVDEC.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/FindNVDEC.cmake b/cmake/modules/FindNVDEC.cmake index 8425c0af5e..6d3ee3c424 100644 --- a/cmake/modules/FindNVDEC.cmake +++ b/cmake/modules/FindNVDEC.cmake @@ -4,7 +4,7 @@ find_library(NVDEC_LIBRARY NAMES libnvcuvid nvcuvid ) -find_package(CUDAToolkit REQUIRED) +find_package(CUDAToolkit) set(NVDEC_PROCESS_LIBS NVDEC_LIBRARY) libfind_process(NVDEC) From 348a37ee13d4878562a5e3a96be7a7143d0a71be Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 14:12:25 +1000 Subject: [PATCH 13/41] nvdec: only link cuda if found --- libheif/plugins/CMakeLists.txt | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt index 49f6f47066..4eb1f011ba 100644 --- a/libheif/plugins/CMakeLists.txt +++ b/libheif/plugins/CMakeLists.txt @@ -115,7 +115,7 @@ plugin_compilation(openh264dec OpenH264 OpenH264_DECODER_FOUND OpenH264_DECODER set(NV_DECODER_sources decoder_nvdec.cc decoder_nvdec.h NvDecoder.cpp NvDecoder.h) set(NV_DECODER_extra_plugin_sources) plugin_compilation(nvdec NVDEC NVDEC_FOUND NV_DECODER NV_DECODER) -if(WITH_NV_DECODER) +if(NVDEC_FOUND) target_link_libraries(heif PRIVATE CUDA::cuda_driver) endif() From 81fc3708b45179d0e79134ef04eb0ead463e92d0 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Tue, 3 Sep 2024 14:22:03 +1000 Subject: [PATCH 14/41] nvdec: check if we can build without cuda-dev --- scripts/install-ci-linux.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/install-ci-linux.sh b/scripts/install-ci-linux.sh index b652aa4b2b..6e73df8ed9 100755 --- a/scripts/install-ci-linux.sh +++ b/scripts/install-ci-linux.sh @@ -113,7 +113,6 @@ fi if [ ! 
-z "$WITH_NVIDIA" ]; then INSTALL_PACKAGES="$INSTALL_PACKAGES \ - nvidia-cuda-dev \ nvidia-cuda-toolkit \ libnvidia-decode-535 \ " From 015c9cf06013cfa58760e203b5de8f08c652b4ba Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 3 Sep 2024 23:23:57 +0200 Subject: [PATCH 15/41] AVC: pass SPS-Ext to decoder (#1297) --- libheif/codecs/avc.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc index 82cdcc8afb..fef9d34142 100644 --- a/libheif/codecs/avc.cc +++ b/libheif/codecs/avc.cc @@ -167,6 +167,15 @@ void Box_avcC::get_header_nals(std::vector& data) const data.insert(data.end(), sps.begin(), sps.end()); } + for (const auto& spsext : m_sps_ext) { + data.push_back((spsext.size() >> 24) & 0xFF); + data.push_back((spsext.size() >> 16) & 0xFF); + data.push_back((spsext.size() >> 8) & 0xFF); + data.push_back((spsext.size() >> 0) & 0xFF); + + data.insert(data.end(), spsext.begin(), spsext.end()); + } + for (const auto& pps : m_pps) { data.push_back((pps.size() >> 24) & 0xFF); data.push_back((pps.size() >> 16) & 0xFF); From 0672d7da55242d94b5f4fbf391ea2ea5b6cdd84d Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 3 Sep 2024 23:42:57 +0200 Subject: [PATCH 16/41] AVC: dump seq-ext parameters (#1297) --- libheif/codecs/avc.cc | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc index fef9d34142..f175eee0bf 100644 --- a/libheif/codecs/avc.cc +++ b/libheif/codecs/avc.cc @@ -98,17 +98,13 @@ Error Box_avcC::write(StreamWriter &writer) const { std::string Box_avcC::dump(Indent &indent) const { std::ostringstream sstr; sstr << Box::dump(indent); - sstr << indent << "configuration_version: " - << ((int)m_configuration.configuration_version) << "\n" - << indent << "AVCProfileIndication: " - << ((int)m_configuration.AVCProfileIndication) << " (" - << profileIndicationAsText() << ")" - << "\n" - << indent << "profile_compatibility: " - << 
((int)m_configuration.profile_compatibility) << "\n" - << indent - << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication) - << "\n"; + sstr << indent << "configuration_version: " << ((int)m_configuration.configuration_version) << "\n" + << indent << "AVCProfileIndication: " << ((int)m_configuration.AVCProfileIndication) << " (" << profileIndicationAsText() << ")\n" + << indent << "profile_compatibility: " << ((int)m_configuration.profile_compatibility) << "\n" + << indent << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication) << "\n" + << indent << "Chroma format: " << ((int)m_configuration.chroma_format) << "\n" + << indent << "Bit depth luma: " << ((int)m_configuration.bit_depth_luma) << "\n" + << indent << "Bit depth chroma: " << ((int)m_configuration.bit_depth_chroma) << "\n"; for (const auto &sps : m_sps) { sstr << indent << "SPS: "; @@ -119,6 +115,15 @@ std::string Box_avcC::dump(Indent &indent) const { sstr << std::dec; } + for (const auto &spsext : m_sps_ext) { + sstr << indent << "SPS-EXT: "; + for (uint8_t b : spsext) { + sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " "; + } + sstr << "\n"; + sstr << std::dec; + } + for (const auto &pps : m_pps) { sstr << indent << "PPS: "; for (uint8_t b : pps) { From 321ae91b0957e05272dc91a4fedf28efd15e30c0 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 3 Sep 2024 23:49:16 +0200 Subject: [PATCH 17/41] heif-enc: similar output of uncompressed codec in encoder list as in heif-dec --- examples/heif_enc.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/heif_enc.cc b/examples/heif_enc.cc index bef60e9dde..1c58ee73eb 100644 --- a/examples/heif_enc.cc +++ b/examples/heif_enc.cc @@ -402,11 +402,9 @@ static const char* get_compression_format_name(heif_compression_format format) static void show_list_of_all_encoders() { - for (auto compression_format : {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC, 
heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K -#if WITH_UNCOMPRESSED_CODEC -, heif_compression_uncompressed -#endif -, heif_compression_VVC + for (auto compression_format: {heif_compression_AVC, heif_compression_AV1, heif_compression_HEVC, + heif_compression_JPEG, heif_compression_JPEG2000, heif_compression_HTJ2K, + heif_compression_uncompressed, heif_compression_VVC }) { switch (compression_format) { @@ -429,7 +427,12 @@ static void show_list_of_all_encoders() std::cout << "JPEG 2000 (HT)"; break; case heif_compression_uncompressed: - std::cout << "Uncompressed"; +#if WITH_UNCOMPRESSED_CODEC + std::cout << "Uncompressed: yes\n"; +#else + std::cout << "Uncompressed: no\n"; +#endif + continue; // special handling of this case because it is built in without plugin break; case heif_compression_VVC: std::cout << "VVIC"; From 541782e18421e3aad20ebdfbbb6e9216b3dbd876 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 3 Sep 2024 23:50:56 +0200 Subject: [PATCH 18/41] heif-dec sort list of decoders --- examples/heif_dec.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/heif_dec.cc b/examples/heif_dec.cc index 5e2b9cb213..431bdbc69a 100644 --- a/examples/heif_dec.cc +++ b/examples/heif_dec.cc @@ -168,17 +168,14 @@ void list_decoders(heif_compression_format format) void list_all_decoders() { - std::cout << "HEIC decoders:\n"; - list_decoders(heif_compression_HEVC); + std::cout << "AVC decoders:\n"; + list_decoders(heif_compression_AVC); std::cout << "AVIF decoders:\n"; list_decoders(heif_compression_AV1); - std::cout << "VVIC decoders:\n"; - list_decoders(heif_compression_VVC); - - std::cout << "AVC decoders:\n"; - list_decoders(heif_compression_AVC); + std::cout << "HEIC decoders:\n"; + list_decoders(heif_compression_HEVC); std::cout << "JPEG decoders:\n"; list_decoders(heif_compression_JPEG); @@ -186,7 +183,7 @@ void list_all_decoders() std::cout << "JPEG 2000 decoders:\n"; 
list_decoders(heif_compression_JPEG2000); - std::cout << "HT-J2K decoders:\n"; + std::cout << "JPEG 2000 (HT) decoders:\n"; list_decoders(heif_compression_HTJ2K); #if WITH_UNCOMPRESSED_CODEC @@ -194,6 +191,9 @@ void list_all_decoders() #else std::cout << "uncompressed: no\n"; #endif + + std::cout << "VVIC decoders:\n"; + list_decoders(heif_compression_VVC); } From e65c9a357835b5848cc7e0d3af5bea627d8f32d4 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 3 Sep 2024 23:55:25 +0200 Subject: [PATCH 19/41] avc_box: adapt test output --- tests/avc_box.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/avc_box.cc b/tests/avc_box.cc index 4a2fd7bc3b..9e3dc2929e 100644 --- a/tests/avc_box.cc +++ b/tests/avc_box.cc @@ -68,6 +68,9 @@ TEST_CASE("avcC") { "AVCProfileIndication: 66 (Constrained Baseline)\n" "profile_compatibility: 128\n" "AVCLevelIndication: 30\n" + "Chroma format: 32\n" + "Bit depth luma: 8\n" + "Bit depth chroma: 8\n" "SPS: 67 64 00 28 ac 72 04 40 40 04 1a 10 00 00 03 00 " "10 00 00 03 03 20 f1 83 18 46 \n" "PPS: 68 e8 43 83 92 c8 b0 \n"); From c1784006a41ccb19070e9dce3943633ae34cc555 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Thu, 5 Sep 2024 10:51:29 +0200 Subject: [PATCH 20/41] tild: fix parsing of 'tiles_are_sequential' and more dump output --- libheif/codecs/tild.cc | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc index d24a3fff4b..6de73cab4a 100644 --- a/libheif/codecs/tild.cc +++ b/libheif/codecs/tild.cc @@ -78,10 +78,6 @@ void Box_tilC::derive_box_version() uint8_t flags = 0; - if (dimensions_64bit(m_parameters)) { - flags |= 0x20; - } - switch (m_parameters.offset_field_length) { case 32: flags |= 0; @@ -122,6 +118,10 @@ void Box_tilC::derive_box_version() flags |= 0x10; } + if (dimensions_64bit(m_parameters)) { + flags |= 0x20; + } + set_flags(flags); } @@ -140,6 +140,7 @@ Error Box_tilC::write(StreamWriter& writer) const 
writer.write8(m_parameters.number_of_extra_dimensions); + // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t) writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width); writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height); @@ -164,8 +165,13 @@ std::string Box_tilC::dump(Indent& indent) const sstr << BoxHeader::dump(indent); sstr << indent << "version: " << ((int) get_version()) << "\n" - << indent << "image size: " << m_parameters.image_width << "x" << m_parameters.image_height << "\n" - << indent << "tile size: " << m_parameters.tile_width << "x" << m_parameters.tile_height << "\n"; + //<< indent << "image size: " << m_parameters.image_width << "x" << m_parameters.image_height << "\n" + << indent << "tile size: " << m_parameters.tile_width << "x" << m_parameters.tile_height << "\n" + << indent << "compression: " << to_fourcc(m_parameters.compression_type_fourcc) << "\n" + << indent << "tiles are sequential: " << (m_parameters.tiles_are_sequential ? 
"yes" : "no") << "\n" + << indent << "offset field length: " << ((int) m_parameters.offset_field_length) << " bits\n" + << indent << "size field length: " << ((int) m_parameters.size_field_length) << " bits\n" + << indent << "number of extra dimensions: " << ((int) m_parameters.number_of_extra_dimensions) << "\n"; return sstr.str(); @@ -219,7 +225,7 @@ Error Box_tilC::parse(BitstreamRange& range) break; } - m_parameters.tiles_are_sequential = !!(flags % 0x10); + m_parameters.tiles_are_sequential = !!(flags & 0x10); bool dimensions_are_64bit = (flags & 0x20); m_parameters.number_of_extra_dimensions = range.read8(); @@ -545,7 +551,9 @@ ImageItem_Tild::decode_grid_tile(const heif_decoding_options& options, uint32_t uint64_t size = m_tild_header.get_tile_size(idx); Error err = get_file()->append_data_from_iloc(get_id(), data, offset, size); - assert(!err.error_code); + if (err.error_code) { + return err; + } return decode_from_compressed_data(get_compression_format(), options, data); } From d9928fe95aa7ab5378452b0ee56c9c45791762ab Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Thu, 5 Sep 2024 19:37:54 +0200 Subject: [PATCH 21/41] heif_reader_range_request_result: allow 'overreading' a range request --- libheif/api/libheif/heif.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h index ecd9c7eb23..82cf0b23d8 100644 --- a/libheif/api/libheif/heif.h +++ b/libheif/api/libheif/heif.h @@ -936,7 +936,7 @@ struct heif_reading_options; enum heif_reader_grow_status { heif_reader_grow_status_size_reached, // requested size has been reached, we can read until this point - heif_reader_grow_status_timeout, // size has not been reached yet, but it may still grow further + heif_reader_grow_status_timeout, // size has not been reached yet, but it may still grow further (deprecated) heif_reader_grow_status_size_beyond_eof, // size has not been reached and never will. 
The file has grown to its full size heif_reader_grow_status_error // an error has occurred }; @@ -945,8 +945,11 @@ struct heif_reader_range_request_result { enum heif_reader_grow_status status; // should not return 'heif_reader_grow_status_timeout' - // for status == 'heif_reader_grow_status_size_beyond_eof' - uint64_t range_end; // if not the whole file range could be read, this is the end position + // Indicates until what position the file has been read. + // If we cannot read the whole file range (status == 'heif_reader_grow_status_size_beyond_eof'), this is the actual end position. + // On the other hand, it may be that the reader was reading more data than requested. In that case, it should indicate the full size here + // and libheif may decide to make use of the additional data (e.g. for filling 'tild' offset tables). + uint64_t range_end; // for status == 'heif_reader_grow_status_error' int reader_error_code; // a reader specific error code From 8cc6c796ac706bdf99c1f6f82f32b7873bbbf19d Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Fri, 6 Sep 2024 10:35:29 +0200 Subject: [PATCH 22/41] tild: omit writing image size and take it from ispe instead --- libheif/codecs/tild.cc | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc index 6de73cab4a..3b3af35743 100644 --- a/libheif/codecs/tild.cc +++ b/libheif/codecs/tild.cc @@ -141,8 +141,8 @@ Error Box_tilC::write(StreamWriter& writer) const writer.write8(m_parameters.number_of_extra_dimensions); // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t) - writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width); - writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height); + //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width); + //writer.write(dimensions_are_64bit ? 
8 : 4, m_parameters.image_height); for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) { writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.extra_dimensions[i]); @@ -240,6 +240,7 @@ Error Box_tilC::parse(BitstreamRange& range) } #endif + /* m_parameters.image_width = (dimensions_are_64bit ? range.read64() : range.read32()); m_parameters.image_height = (dimensions_are_64bit ? range.read64() : range.read32()); @@ -248,6 +249,7 @@ Error Box_tilC::parse(BitstreamRange& range) heif_suberror_Unspecified, "'tild' image with zero width or height."}; } +*/ for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) { uint64_t size = (dimensions_are_64bit ? range.read64() : range.read32()); @@ -435,7 +437,24 @@ Error ImageItem_Tild::on_load_file() "Tiled image without 'tilC' property box."}; } - m_tild_header.set_parameters(tilC_box->get_parameters()); + auto ispe_box = heif_file->get_property(get_id()); + if (!ispe_box) { + return {heif_error_Invalid_input, + heif_suberror_Unspecified, + "Tiled image without 'ispe' property box."}; + } + + heif_tild_image_parameters parameters = tilC_box->get_parameters(); + parameters.image_width = ispe_box->get_width(); + parameters.image_height = ispe_box->get_height(); + + if (parameters.image_width == 0 || parameters.image_height == 0) { + return {heif_error_Invalid_input, + heif_suberror_Unspecified, + "'tild' image with zero width or height."}; + } + + m_tild_header.set_parameters(parameters); err = m_tild_header.read_full_offset_table(heif_file, get_id()); if (err) { From aab4c9f879e9055ac28ada33decddf8361edbef3 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Sun, 8 Sep 2024 18:18:12 +1000 Subject: [PATCH 23/41] Windows build fix for C++ version. 
--- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1d34b935a..041f3b8e7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ if(NOT MSVC) endif () endif() -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) From 83c0954cd5afe3b9d6493607ea7b2a5bb6e289ab Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 15:30:08 +0200 Subject: [PATCH 24/41] use C++17 [[fallthrough]] --- examples/heif_dec.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/heif_dec.cc b/examples/heif_dec.cc index 431bdbc69a..661e57809d 100644 --- a/examples/heif_dec.cc +++ b/examples/heif_dec.cc @@ -261,7 +261,7 @@ int main(int argc, char** argv) break; case '?': std::cerr << "\n"; - // fallthrough + [[fallthrough]]; case 'h': show_help(argv[0]); return 0; From 831532a6722ad68b2ecf884f35718c8fc1c92994 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Sun, 8 Sep 2024 18:19:32 +1000 Subject: [PATCH 25/41] Windows alternatives for unistd and friends. 
--- libheif/box.cc | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/libheif/box.cc b/libheif/box.cc index 746e9cd720..72feeabac9 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -50,7 +50,13 @@ #define M_PI 3.14159265358979323846 #endif -#include // TODO: Windows +#if !defined(_WIN32) && !defined(_WIN64) +#include +#else +#include +#include +#endif + Fraction::Fraction(int32_t num, int32_t den) { @@ -155,7 +161,7 @@ bool Fraction::is_valid() const return denominator != 0; } -uint32_t from_fourcc(const char* string) +static uint32_t from_fourcc(const char* string) { return ((string[0] << 24) | (string[1] << 16) | @@ -1035,7 +1041,7 @@ Error Box_ftyp::parse(BitstreamRange& range) m_major_brand = range.read32(); m_minor_version = range.read32(); - if (get_box_size() <= get_header_size() + 8) { + if (get_box_size() - 8 <= get_header_size()) { // Sanity check. return Error(heif_error_Invalid_input, heif_suberror_Invalid_box_size, @@ -1406,8 +1412,15 @@ void Box_iloc::set_use_tmp_file(bool flag) { m_use_tmpfile = flag; if (flag) { +#if !defined(_WIN32) && !defined(_WIN64) strcpy(m_tmp_filename, "/tmp/libheif-XXXXXX"); m_tmpfile_fd = mkstemp(m_tmp_filename); +#else + char tmpname[L_tmpnam_s]; + // TODO: check return value (errno_t) + tmpnam_s(tmpname, L_tmpnam_s); + _sopen_s(&m_tmpfile_fd, tmpname, _O_CREAT | _O_TEMPORARY | _O_TRUNC | _O_RDWR, _SH_DENYRW, _S_IREAD | _S_IWRITE); +#endif } } @@ -1629,7 +1642,11 @@ Error Box_iloc::append_data(heif_item_id item_ID, extent.length = data.size(); if (m_use_tmpfile && construction_method==0) { +#if !defined(_WIN32) && !defined(_WIN64) ssize_t cnt = ::write(m_tmpfile_fd, data.data(), data.size()); +#else + int cnt = _write(m_tmpfile_fd, data.data(), data.size()); +#endif if (cnt < 0) { std::stringstream sstr; sstr << "Could not write to tmp file: error " << errno; @@ -1883,7 +1900,11 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer) if (m_use_tmpfile) { 
std::vector data(extent.length); +#if !defined(_WIN32) && !defined(_WIN64) ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length); +#else + int cnt = _read(m_tmpfile_fd, data.data(), extent.length); +#endif if (cnt<0) { std::stringstream sstr; sstr << "Cannot read tmp data file, error " << errno; @@ -2632,7 +2653,7 @@ Error Box_ipma::parse(BitstreamRange& range) int assoc_cnt = range.read8(); for (int k = 0; k < assoc_cnt; k++) { - PropertyAssociation association; + PropertyAssociation association{}; uint16_t index; if (get_flags() & 1) { @@ -3902,9 +3923,9 @@ Error Box_cmin::write(StreamWriter& writer) const } -std::array mul(const std::array& a, const std::array& b) +static std::array mul(const std::array& a, const std::array& b) { - std::array m; + std::array m{}; m[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6]; m[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7]; From 4af34eed99f8716fc6fc23c173ecb237e7d90494 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 15:32:41 +0200 Subject: [PATCH 26/41] fix constness of getopt* implementation for Windows --- extra/getopt.h | 4 ++-- extra/getopt_long.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extra/getopt.h b/extra/getopt.h index b23a4fbeee..f5d3bfaad1 100644 --- a/extra/getopt.h +++ b/extra/getopt.h @@ -55,8 +55,8 @@ struct option #define required_argument 1 #define optional_argument 2 -int getopt(int, char**, char*); -int getopt_long(int, char**, char*, struct option*, int*); +int getopt(int, char**, const char*); +int getopt_long(int, char**, const char*, struct option*, int*); #ifdef __cplusplus } diff --git a/extra/getopt_long.c b/extra/getopt_long.c index 2722ce90ff..a1d5055260 100644 --- a/extra/getopt_long.c +++ b/extra/getopt_long.c @@ -153,7 +153,7 @@ getopt2(int nargc, char * nargv, const char *ostr) * Parse argc/argv argument vector. 
*/ int -getopt_long(int nargc, char ** nargv, char * options, struct option * long_options, int * index) +getopt_long(int nargc, char ** nargv, const char * options, struct option * long_options, int * index) { int retval; From 24a2435058e45ead8946d0e6d52ae0bf33990064 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 15:35:05 +0200 Subject: [PATCH 27/41] safe integer check in ftyp parsing --- libheif/box.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libheif/box.cc b/libheif/box.cc index 72feeabac9..bd86034637 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -1041,7 +1041,8 @@ Error Box_ftyp::parse(BitstreamRange& range) m_major_brand = range.read32(); m_minor_version = range.read32(); - if (get_box_size() - 8 <= get_header_size()) { + uint64_t box_size = get_box_size(); + if (box_size < 8 || box_size - 8 <= get_header_size()) { // Sanity check. return Error(heif_error_Invalid_input, heif_suberror_Invalid_box_size, From 7b7ac8e507a82de13e4c38f8d551d069ba88ec44 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 16:28:10 +0200 Subject: [PATCH 28/41] avcC: fix test, write extended avcC fields, error handling --- libheif/codecs/avc.cc | 159 ++++++++++++++++++++++++++++++------------ libheif/codecs/avc.h | 2 +- tests/avc_box.cc | 2 +- 3 files changed, 118 insertions(+), 45 deletions(-) diff --git a/libheif/codecs/avc.cc b/libheif/codecs/avc.cc index f175eee0bf..920e3d7ea5 100644 --- a/libheif/codecs/avc.cc +++ b/libheif/codecs/avc.cc @@ -27,7 +27,8 @@ #include "context.h" -Error Box_avcC::parse(BitstreamRange &range) { +Error Box_avcC::parse(BitstreamRange& range) +{ m_configuration.configuration_version = range.read8(); m_configuration.AVCProfileIndication = range.read8(); m_configuration.profile_compatibility = range.read8(); @@ -52,10 +53,11 @@ Error Box_avcC::parse(BitstreamRange &range) { m_pps.push_back(pps); } + // See ISO/IEC 14496-15 2017 Section 5.3.3.1.2 if ((m_configuration.AVCProfileIndication != 66) 
&& (m_configuration.AVCProfileIndication != 77) && (m_configuration.AVCProfileIndication != 88)) { - m_configuration.chroma_format = range.read8() & 0b00000011; + m_configuration.chroma_format = (heif_chroma) (range.read8() & 0b00000011); m_configuration.bit_depth_luma = 8 + (range.read8() & 0b00000111); m_configuration.bit_depth_chroma = 8 + (range.read8() & 0b00000111); uint8_t numOfSequenceParameterSetExt = range.read8(); @@ -70,7 +72,8 @@ Error Box_avcC::parse(BitstreamRange &range) { return range.get_error(); } -Error Box_avcC::write(StreamWriter &writer) const { +Error Box_avcC::write(StreamWriter& writer) const +{ size_t box_start = reserve_box_header_space(writer); writer.write8(m_configuration.configuration_version); @@ -79,55 +82,125 @@ Error Box_avcC::write(StreamWriter &writer) const { writer.write8(m_configuration.AVCLevelIndication); uint8_t lengthSizeMinusOneWithReserved = 0b11111100 | ((m_configuration.lengthSize - 1) & 0b11); writer.write8(lengthSizeMinusOneWithReserved); + + if (m_sps.size() > 0b00011111) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write more than 31 PPS into avcC box."}; + } + uint8_t numSpsWithReserved = 0b11100000 | (m_sps.size() & 0b00011111); writer.write8(numSpsWithReserved); - for (const auto &sps: m_sps) { + for (const auto& sps : m_sps) { + if (sps.size() > 0xFFFF) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write SPS larger than 65535 bytes into avcC box."}; + } writer.write16((uint16_t) sps.size()); writer.write(sps); } + + if (m_pps.size() > 0xFF) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write more than 255 PPS into avcC box."}; + } + writer.write8(m_pps.size() & 0xFF); - for (const auto &pps: m_pps) { + for (const auto& pps : m_pps) { + if (pps.size() > 0xFFFF) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write PPS larger than 65535 bytes into avcC box."}; + } 
writer.write16((uint16_t) pps.size()); writer.write(pps); } + + if ((m_configuration.AVCProfileIndication != 66) && + (m_configuration.AVCProfileIndication != 77) && + (m_configuration.AVCProfileIndication != 88)) { + writer.write8(m_configuration.chroma_format); + writer.write8(m_configuration.bit_depth_luma - 8); + writer.write8(m_configuration.bit_depth_chroma - 8); + + if (m_sps_ext.size() > 0xFF) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write more than 255 SPS-Ext into avcC box."}; + } + + writer.write8(m_sps_ext.size() & 0xFF); + for (const auto& spsext : m_sps_ext) { + if (spsext.size() > 0xFFFF) { + return {heif_error_Encoding_error, + heif_suberror_Unspecified, + "Cannot write SPS-Ext larger than 65535 bytes into avcC box."}; + } + writer.write16((uint16_t) spsext.size()); + writer.write(spsext); + } + } + prepend_header(writer, box_start); return Error::Ok; } -std::string Box_avcC::dump(Indent &indent) const { +std::string Box_avcC::dump(Indent& indent) const +{ std::ostringstream sstr; sstr << Box::dump(indent); - sstr << indent << "configuration_version: " << ((int)m_configuration.configuration_version) << "\n" - << indent << "AVCProfileIndication: " << ((int)m_configuration.AVCProfileIndication) << " (" << profileIndicationAsText() << ")\n" - << indent << "profile_compatibility: " << ((int)m_configuration.profile_compatibility) << "\n" - << indent << "AVCLevelIndication: " << ((int)m_configuration.AVCLevelIndication) << "\n" - << indent << "Chroma format: " << ((int)m_configuration.chroma_format) << "\n" - << indent << "Bit depth luma: " << ((int)m_configuration.bit_depth_luma) << "\n" - << indent << "Bit depth chroma: " << ((int)m_configuration.bit_depth_chroma) << "\n"; - - for (const auto &sps : m_sps) { + sstr << indent << "configuration_version: " << ((int) m_configuration.configuration_version) << "\n" + << indent << "AVCProfileIndication: " << ((int) m_configuration.AVCProfileIndication) << " (" << 
profileIndicationAsText() << ")\n" + << indent << "profile_compatibility: " << ((int) m_configuration.profile_compatibility) << "\n" + << indent << "AVCLevelIndication: " << ((int) m_configuration.AVCLevelIndication) << "\n" + << indent << "Chroma format: "; + + switch (m_configuration.chroma_format) { + case heif_chroma_monochrome: + sstr << "4:0:0\n"; + break; + case heif_chroma_420: + sstr << "4:2:0\n"; + break; + case heif_chroma_422: + sstr << "4:2:2\n"; + break; + case heif_chroma_444: + sstr << "4:4:4\n"; + break; + default: + sstr << "unsupported\n"; + break; + } + + sstr << indent << "Bit depth luma: " << ((int) m_configuration.bit_depth_luma) << "\n" + << indent << "Bit depth chroma: " << ((int) m_configuration.bit_depth_chroma) << "\n"; + + for (const auto& sps : m_sps) { sstr << indent << "SPS: "; for (uint8_t b : sps) { - sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " "; + sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " "; } sstr << "\n"; sstr << std::dec; } - for (const auto &spsext : m_sps_ext) { + for (const auto& spsext : m_sps_ext) { sstr << indent << "SPS-EXT: "; for (uint8_t b : spsext) { - sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " "; + sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " "; } sstr << "\n"; sstr << std::dec; } - for (const auto &pps : m_pps) { + for (const auto& pps : m_pps) { sstr << indent << "PPS: "; for (uint8_t b : pps) { - sstr << std::setfill('0') << std::setw(2) << std::hex << ((int)b) << " "; + sstr << std::setfill('0') << std::setw(2) << std::hex << ((int) b) << " "; } sstr << "\n"; sstr << std::dec; @@ -136,27 +209,28 @@ std::string Box_avcC::dump(Indent &indent) const { return sstr.str(); } -std::string Box_avcC::profileIndicationAsText() const { +std::string Box_avcC::profileIndicationAsText() const +{ // See ISO/IEC 14496-10:2022 Annex A switch (m_configuration.AVCProfileIndication) { - case 44: - return "CALVC 4:4:4"; - 
case 66: - return "Constrained Baseline"; - case 77: - return "Main"; - case 88: - return "Extended"; - case 100: - return "High variant"; - case 110: - return "High 10"; - case 122: - return "High 4:2:2"; - case 244: - return "High 4:4:4"; - default: - return "Unknown"; + case 44: + return "CALVC 4:4:4"; + case 66: + return "Constrained Baseline"; + case 77: + return "Main"; + case 88: + return "Extended"; + case 100: + return "High variant"; + case 110: + return "High 10"; + case 122: + return "High 4:2:2"; + case 244: + return "High 4:4:4"; + default: + return "Unknown"; } } @@ -192,11 +266,10 @@ void Box_avcC::get_header_nals(std::vector& data) const } - Result ImageItem_AVC::encode(const std::shared_ptr& image, - struct heif_encoder* encoder, - const struct heif_encoding_options& options, - enum heif_image_input_class input_class) + struct heif_encoder* encoder, + const struct heif_encoding_options& options, + enum heif_image_input_class input_class) { #if 0 CodedImageData codedImage; diff --git a/libheif/codecs/avc.h b/libheif/codecs/avc.h index 59e2fb6369..1bbbaddd4e 100644 --- a/libheif/codecs/avc.h +++ b/libheif/codecs/avc.h @@ -41,7 +41,7 @@ class Box_avcC : public Box { uint8_t profile_compatibility; // constraint set flags uint8_t AVCLevelIndication; // level_idc uint8_t lengthSize; - uint8_t chroma_format; + heif_chroma chroma_format = heif_chroma_420; // Note: avcC integer value can be cast to heif_chroma enum uint8_t bit_depth_luma = 8; uint8_t bit_depth_chroma = 8; }; diff --git a/tests/avc_box.cc b/tests/avc_box.cc index 9e3dc2929e..fad1cd9814 100644 --- a/tests/avc_box.cc +++ b/tests/avc_box.cc @@ -68,7 +68,7 @@ TEST_CASE("avcC") { "AVCProfileIndication: 66 (Constrained Baseline)\n" "profile_compatibility: 128\n" "AVCLevelIndication: 30\n" - "Chroma format: 32\n" + "Chroma format: 4:2:0\n" "Bit depth luma: 8\n" "Bit depth chroma: 8\n" "SPS: 67 64 00 28 ac 72 04 40 40 04 1a 10 00 00 03 00 " From 252ed4a68311104c77f94eb108e8b06371c47206 Mon Sep 17 
00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 16:32:10 +0200 Subject: [PATCH 29/41] fix windows compilation (#1302) --- libheif/box.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/libheif/box.cc b/libheif/box.cc index bd86034637..3af0e33605 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -1413,14 +1413,18 @@ void Box_iloc::set_use_tmp_file(bool flag) { m_use_tmpfile = flag; if (flag) { -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) strcpy(m_tmp_filename, "/tmp/libheif-XXXXXX"); m_tmpfile_fd = mkstemp(m_tmp_filename); #else + // TODO Currently unused code. Implement when needed. + assert(false); +# if 0 char tmpname[L_tmpnam_s]; // TODO: check return value (errno_t) tmpnam_s(tmpname, L_tmpnam_s); _sopen_s(&m_tmpfile_fd, tmpname, _O_CREAT | _O_TEMPORARY | _O_TRUNC | _O_RDWR, _SH_DENYRW, _S_IREAD | _S_IWRITE); +# endif #endif } } @@ -1643,10 +1647,14 @@ Error Box_iloc::append_data(heif_item_id item_ID, extent.length = data.size(); if (m_use_tmpfile && construction_method==0) { -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) ssize_t cnt = ::write(m_tmpfile_fd, data.data(), data.size()); #else + // TODO Currently unused code. Implement when needed. + assert(false); +# if 0 int cnt = _write(m_tmpfile_fd, data.data(), data.size()); +# endif #endif if (cnt < 0) { std::stringstream sstr; @@ -1904,7 +1912,11 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer) #if !defined(_WIN32) && !defined(_WIN64) ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length); #else + // TODO Currently unused code. Implement when needed. 
+ assert(false); +# if 0 int cnt = _read(m_tmpfile_fd, data.data(), extent.length); +# endif #endif if (cnt<0) { std::stringstream sstr; From 9bd33c015f465365ed535634a786d80ef202624f Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 16:36:13 +0200 Subject: [PATCH 30/41] remove unnecessary check for _WIN64 (#1302) --- libheif/box.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libheif/box.cc b/libheif/box.cc index 3af0e33605..256ee61421 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -50,7 +50,7 @@ #define M_PI 3.14159265358979323846 #endif -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) #include #else #include @@ -1909,7 +1909,7 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer) if (m_use_tmpfile) { std::vector data(extent.length); -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) ssize_t cnt = ::read(m_tmpfile_fd, data.data(), extent.length); #else // TODO Currently unused code. Implement when needed. 
From 64c8cae5195ad4f65497845aa3493bec6c86b027 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 16:41:36 +0200 Subject: [PATCH 31/41] fix windows compilation, undefined 'cnt' (#1302) --- libheif/box.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libheif/box.cc b/libheif/box.cc index 256ee61421..94d398f78d 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -1654,6 +1654,8 @@ Error Box_iloc::append_data(heif_item_id item_ID, assert(false); # if 0 int cnt = _write(m_tmpfile_fd, data.data(), data.size()); +# else + int cnt = -1; # endif #endif if (cnt < 0) { @@ -1916,6 +1918,8 @@ Error Box_iloc::write_mdat_after_iloc(StreamWriter& writer) assert(false); # if 0 int cnt = _read(m_tmpfile_fd, data.data(), extent.length); +# else + int cnt = -1; # endif #endif if (cnt<0) { From 2395082e8a628303d659c5b354d0780767336aad Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Tue, 10 Sep 2024 19:53:46 +0200 Subject: [PATCH 32/41] tild: remove support for 64bit dimensions and cleanup --- libheif/api/libheif/heif.h | 6 ++-- libheif/codecs/tild.cc | 67 +++++++++----------------------------- 2 files changed, 19 insertions(+), 54 deletions(-) diff --git a/libheif/api/libheif/heif.h b/libheif/api/libheif/heif.h index 82cf0b23d8..855402b74d 100644 --- a/libheif/api/libheif/heif.h +++ b/libheif/api/libheif/heif.h @@ -2429,8 +2429,8 @@ struct heif_tild_image_parameters { // --- version 1 - uint64_t image_width; - uint64_t image_height; + uint32_t image_width; + uint32_t image_height; uint32_t tile_width; uint32_t tile_height; @@ -2441,7 +2441,7 @@ struct heif_tild_image_parameters { uint8_t size_field_length; // one of: 0, 24, 32, 64 uint8_t number_of_extra_dimensions; // 0 for normal images, 1 for volumetric (3D), ... 
- uint64_t extra_dimensions[8]; // size of extra dimensions (first 8 dimensions) + uint32_t extra_dimensions[8]; // size of extra dimensions (first 8 dimensions) // boolean flags uint8_t tiles_are_sequential; // TODO: can we derive this automatically diff --git a/libheif/codecs/tild.cc b/libheif/codecs/tild.cc index 3b3af35743..51ba05fe4b 100644 --- a/libheif/codecs/tild.cc +++ b/libheif/codecs/tild.cc @@ -66,12 +66,6 @@ uint64_t nTiles_v(const heif_tild_image_parameters& params) } -bool dimensions_64bit(const heif_tild_image_parameters& params) -{ - return (params.image_width > 0xFFFF || params.image_height > 0xFFFF); -} - - void Box_tilC::derive_box_version() { set_version(1); @@ -112,16 +106,10 @@ void Box_tilC::derive_box_version() assert(false); // TODO: return error } - // printf("> %d %d -> %d\n", m_parameters.offset_field_length, m_parameters.size_field_length, (int)flags); - if (m_parameters.tiles_are_sequential) { flags |= 0x10; } - if (dimensions_64bit(m_parameters)) { - flags |= 0x20; - } - set_flags(flags); } @@ -132,26 +120,20 @@ Error Box_tilC::write(StreamWriter& writer) const size_t box_start = reserve_box_header_space(writer); - bool dimensions_are_64bit = dimensions_64bit(m_parameters); - if (m_parameters.number_of_extra_dimensions > 8) { assert(false); // currently not supported } - writer.write8(m_parameters.number_of_extra_dimensions); + writer.write32(m_parameters.tile_width); + writer.write32(m_parameters.tile_height); + writer.write32(m_parameters.compression_type_fourcc); - // TODO: this is redundant because we can also get this from 'ispe' (but currently only as uint32_t) - //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_width); - //writer.write(dimensions_are_64bit ? 8 : 4, m_parameters.image_height); + writer.write8(m_parameters.number_of_extra_dimensions); for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) { - writer.write(dimensions_are_64bit ? 
8 : 4, m_parameters.extra_dimensions[i]); + writer.write32(m_parameters.extra_dimensions[i]); } - writer.write32(m_parameters.tile_width); - writer.write32(m_parameters.tile_height); - writer.write32(m_parameters.compression_type_fourcc); - prepend_header(writer, box_start); return Error::Ok; @@ -226,33 +208,25 @@ Error Box_tilC::parse(BitstreamRange& range) } m_parameters.tiles_are_sequential = !!(flags & 0x10); - bool dimensions_are_64bit = (flags & 0x20); - m_parameters.number_of_extra_dimensions = range.read8(); -#if 0 - if (data.size() < idx + 2 * (dimensions_are_64bit ? 8 : 4)) { - return eofError; - } - - if (data.size() < idx + (2 + m_parameters.number_of_extra_dimensions) * (dimensions_are_64bit ? 8 : 4) + 3 * 4) { - return eofError; - } -#endif - - /* - m_parameters.image_width = (dimensions_are_64bit ? range.read64() : range.read32()); - m_parameters.image_height = (dimensions_are_64bit ? range.read64() : range.read32()); + m_parameters.tile_width = range.read32(); + m_parameters.tile_height = range.read32(); + m_parameters.compression_type_fourcc = range.read32(); - if (m_parameters.image_width == 0 || m_parameters.image_height == 0) { + if (m_parameters.tile_width == 0 || m_parameters.tile_height == 0) { return {heif_error_Invalid_input, heif_suberror_Unspecified, - "'tild' image with zero width or height."}; + "Tile with zero width or height."}; } -*/ + + + // --- extra dimensions + + m_parameters.number_of_extra_dimensions = range.read8(); for (int i = 0; i < m_parameters.number_of_extra_dimensions; i++) { - uint64_t size = (dimensions_are_64bit ? 
range.read64() : range.read32()); + uint32_t size = range.read32(); if (size == 0) { return {heif_error_Invalid_input, @@ -268,15 +242,6 @@ Error Box_tilC::parse(BitstreamRange& range) } } - m_parameters.tile_width = range.read32(); - m_parameters.tile_height = range.read32(); - m_parameters.compression_type_fourcc = range.read32(); - - if (m_parameters.tile_width == 0 || m_parameters.tile_height == 0) { - return {heif_error_Invalid_input, - heif_suberror_Unspecified, - "Tile with zero width or height."}; - } return range.get_error(); } From 9483961432ae65b911bb4208416ec5833ed736a5 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 12:45:57 +0200 Subject: [PATCH 33/41] limit maximum memory allocation (should fix ClusterFuzz 71389) --- libheif/pixelimage.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libheif/pixelimage.cc b/libheif/pixelimage.cc index 0ec3092c2e..5583e1590b 100644 --- a/libheif/pixelimage.cc +++ b/libheif/pixelimage.cc @@ -21,6 +21,7 @@ #include "pixelimage.h" #include "common_utils.h" +#include "security_limits.h" #include #include @@ -256,8 +257,12 @@ bool HeifPixelImage::ImagePlane::alloc(uint32_t width, uint32_t height, heif_cha stride = m_mem_width * bytes_per_pixel; stride = (stride + alignment - 1U) & ~(alignment - 1U); + if ((MAX_MEMORY_BLOCK_SIZE - (alignment + 1)) / stride < m_mem_height) { + return false; + } + try { - allocated_mem = new uint8_t[m_mem_height * stride + alignment - 1]; + allocated_mem = new uint8_t[static_cast(m_mem_height) * stride + alignment - 1]; uint8_t* mem_8 = allocated_mem; // shift beginning of image data to aligned memory position From 10e455bdd01a468c79bc6a8c1b266467419c5256 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 12:53:13 +0200 Subject: [PATCH 34/41] iden: make sure that references image item exists --- libheif/codecs/iden.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libheif/codecs/iden.cc b/libheif/codecs/iden.cc index 
26b96ab4f7..f177a7e11d 100644 --- a/libheif/codecs/iden.cc +++ b/libheif/codecs/iden.cc @@ -68,6 +68,11 @@ Result> ImageItem_iden::decode_compressed_image( } std::shared_ptr imgitem = get_context()->get_image(reference_image_id); + if (!imgitem) { + return Error(heif_error_Invalid_input, + heif_suberror_Unspecified, + "'iden' image references unavailable image"); + } return imgitem->decode_compressed_image(options, decode_tile_only, tile_x0, tile_y0); } From f50ef3bf2193bec3b316c22e495771869aa3bcb7 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 16:42:01 +0200 Subject: [PATCH 35/41] iovl: detect self-references --- libheif/codecs/overlay.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libheif/codecs/overlay.cc b/libheif/codecs/overlay.cc index 64798350b9..e5e64c4554 100644 --- a/libheif/codecs/overlay.cc +++ b/libheif/codecs/overlay.cc @@ -309,8 +309,16 @@ Result> ImageItem_Overlay::decode_overlay_image( return err; } - for (size_t i = 0; i < m_overlay_image_ids.size(); i++) { + + // detect if 'iovl' is referencing itself + + if (m_overlay_image_ids[i] == get_id()) { + return Error{heif_error_Invalid_input, + heif_suberror_Unspecified, + "Self-reference in 'iovl' image item."}; + } + auto imgItem = get_context()->get_image(m_overlay_image_ids[i]); if (!imgItem) { return Error(heif_error_Invalid_input, heif_suberror_Nonexisting_item_referenced, "'iovl' image references a non-existing item."); From 4e9eb8ee6264d860f928dfaa21883f8ddc1edcc8 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 17:33:52 +0200 Subject: [PATCH 36/41] define chroma-420 sample position enum --- libheif/pixelimage.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libheif/pixelimage.h b/libheif/pixelimage.h index 42433bbbe1..24936e868c 100644 --- a/libheif/pixelimage.h +++ b/libheif/pixelimage.h @@ -55,6 +55,20 @@ bool is_integer_multiple_of_chroma_size(uint32_t width, // Returns the list of valid heif_chroma values 
for a given colorspace. std::vector get_valid_chroma_values_for_colorspace(heif_colorspace colorspace); +// TODO: move to public API when used +enum heif_chroma420_sample_position { + // values 0-5 according to ISO 23091-2 / ITU-T H.273 + heif_chroma420_sample_position_00_05 = 0, + heif_chroma420_sample_position_05_05 = 1, + heif_chroma420_sample_position_00_00 = 2, + heif_chroma420_sample_position_05_00 = 3, + heif_chroma420_sample_position_00_10 = 4, + heif_chroma420_sample_position_05_10 = 5, + + // values 6 according to ISO 23001-17 + heif_chroma420_sample_position_00_00_01_00 = 6 +}; + class HeifPixelImage : public std::enable_shared_from_this, public ErrorBuffer From e962b5919ce990118bfa38aa85796b0a9a72c4a2 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 19:44:36 +0200 Subject: [PATCH 37/41] fix HeifContext::has_alpha() for broken input (fixes #1305) --- libheif/context.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libheif/context.cc b/libheif/context.cc index 8a8b96f9db..288e1fff61 100644 --- a/libheif/context.cc +++ b/libheif/context.cc @@ -831,7 +831,6 @@ Error HeifContext::interpret_heif_file() bool HeifContext::has_alpha(heif_item_id ID) const { - assert(is_image(ID)); auto img = m_all_images.find(ID)->second; @@ -843,7 +842,10 @@ bool HeifContext::has_alpha(heif_item_id ID) const heif_colorspace colorspace; heif_chroma chroma; - img->get_coded_image_colorspace(&colorspace, &chroma); + Error err = img->get_coded_image_colorspace(&colorspace, &chroma); + if (err) { + return false; + } if (chroma == heif_chroma_interleaved_RGBA || chroma == heif_chroma_interleaved_RRGGBBAA_BE || From 4563a2f650ccde659a47e64682e56693c3e18451 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Wed, 11 Sep 2024 19:46:04 +0200 Subject: [PATCH 38/41] move get_tile_size() into ImageItem class --- libheif/api/libheif/heif.cc | 13 +------------ libheif/codecs/grid.h | 2 +- libheif/codecs/image_item.cc | 7 +++++++ 
libheif/codecs/image_item.h | 2 ++ libheif/codecs/tild.h | 2 +- libheif/context.cc | 2 +- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/libheif/api/libheif/heif.cc b/libheif/api/libheif/heif.cc index a98a74299d..0c8bfdbf15 100644 --- a/libheif/api/libheif/heif.cc +++ b/libheif/api/libheif/heif.cc @@ -909,18 +909,7 @@ struct heif_error heif_image_handle_get_tile_size(const struct heif_image_handle uint32_t w,h; - if (std::shared_ptr gridItem = std::dynamic_pointer_cast(handle->image)) { - gridItem->get_tile_size(w,h); - } - else if (std::shared_ptr tildItem = std::dynamic_pointer_cast(handle->image)) { - tildItem->get_tile_size(w,h); - } - else { - // return whole image size (the image is the only tile) - - w = handle->image->get_width(); - h = handle->image->get_height(); - } + handle->image->get_tile_size(w,h); if (tile_width) { *tile_width = w; diff --git a/libheif/codecs/grid.h b/libheif/codecs/grid.h index 1d84cac286..dab6404a0f 100644 --- a/libheif/codecs/grid.h +++ b/libheif/codecs/grid.h @@ -112,7 +112,7 @@ class ImageItem_Grid : public ImageItem heif_image_tiling get_heif_image_tiling() const; - void get_tile_size(uint32_t& w, uint32_t& h) const; + void get_tile_size(uint32_t& w, uint32_t& h) const override; private: ImageGrid m_grid_spec; diff --git a/libheif/codecs/image_item.cc b/libheif/codecs/image_item.cc index fa969a0175..b44d384809 100644 --- a/libheif/codecs/image_item.cc +++ b/libheif/codecs/image_item.cc @@ -403,6 +403,13 @@ uint32_t ImageItem::get_ispe_height() const } +void ImageItem::get_tile_size(uint32_t& w, uint32_t& h) const +{ + w = get_width(); + h = get_height(); +} + + Error ImageItem::get_coded_image_colorspace(heif_colorspace* out_colorspace, heif_chroma* out_chroma) const { heif_item_id id; diff --git a/libheif/codecs/image_item.h b/libheif/codecs/image_item.h index 0bd8069165..d505409173 100644 --- a/libheif/codecs/image_item.h +++ b/libheif/codecs/image_item.h @@ -128,6 +128,8 @@ class ImageItem : public 
ErrorBuffer m_height = h; } + virtual void get_tile_size(uint32_t& w, uint32_t& h) const; + Error get_coded_image_colorspace(heif_colorspace* out_colorspace, heif_chroma* out_chroma) const; virtual void process_before_write() { } diff --git a/libheif/codecs/tild.h b/libheif/codecs/tild.h index 955fa69671..38f377bb3a 100644 --- a/libheif/codecs/tild.h +++ b/libheif/codecs/tild.h @@ -159,7 +159,7 @@ class ImageItem_Tild : public ImageItem heif_image_tiling get_heif_image_tiling() const; - void get_tile_size(uint32_t& w, uint32_t& h) const; + void get_tile_size(uint32_t& w, uint32_t& h) const override; private: TildHeader m_tild_header; diff --git a/libheif/context.cc b/libheif/context.cc index 288e1fff61..f7c769902d 100644 --- a/libheif/context.cc +++ b/libheif/context.cc @@ -864,7 +864,7 @@ bool HeifContext::has_alpha(heif_item_id ID) const } ImageGrid grid; - Error err = grid.parse(grid_data); + err = grid.parse(grid_data); if (err) { return false; } From 487318c59f3693b04ac6590f40726934d2a19775 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Thu, 12 Sep 2024 20:02:31 +0200 Subject: [PATCH 39/41] url-box: parse 'data-in-same-file' flag --- libheif/box.cc | 8 +++++++- libheif/box.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libheif/box.cc b/libheif/box.cc index 94d398f78d..2660611057 100644 --- a/libheif/box.cc +++ b/libheif/box.cc @@ -3726,7 +3726,13 @@ Error Box_url::parse(BitstreamRange& range) return unsupported_version_error("url"); } - m_location = range.read_string(); + if (get_flags() & 1) { + // data in same file + m_location.clear(); + } + else { + m_location = range.read_string(); + } return range.get_error(); } diff --git a/libheif/box.h b/libheif/box.h index a3f037fa20..4807837cbf 100644 --- a/libheif/box.h +++ b/libheif/box.h @@ -1045,6 +1045,8 @@ class Box_url : public FullBox public: std::string dump(Indent&) const override; + bool is_same_file() const { return m_location.empty(); } + protected: Error parse(BitstreamRange& 
range) override; From 3a043d97ce65773eef7473e26f89f71cb86a1808 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Fri, 13 Sep 2024 00:12:59 +0200 Subject: [PATCH 40/41] add option to set plugin install directory independently from search path (#1307) --- CMakeLists.txt | 12 ++++++++++-- libheif/plugins/CMakeLists.txt | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 041f3b8e7e..d5ac5c66a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,11 +66,19 @@ LIST (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules") # --- codec plugins option(ENABLE_PLUGIN_LOADING "Support loading of plugins" ON) -set(PLUGIN_DIRECTORY "${CMAKE_INSTALL_FULL_LIBDIR}/libheif" CACHE STRING "Plugin install directory") +set(PLUGIN_DIRECTORY "${CMAKE_INSTALL_FULL_LIBDIR}/libheif" CACHE STRING "Plugin directory") +set(PLUGIN_INSTALL_DIRECTORY "" CACHE STRING "Plugin install directory (leaving it empty will use PLUGIN_DIRECTORY)") if (ENABLE_PLUGIN_LOADING) set(PLUGIN_LOADING_SUPPORTED_AND_ENABLED TRUE) - install(DIRECTORY DESTINATION ${PLUGIN_DIRECTORY} DIRECTORY_PERMISSIONS + + if (PLUGIN_INSTALL_DIRECTORY STREQUAL "") + set(COMPUTED_PLUGIN_INSTALL_DIRECTORY ${PLUGIN_DIRECTORY}) + else () + set(COMPUTED_PLUGIN_INSTALL_DIRECTORY ${PLUGIN_INSTALL_DIRECTORY}) + endif () + + install(DIRECTORY DESTINATION ${COMPUTED_PLUGIN_INSTALL_DIRECTORY} DIRECTORY_PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) diff --git a/libheif/plugins/CMakeLists.txt b/libheif/plugins/CMakeLists.txt index 58418d9983..18e4bdb158 100644 --- a/libheif/plugins/CMakeLists.txt +++ b/libheif/plugins/CMakeLists.txt @@ -22,7 +22,7 @@ macro(plugin_compilation name varName foundName optionName defineName) target_link_libraries(heif-${name} PRIVATE ${${varName}_LIBRARIES} heif) install(TARGETS heif-${name} - LIBRARY DESTINATION ${PLUGIN_DIRECTORY} + LIBRARY DESTINATION ${COMPUTED_PLUGIN_INSTALL_DIRECTORY} ) else () 
message("Compiling '" ${name} "' as built-in backend") From 3608adf4c39325848087dd912700df5c7c0cd9d7 Mon Sep 17 00:00:00 2001 From: Dirk Farin Date: Fri, 13 Sep 2024 11:55:31 +0200 Subject: [PATCH 41/41] nvdec: show gfxcard name in plugin description --- libheif/plugins/decoder_nvdec.cc | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/libheif/plugins/decoder_nvdec.cc b/libheif/plugins/decoder_nvdec.cc index 478430089f..c78851c361 100644 --- a/libheif/plugins/decoder_nvdec.cc +++ b/libheif/plugins/decoder_nvdec.cc @@ -48,17 +48,31 @@ static char plugin_name[MAX_PLUGIN_NAME_LENGTH]; static const char *nvdec_plugin_name() { - snprintf(plugin_name, MAX_PLUGIN_NAME_LENGTH, "NVIDIA Video Decoder (Hardware)"); - - // make sure that the string is null-terminated - plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0; - return plugin_name; } static void nvdec_init_plugin() { cuInit(0); + + CUdevice cuDevice = 0; + CUresult result; + result = cuDeviceGet(&cuDevice, 0); + if (result != CUDA_SUCCESS) + { + return; + } + + char szDeviceName[50]; + result = cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice); + if (result != CUDA_SUCCESS) { + return; + } + + snprintf(plugin_name, MAX_PLUGIN_NAME_LENGTH, "NVIDIA Video Decoder (%s)", szDeviceName); + + // make sure that the string is null-terminated + plugin_name[MAX_PLUGIN_NAME_LENGTH - 1] = 0; } static void nvdec_deinit_plugin()