From c39c47cfda79f98464f0589e689a6ac9efb6f87a Mon Sep 17 00:00:00 2001 From: Matthew Lai Date: Sat, 14 Dec 2024 17:42:16 +0800 Subject: [PATCH] Implemented hardware decoding This implements hardware decoding continuing from the work of @rvillalba-novetta and @mikeboers in https://github.com/PyAV-Org/PyAV/compare/main...rvillalba-novetta:PyAV:hwaccel (and children commits) --- av/__init__.py | 1 + av/__main__.py | 10 ++ av/audio/codeccontext.pyx | 5 +- av/codec/codec.pxd | 2 + av/codec/codec.pyx | 71 ++++++++++--- av/codec/context.pxd | 12 ++- av/codec/context.pyi | 1 + av/codec/context.pyx | 19 +++- av/codec/hwaccel.pxd | 21 ++++ av/codec/hwaccel.pyx | 176 ++++++++++++++++++++++++++++++++ av/container/core.pxd | 3 + av/container/core.pyi | 6 ++ av/container/core.pyx | 11 +- av/container/input.pyx | 2 +- av/container/output.pyx | 6 +- av/video/codeccontext.pxd | 12 +++ av/video/codeccontext.pyx | 66 +++++++++++- examples/basics/hw_decode.py | 76 ++++++++++++++ include/libav.pxd | 3 + include/libavcodec/avcodec.pxd | 8 ++ include/libavcodec/hwaccel.pxd | 19 ++++ include/libavutil/buffer.pxd | 13 ++- include/libavutil/hwcontext.pxd | 24 +++++ scripts/build-deps | 26 +++++ tests/test_decode.py | 62 +++++++++++ 25 files changed, 617 insertions(+), 38 deletions(-) create mode 100644 av/codec/hwaccel.pxd create mode 100644 av/codec/hwaccel.pyx create mode 100644 examples/basics/hw_decode.py create mode 100644 include/libavcodec/hwaccel.pxd create mode 100644 include/libavutil/hwcontext.pxd diff --git a/av/__init__.py b/av/__init__.py index cbc3c8a2f..e2f9e5a6d 100644 --- a/av/__init__.py +++ b/av/__init__.py @@ -17,6 +17,7 @@ from av.bitstream import BitStreamFilterContext, bitstream_filters_available from av.codec.codec import Codec, codecs_available from av.codec.context import CodecContext +from av.codec.hwaccel import HWConfig from av.container import open from av.format import ContainerFormat, formats_available from av.packet import Packet diff --git a/av/__main__.py 
b/av/__main__.py index bc353d147..4cde2147d 100644 --- a/av/__main__.py +++ b/av/__main__.py @@ -6,6 +6,8 @@ def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--codecs", action="store_true") + parser.add_argument('--hwdevices', action='store_true') + parser.add_argument('--hwconfigs', action='store_true') parser.add_argument("--version", action="store_true") args = parser.parse_args() @@ -30,6 +32,14 @@ def main() -> None: version = config["version"] print(f"{libname:<13} {version[0]:3d}.{version[1]:3d}.{version[2]:3d}") + if args.hwdevices: + from av.codec.hwaccel import dump_hwdevices + dump_hwdevices() + + if args.hwconfigs: + from av.codec.codec import dump_hwconfigs + dump_hwconfigs() + if args.codecs: from av.codec.codec import dump_codecs diff --git a/av/audio/codeccontext.pyx b/av/audio/codeccontext.pyx index 54319ddaf..856af555c 100644 --- a/av/audio/codeccontext.pyx +++ b/av/audio/codeccontext.pyx @@ -3,13 +3,14 @@ cimport libav as lib from av.audio.format cimport AudioFormat, get_audio_format from av.audio.frame cimport AudioFrame, alloc_audio_frame from av.audio.layout cimport AudioLayout, get_audio_layout +from av.codec.hwaccel cimport HWAccel from av.frame cimport Frame from av.packet cimport Packet cdef class AudioCodecContext(CodecContext): - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): - CodecContext._init(self, ptr, codec) + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): + CodecContext._init(self, ptr, codec, hwaccel) cdef _prepare_frames_for_encode(self, Frame input_frame): diff --git a/av/codec/codec.pxd b/av/codec/codec.pxd index b9925df13..576c659b4 100644 --- a/av/codec/codec.pxd +++ b/av/codec/codec.pxd @@ -7,6 +7,8 @@ cdef class Codec: cdef const lib.AVCodecDescriptor *desc cdef readonly bint is_encoder + cdef tuple _hardware_configs + cdef _init(self, name=?) 
diff --git a/av/codec/codec.pyx b/av/codec/codec.pyx index 1493f0f7b..26d487a48 100644 --- a/av/codec/codec.pyx +++ b/av/codec/codec.pyx @@ -1,4 +1,5 @@ from av.audio.format cimport get_audio_format +from av.codec.hwaccel cimport wrap_hwconfig from av.descriptor cimport wrap_avclass from av.utils cimport avrational_to_fraction from av.video.format cimport get_video_format @@ -117,18 +118,25 @@ cdef class Codec: if self.is_encoder and lib.av_codec_is_decoder(self.ptr): raise RuntimeError("%s is both encoder and decoder.") - def create(self, kind = None): + def __repr__(self): + return f'' + + def create(self, kind = None, *args, **kwargs): """Create a :class:`.CodecContext` for this codec. :param str kind: Gives a hint to static type checkers for what exact CodecContext is used. """ from .context import CodecContext - return CodecContext.create(self) + return CodecContext.create(self, *args, **kwargs) @property def is_decoder(self): return not self.is_encoder + @property + def mode(self): + return 'w' if self.is_encoder else 'r' + @property def descriptor(self): return wrap_avclass(self.ptr.priv_class) @@ -203,6 +211,23 @@ cdef class Codec: i += 1 return ret + @property + def hardware_configs(self): + if self._hardware_configs: + return self._hardware_configs + ret = [] + cdef int i = 0 + cdef lib.AVCodecHWConfig *ptr + while True: + ptr = lib.avcodec_get_hw_config(self.ptr, i) + if not ptr: + break + ret.append(wrap_hwconfig(ptr)) + i += 1 + ret = tuple(ret) + self._hardware_configs = ret + return ret + @property def properties(self): return self.desc.props @@ -294,18 +319,16 @@ codec_descriptor = wrap_avclass(lib.avcodec_get_class()) def dump_codecs(): """Print information about available codecs.""" - print( - """Codecs: - D..... = Decoding supported - .E.... = Encoding supported - ..V... = Video codec - ..A... = Audio codec - ..S... = Subtitle codec - ...I.. = Intra frame-only codec - ....L. 
= Lossy compression - .....S = Lossless compression - ------""" - ) + print('''Codecs: + D.... = Decoding supported + .E... = Encoding supported + ..V.. = Video codec + ..A.. = Audio codec + ..S.. = Subtitle codec + ...I. = Intra frame-only codec + ....L = Lossless compression + .....H = Hardware decoding supported + ------''') for name in sorted(codecs_available): try: @@ -323,17 +346,31 @@ def dump_codecs(): try: print( - " %s%s%s%s%s%s %-18s %s" + " %s%s%s%s%s%s %-18s %s" % ( ".D"[bool(d_codec)], ".E"[bool(e_codec)], codec.type[0].upper(), ".I"[codec.intra_only], - ".L"[codec.lossy], - ".S"[codec.lossless], + ".L"[codec.lossless], + ".H"[bool((d_codec or codec).hardware_configs)], codec.name, codec.long_name, ) ) except Exception as e: print(f"...... {codec.name:<18} ERROR: {e}") + +def dump_hwconfigs(): + print('Hardware configs:') + for name in sorted(codecs_available): + try: + codec = Codec(name, 'r') + except ValueError: + continue + configs = codec.hardware_configs + if not configs: + continue + print(' ', codec.name) + for config in configs: + print(' ', config) diff --git a/av/codec/context.pxd b/av/codec/context.pxd index 42b2d63e7..2cfa0c895 100644 --- a/av/codec/context.pxd +++ b/av/codec/context.pxd @@ -3,6 +3,7 @@ from libc.stdint cimport int64_t from av.bytesource cimport ByteSource from av.codec.codec cimport Codec +from av.codec.hwaccel cimport HWAccel, HWAccelContext from av.frame cimport Frame from av.packet cimport Packet @@ -18,11 +19,12 @@ cdef class CodecContext: cdef int stream_index cdef lib.AVCodecParserContext *parser - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec) + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel) # Public API. cdef readonly bint is_open cdef readonly Codec codec + cdef readonly HWAccel hwaccel cdef public dict options cpdef open(self, bint strict=?) @@ -31,6 +33,9 @@ cdef class CodecContext: cpdef decode(self, Packet packet=?) 
cpdef flush_buffers(self) + # Used by hardware-accelerated decode. + cdef HWAccelContext hwaccel_ctx + # Used by both transcode APIs to setup user-land objects. # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets # are bogus). It should take all info it needs from the context and/or stream. @@ -49,10 +54,11 @@ cdef class CodecContext: cdef _send_packet_and_recv(self, Packet packet) cdef _recv_frame(self) + cdef _transfer_hwframe(self, Frame frame) + # Implemented by children for the generic send/recv API, so we have the # correct subclass of Frame. cdef Frame _next_frame cdef Frame _alloc_next_frame(self) - -cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*) +cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*, HWAccel hwaccel) diff --git a/av/codec/context.pyi b/av/codec/context.pyi index a6ca9647e..0ac3ca988 100644 --- a/av/codec/context.pyi +++ b/av/codec/context.pyi @@ -92,3 +92,4 @@ class CodecContext: self, raw_input: bytes | bytearray | memoryview | None = None ) -> list[Packet]: ... def flush_buffers(self) -> None: ... + def is_hardware_accelerated(self) -> bool: ... 
diff --git a/av/codec/context.pyx b/av/codec/context.pyx index 29b7b80d1..78174defa 100644 --- a/av/codec/context.pyx +++ b/av/codec/context.pyx @@ -18,7 +18,7 @@ from av.dictionary import Dictionary cdef object _cinit_sentinel = object() -cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec): +cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec, HWAccel hwaccel): """Build an av.CodecContext for an existing AVCodecContext.""" cdef CodecContext py_ctx @@ -35,7 +35,7 @@ cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCode else: py_ctx = CodecContext(_cinit_sentinel) - py_ctx._init(c_ctx, c_codec) + py_ctx._init(c_ctx, c_codec, hwaccel) return py_ctx @@ -83,10 +83,10 @@ class Flags2(IntEnum): cdef class CodecContext: @staticmethod - def create(codec, mode=None): + def create(codec, mode=None, hwaccel=None): cdef Codec cy_codec = codec if isinstance(codec, Codec) else Codec(codec, mode) cdef lib.AVCodecContext *c_ctx = lib.avcodec_alloc_context3(cy_codec.ptr) - return wrap_codec_context(c_ctx, cy_codec.ptr) + return wrap_codec_context(c_ctx, cy_codec.ptr, hwaccel) def __cinit__(self, sentinel=None, *args, **kwargs): if sentinel is not _cinit_sentinel: @@ -96,11 +96,12 @@ cdef class CodecContext: self.stream_index = -1 # This is set by the container immediately. self.is_open = False - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): self.ptr = ptr if self.ptr.codec and codec and self.ptr.codec != codec: raise RuntimeError("Wrapping CodecContext with mismatched codec.") self.codec = wrap_codec(codec if codec != NULL else self.ptr.codec) + self.hwaccel = hwaccel # Set reasonable threading defaults. self.ptr.thread_count = 0 # use as many threads as there are CPUs. 
@@ -310,6 +311,9 @@ cdef class CodecContext: return packets + def is_hardware_accelerated(self): + return self.hwaccel_ctx is not None + def _send_frame_and_recv(self, Frame frame): cdef Packet packet @@ -359,10 +363,15 @@ cdef class CodecContext: return err_check(res) + frame = self._transfer_hwframe(frame) + if not res: self._next_frame = None return frame + cdef _transfer_hwframe(self, Frame frame): + return frame + cdef _recv_packet(self): cdef Packet packet = Packet() diff --git a/av/codec/hwaccel.pxd b/av/codec/hwaccel.pxd new file mode 100644 index 000000000..e6c8057d6 --- /dev/null +++ b/av/codec/hwaccel.pxd @@ -0,0 +1,21 @@ +cimport libav as lib + +from av.codec.codec cimport Codec + +cdef class HWConfig(object): + cdef object __weakref__ + cdef lib.AVCodecHWConfig *ptr + cdef void _init(self, lib.AVCodecHWConfig *ptr) + +cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr) + +cdef class HWAccel(object): + cdef int _device_type + cdef str _device + cdef public bint allow_software_fallback + cdef public dict options + +cdef class HWAccelContext(HWAccel): + cdef readonly Codec codec + cdef readonly HWConfig config + cdef lib.AVBufferRef *ptr diff --git a/av/codec/hwaccel.pyx b/av/codec/hwaccel.pyx new file mode 100644 index 000000000..cd008c9e7 --- /dev/null +++ b/av/codec/hwaccel.pyx @@ -0,0 +1,176 @@ +from __future__ import print_function + +from enum import IntEnum + +import weakref + +cimport libav as lib + +from av.codec.codec cimport Codec +from av.dictionary cimport _Dictionary +from av.error cimport err_check +from av.video.format cimport get_video_format + +from av.dictionary import Dictionary + +class Capabilities(IntEnum): + none = 0 + draw_horiz_band = lib.AV_CODEC_CAP_DRAW_HORIZ_BAND + dr1 = lib.AV_CODEC_CAP_DR1 + hwaccel = 1 << 4 + delay = lib.AV_CODEC_CAP_DELAY + small_last_frame = lib.AV_CODEC_CAP_SMALL_LAST_FRAME + hwaccel_vdpau = 1 << 7 + subframes = lib.AV_CODEC_CAP_SUBFRAMES + experimental = lib.AV_CODEC_CAP_EXPERIMENTAL + channel_conf = 
lib.AV_CODEC_CAP_CHANNEL_CONF + neg_linesizes = 1 << 11 + frame_threads = lib.AV_CODEC_CAP_FRAME_THREADS + slice_threads = lib.AV_CODEC_CAP_SLICE_THREADS + param_change = lib.AV_CODEC_CAP_PARAM_CHANGE + auto_threads = lib.AV_CODEC_CAP_OTHER_THREADS + variable_frame_size = lib.AV_CODEC_CAP_VARIABLE_FRAME_SIZE + avoid_probing = lib.AV_CODEC_CAP_AVOID_PROBING + hardware = lib.AV_CODEC_CAP_HARDWARE + hybrid = lib.AV_CODEC_CAP_HYBRID + encoder_reordered_opaque = 1 << 20 + encoder_flush = 1 << 21 + encoder_recon_frame = 1 << 22 + +class HWDeviceType(IntEnum): + NONE = lib.AV_HWDEVICE_TYPE_NONE + VDPAU = lib.AV_HWDEVICE_TYPE_VDPAU + CUDA = lib.AV_HWDEVICE_TYPE_CUDA + VAAPI = lib.AV_HWDEVICE_TYPE_VAAPI + DXVA2 = lib.AV_HWDEVICE_TYPE_DXVA2 + QSV = lib.AV_HWDEVICE_TYPE_QSV + VIDEOTOOLBOX = lib.AV_HWDEVICE_TYPE_VIDEOTOOLBOX + D3D11VA = lib.AV_HWDEVICE_TYPE_D3D11VA + DRM = lib.AV_HWDEVICE_TYPE_DRM + OPENCL = lib.AV_HWDEVICE_TYPE_OPENCL + MEDIACODEC = lib.AV_HWDEVICE_TYPE_MEDIACODEC + VULKAN = lib.AV_HWDEVICE_TYPE_VULKAN + D3D12VA = lib.AV_HWDEVICE_TYPE_D3D12VA + +class HWConfigMethod(IntEnum): + NONE = 0 + HW_DEVICE_CTX = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX # This is the only one we support. 
+ HW_FRAME_CTX = lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX + INTERNAL = lib.AV_CODEC_HW_CONFIG_METHOD_INTERNAL + AD_HOC = lib.AV_CODEC_HW_CONFIG_METHOD_AD_HOC + + +cdef object _cinit_sentinel = object() +cdef object _singletons = weakref.WeakValueDictionary() + +cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr): + try: + return _singletons[ptr] + except KeyError: + pass + cdef HWConfig config = HWConfig(_cinit_sentinel) + config._init(ptr) + _singletons[ptr] = config + return config + + +cdef class HWConfig(object): + + def __init__(self, sentinel): + if sentinel is not _cinit_sentinel: + raise RuntimeError('Cannot instantiate CodecContext') + + cdef void _init(self, lib.AVCodecHWConfig *ptr): + self.ptr = ptr + + def __repr__(self): + return ( + f'self.ptr:x}>' + ) + + @property + def device_type(self): + return HWDeviceType(self.ptr.device_type) + + @property + def format(self): + return get_video_format(self.ptr.pix_fmt, 0, 0) + + @property + def methods(self): + return HWConfigMethod(self.ptr.methods) + + @property + def is_supported(self): + return bool(self.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) + +hwdevices_available = [] + +cdef lib.AVHWDeviceType x = lib.AV_HWDEVICE_TYPE_NONE +while True: + x = lib.av_hwdevice_iterate_types(x) + if x == lib.AV_HWDEVICE_TYPE_NONE: + break + hwdevices_available.append(lib.av_hwdevice_get_type_name(HWDeviceType(x))) + +def dump_hwdevices(): + print('Hardware device types:') + for x in hwdevices_available: + print(' ', x) + +cdef class HWAccel(object): + def __init__(self, device_type: str | HWDeviceType, device: str | None = None, + allow_software_fallback: bool = True, options=None, **kwargs): + if isinstance(device_type, HWDeviceType): + self._device_type = device_type + elif isinstance(device_type, str): + self._device_type = int(lib.av_hwdevice_find_type_by_name(device_type)) + else: + raise ValueError('Unknown type for device_type') + self._device = device + self.allow_software_fallback = 
allow_software_fallback + + if options and kwargs: + raise ValueError("accepts only one of options arg or kwargs") + self.options = dict(options or kwargs) + + def create(self, Codec codec): + return HWAccelContext( + device_type=HWDeviceType(self._device_type), + device=self._device, + options=self.options, + codec=codec, + allow_software_fallback=self.allow_software_fallback) + +cdef class HWAccelContext(HWAccel): + def __init__(self, device_type, device, options, codec, allow_software_fallback, **kwargs): + super().__init__(device_type, device, options, **kwargs) + if not codec: + raise ValueError("codec is required") + self.codec = codec + cdef HWConfig config + for config in codec.hardware_configs: + if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX): + continue + if self._device_type and config.device_type != self._device_type: + continue + break + else: + raise NotImplementedError(f"no supported hardware config for {codec}") + self.config = config + cdef char *c_device = NULL + if self._device: + device_bytes = self._device.encode() + c_device = device_bytes + cdef _Dictionary c_options = Dictionary(self.options) + err_check(lib.av_hwdevice_ctx_create(&self.ptr, config.ptr.device_type, c_device, c_options.ptr, 0)) + + def __dealloc__(self): + if self.ptr: + lib.av_buffer_unref(&self.ptr) + def create(self, *args, **kwargs): + raise ValueError("cannot call HWAccelContext.create") diff --git a/av/container/core.pxd b/av/container/core.pxd index 1aed54b90..87bb792b3 100644 --- a/av/container/core.pxd +++ b/av/container/core.pxd @@ -1,5 +1,6 @@ cimport libav as lib +from av.codec.hwaccel cimport HWAccel from av.container.pyio cimport PyIOFile from av.container.streams cimport StreamContainer from av.dictionary cimport _Dictionary @@ -33,6 +34,8 @@ cdef class Container: cdef readonly dict container_options cdef readonly list stream_options + cdef HWAccel hwaccel + cdef readonly StreamContainer streams cdef readonly dict metadata diff --git 
a/av/container/core.pyi b/av/container/core.pyi index 227a7d32a..7310abe74 100644 --- a/av/container/core.pyi +++ b/av/container/core.pyi @@ -4,6 +4,7 @@ from pathlib import Path from types import TracebackType from typing import Any, Callable, ClassVar, Literal, Type, overload +from av.codec.hwaccel cimport HWAccel from av.format import ContainerFormat from .input import InputContainer @@ -43,6 +44,7 @@ class Container: options: dict[str, str] container_options: dict[str, str] stream_options: list[dict[str, str]] + hwaccel: HWAccel streams: StreamContainer metadata: dict[str, str] open_timeout: Real | None @@ -73,6 +75,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer: ... @overload def open( @@ -87,6 +90,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer: ... @overload def open( @@ -101,6 +105,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> OutputContainer: ... @overload def open( @@ -115,4 +120,5 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer | OutputContainer: ... 
diff --git a/av/container/core.pyx b/av/container/core.pyx index 563c79d21..201570c62 100755 --- a/av/container/core.pyx +++ b/av/container/core.pyx @@ -8,6 +8,7 @@ from pathlib import Path cimport libav as lib +from av.codec.hwaccel cimport HWAccel from av.container.core cimport timeout_info from av.container.input cimport InputContainer from av.container.output cimport OutputContainer @@ -143,7 +144,7 @@ class Flags(Flag): cdef class Container: def __cinit__(self, sentinel, file_, format_name, options, - container_options, stream_options, + container_options, stream_options, hwaccel, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open): @@ -164,6 +165,8 @@ cdef class Container: self.container_options = dict(container_options or ()) self.stream_options = [dict(x) for x in stream_options or ()] + self.hwaccel = hwaccel + self.metadata_encoding = metadata_encoding self.metadata_errors = metadata_errors @@ -296,6 +299,7 @@ def open( buffer_size=32768, timeout=None, io_open=None, + hwaccel=None ): """open(file, mode='r', **kwargs) @@ -322,6 +326,7 @@ def open( ``url`` is the url to open, ``flags`` is a combination of AVIO_FLAG_* and ``options`` is a dictionary of additional options. The callable should return a file-like object. + :param HWAccel hwaccel: Optional settings for hardware-accelerated decoding. :rtype: Container For devices (via ``libavdevice``), pass the name of the device to ``format``, @@ -367,7 +372,7 @@ def open( if mode.startswith("r"): return InputContainer(_cinit_sentinel, file, format, options, - container_options, stream_options, metadata_encoding, metadata_errors, + container_options, stream_options, hwaccel, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open, ) @@ -376,6 +381,6 @@ def open( "Provide stream options via Container.add_stream(..., options={})." 
) return OutputContainer(_cinit_sentinel, file, format, options, - container_options, stream_options, metadata_encoding, metadata_errors, + container_options, stream_options, None, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open, ) diff --git a/av/container/input.pyx b/av/container/input.pyx index 7246f8245..aa9940452 100644 --- a/av/container/input.pyx +++ b/av/container/input.pyx @@ -77,7 +77,7 @@ cdef class InputContainer(Container): codec_context = lib.avcodec_alloc_context3(codec) err_check(lib.avcodec_parameters_to_context(codec_context, stream.codecpar)) codec_context.pkt_timebase = stream.time_base - py_codec_context = wrap_codec_context(codec_context, codec) + py_codec_context = wrap_codec_context(codec_context, codec, self.hwaccel) else: # no decoder is available py_codec_context = None diff --git a/av/container/output.pyx b/av/container/output.pyx index a75e47d43..e61ef2297 100644 --- a/av/container/output.pyx +++ b/av/container/output.pyx @@ -125,7 +125,7 @@ cdef class OutputContainer(Container): err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context)) # Construct the user-land stream - cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec) + cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) @@ -179,7 +179,7 @@ cdef class OutputContainer(Container): err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context)) # Construct the user-land stream - cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec) + cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) @@ -237,7 +237,7 @@ cdef class OutputContainer(Container): # Construct the user-land stream cdef CodecContext 
py_codec_context = None if codec_context != NULL: - py_codec_context = wrap_codec_context(codec_context, codec) + py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) diff --git a/av/video/codeccontext.pxd b/av/video/codeccontext.pxd index 9693caa9b..895ba74b1 100644 --- a/av/video/codeccontext.pxd +++ b/av/video/codeccontext.pxd @@ -1,3 +1,4 @@ +cimport libav as lib from av.codec.context cimport CodecContext from av.video.format cimport VideoFormat @@ -5,8 +6,19 @@ from av.video.frame cimport VideoFrame from av.video.reformatter cimport VideoReformatter +# The get_format callback in AVCodecContext is called by the decoder to pick a format out of a list. +# When we want accelerated decoding, we need to figure out ahead of time what the format should be, +# and find a way to pass that into our callback so we can return it to the decoder. We use the 'opaque' +# user data field in AVCodecContext for that. This is the struct we store a pointer to in that field. 
+cdef struct AVCodecPrivateData: + lib.AVPixelFormat hardware_pix_fmt + bint allow_software_fallback + + cdef class VideoCodecContext(CodecContext): + cdef AVCodecPrivateData _private_data + cdef VideoFormat _format cdef _build_format(self) diff --git a/av/video/codeccontext.pyx b/av/video/codeccontext.pyx index d2f4c9e14..e19f3c6ae 100644 --- a/av/video/codeccontext.pyx +++ b/av/video/codeccontext.pyx @@ -2,6 +2,8 @@ cimport libav as lib from libc.stdint cimport int64_t from av.codec.context cimport CodecContext +from av.codec.hwaccel cimport HWAccel, HWConfig +from av.error cimport err_check from av.frame cimport Frame from av.packet cimport Packet from av.utils cimport avrational_to_fraction, to_avrational @@ -10,13 +12,53 @@ from av.video.frame cimport VideoFrame, alloc_video_frame from av.video.reformatter cimport VideoReformatter +cdef lib.AVPixelFormat _get_hw_format(lib.AVCodecContext *ctx, const lib.AVPixelFormat *pix_fmts) noexcept: + # In the case where we requested accelerated decoding, the decoder first calls this function + # with a list that includes both the hardware format and software formats. + # First we try to pick the hardware format if it's in the list. + # However, if the decoder fails to initialize the hardware, it will call this function again, + # with only software formats in pix_fmts. We return ctx->sw_pix_fmt regardless in this case, + # because that should be in the candidate list. If not, we are out of ideas anyways. + cdef AVCodecPrivateData* private_data = ctx.opaque + i = 0 + while pix_fmts[i] != -1: + if pix_fmts[i] == private_data.hardware_pix_fmt: + return pix_fmts[i] + i += 1 + return ctx.sw_pix_fmt if private_data.allow_software_fallback else lib.AV_PIX_FMT_NONE + + cdef class VideoCodecContext(CodecContext): + def __cinit__(self, *args, **kwargs): self.last_w = 0 self.last_h = 0 - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): - CodecContext._init(self, ptr, codec) # TODO: Can this be `super`? 
+ cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): + CodecContext._init(self, ptr, codec, hwaccel) # TODO: Can this be `super`? + + self.ptr.pix_fmt = lib.AV_PIX_FMT_NONE + + if hwaccel is not None: + try: + self.hwaccel_ctx = hwaccel.create(self.codec) + self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr) + self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt + self.ptr.get_format = _get_hw_format + self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt + self._private_data.allow_software_fallback = self.hwaccel.allow_software_fallback + self.ptr.opaque = &self._private_data + except NotImplementedError: + # Some streams may not have a hardware decoder. For example, many action + # cam videos have a low resolution mjpeg stream, which is usually not + # compatible with hardware decoders. + # The user may have passed in a hwaccel because they want to decode the main + # stream with it, so we shouldn't abort even if we find a stream that can't + # be HW decoded. + # If the user wants to make sure hwaccel is actually used, they can check with the + # is_hardware_accelerated() function on each stream's codec context. + self.hwaccel_ctx = None + self._build_format() self.encoded_frame_count = 0 @@ -58,6 +100,26 @@ cdef class VideoCodecContext(CodecContext): cdef VideoFrame vframe = frame vframe._init_user_attributes() + cdef _transfer_hwframe(self, Frame frame): + if self.hwaccel_ctx is None: + return frame + + if frame.ptr.format != self.hwaccel_ctx.config.ptr.pix_fmt: + # If we get a software frame, that means we are in software fallback mode, and don't actually + # need to transfer. + return frame + + cdef Frame frame_sw + + frame_sw = self._alloc_next_frame() + + err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0)) + + # TODO: Is there anything else to transfer?! 
+ frame_sw.pts = frame.pts + + return frame_sw + cdef _build_format(self): self._format = get_video_format(self.ptr.pix_fmt, self.ptr.width, self.ptr.height) diff --git a/examples/basics/hw_decode.py b/examples/basics/hw_decode.py new file mode 100644 index 000000000..a90e6b9ed --- /dev/null +++ b/examples/basics/hw_decode.py @@ -0,0 +1,76 @@ +import os +import time + +import av +import av.datasets + +# What accelerator to use. +# Recommendations: +# Windows: +# - d3d11va (Direct3D 11) +# * available with built-in ffmpeg in PyAV binary wheels, and gives access to +# all decoders, but performance may not be as good as vendor native interfaces. +# - cuda (NVIDIA NVDEC), qsv (Intel QuickSync) +# * may be faster than d3d11va, but requires custom ffmpeg built with those libraries. +# Linux (all options require custom FFmpeg): +# - vaapi (Intel, AMD) +# - cuda (NVIDIA) +# Mac: +# - videotoolbox +# * available with built-in ffmpeg in PyAV binary wheels, and gives access to +# all accelerators available on Macs. This is the only option on MacOS. + +HW_DEVICE = os.environ['HW_DEVICE'] if 'HW_DEVICE' in os.environ else None + +if 'TEST_FILE_PATH' in os.environ: + test_file_path = os.environ['TEST_FILE_PATH'] +else: + test_file_path = av.datasets.curated("pexels/time-lapse-video-of-night-sky-857195.mp4") + +if HW_DEVICE is None: + av.codec.hwaccel.dump_hwdevices() + print('Please set HW_DEVICE.') + exit() + +assert HW_DEVICE in av.codec.hwaccel.hwdevices_available, f'{HW_DEVICE} not available.' 
+
+print("Decoding in software (auto threading)...")
+
+container = av.open(test_file_path)
+
+container.streams.video[0].thread_type = "AUTO"
+
+start_time = time.time()
+frame_count = 0
+for packet in container.demux(video=0):
+    for _ in packet.decode():
+        frame_count += 1
+
+sw_time = time.time() - start_time
+sw_fps = frame_count / sw_time
+assert frame_count == container.streams.video[0].frames
+container.close()
+
+print(f"Decoded with software in {sw_time:.2f}s ({sw_fps:.2f} fps).")
+
+print(f"Decoding with {HW_DEVICE}")
+
+hwaccel = av.codec.hwaccel.HWAccel(
+    device_type=HW_DEVICE,
+    allow_software_fallback=False)
+
+# Note the additional argument here.
+container = av.open(test_file_path, hwaccel=hwaccel)
+
+start_time = time.time()
+frame_count = 0
+for packet in container.demux(video=0):
+    for _ in packet.decode():
+        frame_count += 1
+
+hw_time = time.time() - start_time
+hw_fps = frame_count / hw_time
+assert frame_count == container.streams.video[0].frames
+container.close()
+
+print(f"Decoded with {HW_DEVICE} in {hw_time:.2f}s ({hw_fps:.2f} fps).")
diff --git a/include/libav.pxd b/include/libav.pxd
index c793b9988..e2fe323a4 100644
--- a/include/libav.pxd
+++ b/include/libav.pxd
@@ -4,11 +4,14 @@ include "libavutil/channel_layout.pxd"
 include "libavutil/dict.pxd"
 include "libavutil/error.pxd"
 include "libavutil/frame.pxd"
+include "libavutil/hwcontext.pxd"
 include "libavutil/samplefmt.pxd"
 include "libavutil/motion_vector.pxd"
 
 include "libavcodec/avcodec.pxd"
 include "libavcodec/bsf.pxd"
+include "libavcodec/hwaccel.pxd"
+
 include "libavdevice/avdevice.pxd"
 include "libavformat/avformat.pxd"
 include "libswresample/swresample.pxd"
diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd
index 172c9cc65..bcb342373 100644
--- a/include/libavcodec/avcodec.pxd
+++ b/include/libavcodec/avcodec.pxd
@@ -213,6 +213,8 @@ cdef extern from "libavcodec/avcodec.h" nogil:
 
         AVFrame* coded_frame
 
+        void* opaque
+
         int bit_rate
         int bit_rate_tolerance
         int mb_decision
@@ -247,6 +249,7 @@ cdef extern from "libavcodec/avcodec.h" nogil:
         int coded_height
 
         AVPixelFormat pix_fmt
+        AVPixelFormat sw_pix_fmt
         AVRational sample_aspect_ratio
         int gop_size  # The number of pictures in a group of pictures, or 0 for intra_only.
         int max_b_frames
@@ -266,6 +269,11 @@ cdef extern from "libavcodec/avcodec.h" nogil:
         int get_buffer(AVCodecContext *ctx, AVFrame *frame)
         void release_buffer(AVCodecContext *ctx, AVFrame *frame)
 
+        # Hardware acceleration
+        AVHWAccel *hwaccel
+        AVBufferRef *hw_device_ctx
+        AVPixelFormat (*get_format)(AVCodecContext *s, const AVPixelFormat *fmt)
+
         # User Data
         void *opaque
 
diff --git a/include/libavcodec/hwaccel.pxd b/include/libavcodec/hwaccel.pxd
new file mode 100644
index 000000000..cb9ac41b6
--- /dev/null
+++ b/include/libavcodec/hwaccel.pxd
@@ -0,0 +1,19 @@
+cdef extern from "libavcodec/avcodec.h" nogil:
+    cdef enum:
+        AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+        AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX,
+        AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
+        AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
+    cdef struct AVCodecHWConfig:
+        AVPixelFormat pix_fmt
+        int methods
+        AVHWDeviceType device_type
+    cdef const AVCodecHWConfig* avcodec_get_hw_config(const AVCodec *codec, int index)
+    cdef enum:
+        AV_HWACCEL_CODEC_CAP_EXPERIMENTAL
+    cdef struct AVHWAccel:
+        char *name
+        AVMediaType type
+        AVCodecID id
+        AVPixelFormat pix_fmt
+        int capabilities
diff --git a/include/libavutil/buffer.pxd b/include/libavutil/buffer.pxd
index daf86105b..d4ff4cd17 100644
--- a/include/libavutil/buffer.pxd
+++ b/include/libavutil/buffer.pxd
@@ -1,9 +1,18 @@
-from libc.stdint cimport uint8_t
+from libc.stdint cimport intptr_t, uint8_t
 
 cdef extern from "libavutil/buffer.h" nogil:
-
     AVBufferRef *av_buffer_create(uint8_t *data, size_t size, void (*free)(void *opaque, uint8_t *data), void *opaque, int flags)
+    AVBufferRef* av_buffer_ref(AVBufferRef *buf)
     void av_buffer_unref(AVBufferRef **buf)
 
+    cdef struct AVBuffer:
+        uint8_t *data
+        int size
+        intptr_t refcount
+        void (*free)(void *opaque, uint8_t *data)
+        void *opaque
+        int flags
     cdef struct AVBufferRef:
+        AVBuffer *buffer
         uint8_t *data
+        int size
diff --git a/include/libavutil/hwcontext.pxd b/include/libavutil/hwcontext.pxd
new file mode 100644
index 000000000..beda15a2c
--- /dev/null
+++ b/include/libavutil/hwcontext.pxd
@@ -0,0 +1,24 @@
+cdef extern from "libavutil/hwcontext.h" nogil:
+
+    enum AVHWDeviceType:
+        AV_HWDEVICE_TYPE_NONE
+        AV_HWDEVICE_TYPE_VDPAU
+        AV_HWDEVICE_TYPE_CUDA
+        AV_HWDEVICE_TYPE_VAAPI
+        AV_HWDEVICE_TYPE_DXVA2
+        AV_HWDEVICE_TYPE_QSV
+        AV_HWDEVICE_TYPE_VIDEOTOOLBOX
+        AV_HWDEVICE_TYPE_D3D11VA
+        AV_HWDEVICE_TYPE_DRM
+        AV_HWDEVICE_TYPE_OPENCL
+        AV_HWDEVICE_TYPE_MEDIACODEC
+        AV_HWDEVICE_TYPE_VULKAN
+        AV_HWDEVICE_TYPE_D3D12VA
+
+    cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags)
+
+    cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
+    cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type)
+    cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev)
+
+    cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags)
diff --git a/scripts/build-deps b/scripts/build-deps
index 4cb90f074..de4a6e547 100755
--- a/scripts/build-deps
+++ b/scripts/build-deps
@@ -13,6 +13,32 @@ if [[ -e "$PYAV_LIBRARY_PREFIX/bin/ffmpeg" ]]; then
     exit 0
 fi
 
+# Add CUDA support if available
+CONFFLAGS_NVIDIA=""
+if [[ -e /usr/local/cuda ]]; then
+    # Get Nvidia headers for ffmpeg (the library root is only created further down, so make it here)
+    mkdir -p "$PYAV_LIBRARY_ROOT" && cd "$PYAV_LIBRARY_ROOT"
+    if [[ ! -e "$PYAV_LIBRARY_ROOT/nv-codec-headers" ]]; then
+        git clone https://github.com/FFmpeg/nv-codec-headers.git
+        cd nv-codec-headers
+        make -j4
+        make PREFIX="$PYAV_LIBRARY_PREFIX" install
+    fi
+
+    # The headers above are installed under $PYAV_LIBRARY_PREFIX, so search its pkgconfig dir too.
+    export PKG_CONFIG_PATH="$PYAV_LIBRARY_PREFIX/lib/pkgconfig:/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH"
+    CONFFLAGS_NVIDIA="--enable-cuda \
+                      --enable-cuvid \
+                      --enable-nvenc \
+                      --enable-nonfree \
+                      --enable-libnpp \
+                      --extra-cflags=-I/usr/local/cuda/include \
+                      --extra-ldflags=-L/usr/local/cuda/lib64"
+else
+    echo "WARNING: Did not find cuda libraries in /usr/local/cuda..."
+    echo "         Building without NVIDIA NVENC/NVDEC support"
+fi
+
 mkdir -p "$PYAV_LIBRARY_ROOT"
 mkdir -p "$PYAV_LIBRARY_PREFIX"
 
@@ -44,6 +70,7 @@ echo ./configure
     --enable-sse \
     --enable-avx \
     --enable-avx2 \
+    $CONFFLAGS_NVIDIA \
     --prefix="$PYAV_LIBRARY_PREFIX" \
     || exit 2
 echo
diff --git a/tests/test_decode.py b/tests/test_decode.py
index 05f636977..66a5bdfd0 100644
--- a/tests/test_decode.py
+++ b/tests/test_decode.py
@@ -1,10 +1,42 @@
 from fractions import Fraction
+import functools
+import os
+import pathlib
 
 import av
+import numpy as np
+import pytest
 
 from .common import TestCase, fate_suite
 
 
+@functools.cache
+def make_h264_test_video(path: str) -> None:
+    """Generates a black H264 test video for testing hardware decoding."""
+
+    # We generate a file here that's designed to be as compatible as possible with hardware
+    # decoders. Hardware decoders are sometimes very picky and the errors we get are often
+    # opaque, so there is nothing much we (PyAV) can do. The user needs to figure that out
+    # if they want to use hwaccel. We only want to test the PyAV plumbing here.
+    # Our video is H264, 1280x720p (note that some decoders have a minimum resolution limit), 24fps,
+    # 8-bit yuv420p.
+    pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
+    # The context manager closes the output file even if encoding or muxing raises.
+    with av.open(path, "w") as output_container:
+        stream = output_container.add_stream("libx264", rate=24)
+        stream.width = 1280
+        stream.height = 720
+        stream.pix_fmt = "yuv420p"
+
+        for _ in range(24):
+            frame = av.VideoFrame.from_ndarray(np.zeros((720, 1280, 3), dtype=np.uint8), format="rgb24")
+            for packet in stream.encode(frame):
+                output_container.mux(packet)
+
+        for packet in stream.encode():
+            output_container.mux(packet)
+
+
 class TestDecode(TestCase):
     def test_decoded_video_frame_count(self) -> None:
         container = av.open(fate_suite("h264/interlaced_crop.mp4"))
@@ -165,3 +197,32 @@ def test_side_data(self) -> None:
         container = av.open(fate_suite("mov/displaymatrix.mov"))
         frame = next(container.decode(video=0))
         assert frame.rotation == -90
+
+    def test_hardware_decode(self) -> None:
+        if 'HWACCEL_DEVICE_TYPE' not in os.environ:
+            pytest.skip(
+                "Set the HWACCEL_DEVICE_TYPE to run this test. "
+                f"Options are {' '.join(av.codec.hwaccel.hwdevices_available)}")
+
+        HWACCEL_DEVICE_TYPE = os.environ["HWACCEL_DEVICE_TYPE"]
+
+        assert HWACCEL_DEVICE_TYPE in av.codec.hwaccel.hwdevices_available, f'{HWACCEL_DEVICE_TYPE} not available'
+
+        test_video_path = "tests/assets/black.mp4"
+        make_h264_test_video(test_video_path)
+
+        # Test decode.
+        hwaccel = av.codec.hwaccel.HWAccel(device_type=HWACCEL_DEVICE_TYPE, allow_software_fallback=False)
+
+        with av.open(test_video_path, hwaccel=hwaccel) as container:
+            video_stream = next(s for s in container.streams if s.type == "video")
+
+            assert video_stream is container.streams.video[0]
+
+            assert video_stream.codec_context.is_hardware_accelerated()
+
+            frame_count = 0
+            for frame in container.decode(video_stream):
+                frame_count += 1
+
+            assert frame_count == video_stream.frames