From c39c47cfda79f98464f0589e689a6ac9efb6f87a Mon Sep 17 00:00:00 2001
From: Matthew Lai <m@matthewlai.ca>
Date: Sat, 14 Dec 2024 17:42:16 +0800
Subject: [PATCH] Implemented hardware decoding

This implements hardware decoding, continuing the work of @rvillalba-novetta and @mikeboers in https://github.com/PyAV-Org/PyAV/compare/main...rvillalba-novetta:PyAV:hwaccel (and its child commits).
---
 av/__init__.py                  |   1 +
 av/__main__.py                  |  10 ++
 av/audio/codeccontext.pyx       |   5 +-
 av/codec/codec.pxd              |   2 +
 av/codec/codec.pyx              |  71 ++++++++++---
 av/codec/context.pxd            |  12 ++-
 av/codec/context.pyi            |   1 +
 av/codec/context.pyx            |  19 +++-
 av/codec/hwaccel.pxd            |  21 ++++
 av/codec/hwaccel.pyx            | 176 ++++++++++++++++++++++++++++++++
 av/container/core.pxd           |   3 +
 av/container/core.pyi           |   6 ++
 av/container/core.pyx           |  11 +-
 av/container/input.pyx          |   2 +-
 av/container/output.pyx         |   6 +-
 av/video/codeccontext.pxd       |  12 +++
 av/video/codeccontext.pyx       |  66 +++++++++++-
 examples/basics/hw_decode.py    |  76 ++++++++++++++
 include/libav.pxd               |   3 +
 include/libavcodec/avcodec.pxd  |   8 ++
 include/libavcodec/hwaccel.pxd  |  19 ++++
 include/libavutil/buffer.pxd    |  13 ++-
 include/libavutil/hwcontext.pxd |  24 +++++
 scripts/build-deps              |  26 +++++
 tests/test_decode.py            |  62 +++++++++++
 25 files changed, 617 insertions(+), 38 deletions(-)
 create mode 100644 av/codec/hwaccel.pxd
 create mode 100644 av/codec/hwaccel.pyx
 create mode 100644 examples/basics/hw_decode.py
 create mode 100644 include/libavcodec/hwaccel.pxd
 create mode 100644 include/libavutil/hwcontext.pxd

diff --git a/av/__init__.py b/av/__init__.py
index cbc3c8a2f..e2f9e5a6d 100644
--- a/av/__init__.py
+++ b/av/__init__.py
@@ -17,6 +17,7 @@
 from av.bitstream import BitStreamFilterContext, bitstream_filters_available
 from av.codec.codec import Codec, codecs_available
 from av.codec.context import CodecContext
+from av.codec.hwaccel import HWConfig
 from av.container import open
 from av.format import ContainerFormat, formats_available
 from av.packet import Packet
diff --git a/av/__main__.py b/av/__main__.py
index bc353d147..4cde2147d 100644
--- a/av/__main__.py
+++ b/av/__main__.py
@@ -6,6 +6,8 @@
 def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--codecs", action="store_true")
+    parser.add_argument('--hwdevices', action='store_true')
+    parser.add_argument('--hwconfigs', action='store_true')
     parser.add_argument("--version", action="store_true")
     args = parser.parse_args()
 
@@ -30,6 +32,14 @@ def main() -> None:
                 version = config["version"]
                 print(f"{libname:<13} {version[0]:3d}.{version[1]:3d}.{version[2]:3d}")
 
+    if args.hwdevices:
+        from av.codec.hwaccel import dump_hwdevices
+        dump_hwdevices()
+
+    if args.hwconfigs:
+        from av.codec.codec import dump_hwconfigs
+        dump_hwconfigs()
+
     if args.codecs:
         from av.codec.codec import dump_codecs
 
diff --git a/av/audio/codeccontext.pyx b/av/audio/codeccontext.pyx
index 54319ddaf..856af555c 100644
--- a/av/audio/codeccontext.pyx
+++ b/av/audio/codeccontext.pyx
@@ -3,13 +3,14 @@ cimport libav as lib
 from av.audio.format cimport AudioFormat, get_audio_format
 from av.audio.frame cimport AudioFrame, alloc_audio_frame
 from av.audio.layout cimport AudioLayout, get_audio_layout
+from av.codec.hwaccel cimport HWAccel
 from av.frame cimport Frame
 from av.packet cimport Packet
 
 
 cdef class AudioCodecContext(CodecContext):
-    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec):
-        CodecContext._init(self, ptr, codec)
+    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel):
+        CodecContext._init(self, ptr, codec, hwaccel)
 
     cdef _prepare_frames_for_encode(self, Frame input_frame):
 
diff --git a/av/codec/codec.pxd b/av/codec/codec.pxd
index b9925df13..576c659b4 100644
--- a/av/codec/codec.pxd
+++ b/av/codec/codec.pxd
@@ -7,6 +7,8 @@ cdef class Codec:
     cdef const lib.AVCodecDescriptor *desc
     cdef readonly bint is_encoder
 
+    cdef tuple _hardware_configs
+
     cdef _init(self, name=?)
 
 
diff --git a/av/codec/codec.pyx b/av/codec/codec.pyx
index 1493f0f7b..26d487a48 100644
--- a/av/codec/codec.pyx
+++ b/av/codec/codec.pyx
@@ -1,4 +1,5 @@
 from av.audio.format cimport get_audio_format
+from av.codec.hwaccel cimport wrap_hwconfig
 from av.descriptor cimport wrap_avclass
 from av.utils cimport avrational_to_fraction
 from av.video.format cimport get_video_format
@@ -117,18 +118,25 @@ cdef class Codec:
         if self.is_encoder and lib.av_codec_is_decoder(self.ptr):
             raise RuntimeError("%s is both encoder and decoder.")
 
-    def create(self, kind = None):
+    def __repr__(self):  # Renders as <av.ClassName(name, mode)> using the reprs of name and mode.
+        return f'<av.{self.__class__.__name__}({self.name!r}, {self.mode!r})>'
+
+    def create(self, kind = None, *args, **kwargs):
         """Create a :class:`.CodecContext` for this codec.
 
         :param str kind: Gives a hint to static type checkers for what exact CodecContext is used.
         """
         from .context import CodecContext
-        return CodecContext.create(self)
+        return CodecContext.create(self, *args, **kwargs)
 
     @property
     def is_decoder(self):
         return not self.is_encoder
 
+    @property
+    def mode(self):  # 'w' when this codec is an encoder, 'r' otherwise.
+        return 'w' if self.is_encoder else 'r'
+
     @property
     def descriptor(self): return wrap_avclass(self.ptr.priv_class)
 
@@ -203,6 +211,23 @@ cdef class Codec:
             i += 1
         return ret
 
+    @property
+    def hardware_configs(self):  # Tuple of HWConfig wrappers for this codec, enumerated once and cached.
+        if self._hardware_configs is not None:  # An empty tuple is a valid cached result as well.
+            return self._hardware_configs
+        ret = []
+        cdef int i = 0
+        cdef lib.AVCodecHWConfig *ptr
+        while True:
+            ptr = lib.avcodec_get_hw_config(self.ptr, i)
+            if not ptr:  # A NULL result ends the enumeration.
+                break
+            ret.append(wrap_hwconfig(ptr))
+            i += 1
+        ret = tuple(ret)
+        self._hardware_configs = ret
+        return ret
+
     @property
     def properties(self):
         return self.desc.props
@@ -294,18 +319,16 @@ codec_descriptor = wrap_avclass(lib.avcodec_get_class())
 def dump_codecs():
     """Print information about available codecs."""
 
-    print(
-        """Codecs:
- D..... = Decoding supported
- .E.... = Encoding supported
- ..V... = Video codec
- ..A... = Audio codec
- ..S... = Subtitle codec
- ...I.. = Intra frame-only codec
- ....L. = Lossy compression
- .....S = Lossless compression
- ------"""
-    )
+    print('''Codecs:
+    D....  = Decoding supported
+    .E...  = Encoding supported
+    ..V..  = Video codec
+    ..A..  = Audio codec
+    ..S..  = Subtitle codec
+    ...I.  = Intra frame-only codec
+    ....L  = Lossless compression
+    .....H = Hardware decoding supported
+    ------''')
 
     for name in sorted(codecs_available):
         try:
@@ -323,17 +346,31 @@ def dump_codecs():
 
         try:
             print(
-                " %s%s%s%s%s%s %-18s %s"
+                "    %s%s%s%s%s%s %-18s %s"
                 % (
                     ".D"[bool(d_codec)],
                     ".E"[bool(e_codec)],
                     codec.type[0].upper(),
                     ".I"[codec.intra_only],
-                    ".L"[codec.lossy],
-                    ".S"[codec.lossless],
+                    ".L"[codec.lossless],
+                    ".H"[bool((d_codec or codec).hardware_configs)],
                     codec.name,
                     codec.long_name,
                 )
             )
         except Exception as e:
             print(f"...... {codec.name:<18} ERROR: {e}")
+
+def dump_hwconfigs():
+    """Print the hardware configs of every decoder that exposes at least one."""
+    print('Hardware configs:')
+    for codec_name in sorted(codecs_available):
+        try:
+            decoder = Codec(codec_name, 'r')
+        except ValueError:
+            continue  # Names that cannot be opened in 'r' mode are skipped.
+        hw_configs = decoder.hardware_configs
+        if hw_configs:
+            print('   ', decoder.name)
+            for hw_config in hw_configs:
+                print('       ', hw_config)
diff --git a/av/codec/context.pxd b/av/codec/context.pxd
index 42b2d63e7..2cfa0c895 100644
--- a/av/codec/context.pxd
+++ b/av/codec/context.pxd
@@ -3,6 +3,7 @@ from libc.stdint cimport int64_t
 
 from av.bytesource cimport ByteSource
 from av.codec.codec cimport Codec
+from av.codec.hwaccel cimport HWAccel, HWAccelContext
 from av.frame cimport Frame
 from av.packet cimport Packet
 
@@ -18,11 +19,12 @@ cdef class CodecContext:
     cdef int stream_index
 
     cdef lib.AVCodecParserContext *parser
-    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec)
+    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel)
 
     # Public API.
     cdef readonly bint is_open
     cdef readonly Codec codec
+    cdef readonly HWAccel hwaccel
     cdef public dict options
     cpdef open(self, bint strict=?)
 
@@ -31,6 +33,9 @@ cdef class CodecContext:
     cpdef decode(self, Packet packet=?)
     cpdef flush_buffers(self)
 
+    # Used by hardware-accelerated decode.
+    cdef HWAccelContext hwaccel_ctx
+
     # Used by both transcode APIs to setup user-land objects.
     # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets
     # are bogus). It should take all info it needs from the context and/or stream.
@@ -49,10 +54,11 @@ cdef class CodecContext:
     cdef _send_packet_and_recv(self, Packet packet)
     cdef _recv_frame(self)
 
+    cdef _transfer_hwframe(self, Frame frame)
+
     # Implemented by children for the generic send/recv API, so we have the
     # correct subclass of Frame.
     cdef Frame _next_frame
     cdef Frame _alloc_next_frame(self)
 
-
-cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*)
+cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*, HWAccel hwaccel)
diff --git a/av/codec/context.pyi b/av/codec/context.pyi
index a6ca9647e..0ac3ca988 100644
--- a/av/codec/context.pyi
+++ b/av/codec/context.pyi
@@ -92,3 +92,4 @@ class CodecContext:
         self, raw_input: bytes | bytearray | memoryview | None = None
     ) -> list[Packet]: ...
     def flush_buffers(self) -> None: ...
+    def is_hardware_accelerated(self) -> bool: ...
diff --git a/av/codec/context.pyx b/av/codec/context.pyx
index 29b7b80d1..78174defa 100644
--- a/av/codec/context.pyx
+++ b/av/codec/context.pyx
@@ -18,7 +18,7 @@ from av.dictionary import Dictionary
 cdef object _cinit_sentinel = object()
 
 
-cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec):
+cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec, HWAccel hwaccel):
     """Build an av.CodecContext for an existing AVCodecContext."""
 
     cdef CodecContext py_ctx
@@ -35,7 +35,7 @@ cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCode
     else:
         py_ctx = CodecContext(_cinit_sentinel)
 
-    py_ctx._init(c_ctx, c_codec)
+    py_ctx._init(c_ctx, c_codec, hwaccel)
 
     return py_ctx
 
@@ -83,10 +83,10 @@ class Flags2(IntEnum):
 
 cdef class CodecContext:
     @staticmethod
-    def create(codec, mode=None):
+    def create(codec, mode=None, hwaccel=None):
         cdef Codec cy_codec = codec if isinstance(codec, Codec) else Codec(codec, mode)
         cdef lib.AVCodecContext *c_ctx = lib.avcodec_alloc_context3(cy_codec.ptr)
-        return wrap_codec_context(c_ctx, cy_codec.ptr)
+        return wrap_codec_context(c_ctx, cy_codec.ptr, hwaccel)
 
     def __cinit__(self, sentinel=None, *args, **kwargs):
         if sentinel is not _cinit_sentinel:
@@ -96,11 +96,12 @@ cdef class CodecContext:
         self.stream_index = -1  # This is set by the container immediately.
         self.is_open = False
 
-    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec):
+    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel):
         self.ptr = ptr
         if self.ptr.codec and codec and self.ptr.codec != codec:
             raise RuntimeError("Wrapping CodecContext with mismatched codec.")
         self.codec = wrap_codec(codec if codec != NULL else self.ptr.codec)
+        self.hwaccel = hwaccel
 
         # Set reasonable threading defaults.
         self.ptr.thread_count = 0  # use as many threads as there are CPUs.
@@ -310,6 +311,9 @@ cdef class CodecContext:
 
         return packets
 
+    def is_hardware_accelerated(self):  # True when a hardware-accel context is attached to this codec context.
+        return self.hwaccel_ctx is not None
+
     def _send_frame_and_recv(self, Frame frame):
         cdef Packet packet
 
@@ -359,10 +363,15 @@ cdef class CodecContext:
             return
         err_check(res)
 
+        frame = self._transfer_hwframe(frame)
+
         if not res:
             self._next_frame = None
             return frame
 
+    cdef _transfer_hwframe(self, Frame frame):  # Base implementation: hand the frame back untouched.
+        return frame
+
     cdef _recv_packet(self):
         cdef Packet packet = Packet()
 
diff --git a/av/codec/hwaccel.pxd b/av/codec/hwaccel.pxd
new file mode 100644
index 000000000..e6c8057d6
--- /dev/null
+++ b/av/codec/hwaccel.pxd
@@ -0,0 +1,21 @@
+cimport libav as lib
+
+from av.codec.codec cimport Codec
+
+cdef class HWConfig(object):
+    cdef object __weakref__
+    cdef lib.AVCodecHWConfig *ptr
+    cdef void _init(self, lib.AVCodecHWConfig *ptr)
+
+cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr)
+
+cdef class HWAccel(object):
+    cdef int _device_type
+    cdef str _device
+    cdef public bint allow_software_fallback
+    cdef public dict options
+
+cdef class HWAccelContext(HWAccel):
+    cdef readonly Codec codec
+    cdef readonly HWConfig config
+    cdef lib.AVBufferRef *ptr
diff --git a/av/codec/hwaccel.pyx b/av/codec/hwaccel.pyx
new file mode 100644
index 000000000..cd008c9e7
--- /dev/null
+++ b/av/codec/hwaccel.pyx
@@ -0,0 +1,176 @@
+from __future__ import print_function
+
+from enum import IntEnum, IntFlag
+
+import weakref
+
+cimport libav as lib
+
+from av.codec.codec cimport Codec
+from av.dictionary cimport _Dictionary
+from av.error cimport err_check
+from av.video.format cimport get_video_format
+
+from av.dictionary import Dictionary
+
+class Capabilities(IntEnum):  # Codec capability bits; most mirror lib.AV_CODEC_CAP_*, a few are literal shifts.
+    none = 0
+    draw_horiz_band = lib.AV_CODEC_CAP_DRAW_HORIZ_BAND
+    dr1 = lib.AV_CODEC_CAP_DR1
+    hwaccel = 1 << 4  # Literal bit; no lib constant is referenced for this entry.
+    delay = lib.AV_CODEC_CAP_DELAY
+    small_last_frame = lib.AV_CODEC_CAP_SMALL_LAST_FRAME
+    hwaccel_vdpau = 1 << 7  # Literal bit; no lib constant is referenced for this entry.
+    subframes = lib.AV_CODEC_CAP_SUBFRAMES
+    experimental = lib.AV_CODEC_CAP_EXPERIMENTAL
+    channel_conf = lib.AV_CODEC_CAP_CHANNEL_CONF
+    neg_linesizes = 1 << 11  # Literal bit; no lib constant is referenced for this entry.
+    frame_threads = lib.AV_CODEC_CAP_FRAME_THREADS
+    slice_threads = lib.AV_CODEC_CAP_SLICE_THREADS
+    param_change = lib.AV_CODEC_CAP_PARAM_CHANGE
+    auto_threads = lib.AV_CODEC_CAP_OTHER_THREADS  # Named auto_threads here, mapped to OTHER_THREADS.
+    variable_frame_size = lib.AV_CODEC_CAP_VARIABLE_FRAME_SIZE
+    avoid_probing = lib.AV_CODEC_CAP_AVOID_PROBING
+    hardware = lib.AV_CODEC_CAP_HARDWARE
+    hybrid = lib.AV_CODEC_CAP_HYBRID
+    encoder_reordered_opaque = 1 << 20  # Literal bit; no lib constant is referenced for this entry.
+    encoder_flush = 1 << 21  # Literal bit; no lib constant is referenced for this entry.
+    encoder_recon_frame = 1 << 22  # Literal bit; no lib constant is referenced for this entry.
+
+class HWDeviceType(IntEnum):  # Python-side mirror of the lib.AV_HWDEVICE_TYPE_* constants listed below.
+    NONE = lib.AV_HWDEVICE_TYPE_NONE  # Also the value that ends av_hwdevice_iterate_types() in this module.
+    VDPAU = lib.AV_HWDEVICE_TYPE_VDPAU
+    CUDA = lib.AV_HWDEVICE_TYPE_CUDA
+    VAAPI = lib.AV_HWDEVICE_TYPE_VAAPI
+    DXVA2 = lib.AV_HWDEVICE_TYPE_DXVA2
+    QSV = lib.AV_HWDEVICE_TYPE_QSV
+    VIDEOTOOLBOX = lib.AV_HWDEVICE_TYPE_VIDEOTOOLBOX
+    D3D11VA = lib.AV_HWDEVICE_TYPE_D3D11VA
+    DRM = lib.AV_HWDEVICE_TYPE_DRM
+    OPENCL = lib.AV_HWDEVICE_TYPE_OPENCL
+    MEDIACODEC = lib.AV_HWDEVICE_TYPE_MEDIACODEC
+    VULKAN = lib.AV_HWDEVICE_TYPE_VULKAN
+    D3D12VA = lib.AV_HWDEVICE_TYPE_D3D12VA
+
+class HWConfigMethod(IntFlag):  # Bit flags: a config's methods field may combine several, which IntEnum rejects.
+    NONE = 0
+    HW_DEVICE_CTX = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX  # This is the only one we support.
+    HW_FRAME_CTX = lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX
+    INTERNAL = lib.AV_CODEC_HW_CONFIG_METHOD_INTERNAL
+    AD_HOC = lib.AV_CODEC_HW_CONFIG_METHOD_AD_HOC
+
+
+cdef object _cinit_sentinel = object()
+cdef object _singletons = weakref.WeakValueDictionary()
+
+cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr):  # Return the shared HWConfig wrapper for this config pointer.
+    try:
+        return _singletons[<size_t>ptr]  # size_t keeps the whole address; <int> would truncate 64-bit pointers.
+    except KeyError:
+        pass
+    cdef HWConfig config = HWConfig(_cinit_sentinel)
+    config._init(ptr)
+    _singletons[<size_t>ptr] = config  # Weak-valued cache: the entry goes away once the wrapper is collected.
+    return config
+
+
+cdef class HWConfig(object):
+    """Read-only view of one lib.AVCodecHWConfig; instances are created through wrap_hwconfig()."""
+    def __init__(self, sentinel):
+        if sentinel is not _cinit_sentinel:  # Only module-internal code holds the sentinel.
+            raise RuntimeError('Cannot instantiate HWConfig')
+
+    cdef void _init(self, lib.AVCodecHWConfig *ptr):
+        self.ptr = ptr
+
+    def __repr__(self):
+        return (
+            f'<av.{self.__class__.__name__} '
+            f'device_type={lib.av_hwdevice_get_type_name(self.device_type)} '
+            f'format={self.format.name if self.format else None} '
+            f'is_supported={self.is_supported} '
+            f'at 0x{<size_t>self.ptr:x}>'
+        )
+
+    @property
+    def device_type(self):  # The config's device type as an HWDeviceType member.
+        return HWDeviceType(self.ptr.device_type)
+
+    @property
+    def format(self):  # Video format built from the config's pix_fmt, with width and height passed as 0.
+        return get_video_format(self.ptr.pix_fmt, 0, 0)
+
+    @property
+    def methods(self):  # The raw methods bitmask wrapped in HWConfigMethod.
+        return HWConfigMethod(self.ptr.methods)
+
+    @property
+    def is_supported(self):  # True when the HW_DEVICE_CTX bit is set in methods.
+        return bool(self.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)
+
+hwdevices_available = []  # Names of the hardware device types reported by av_hwdevice_iterate_types().
+
+cdef lib.AVHWDeviceType x = lib.AV_HWDEVICE_TYPE_NONE
+while True:
+    x = lib.av_hwdevice_iterate_types(x)
+    if x == lib.AV_HWDEVICE_TYPE_NONE:  # NONE marks the end of the iteration.
+        break
+    hwdevices_available.append(lib.av_hwdevice_get_type_name(x))  # Pass the C enum as-is: HWDeviceType(x) raises ValueError for types the Python enum lacks.
+
+def dump_hwdevices():
+    """Print a header followed by each entry of hwdevices_available on its own line."""
+    # The separator puts every name on a new line behind four spaces of indent.
+    print('Hardware device types:', *hwdevices_available, sep='\n    ')
+
+cdef class HWAccel(object):  # Hardware-acceleration settings; create() binds them to a codec.
+    def __init__(self, device_type: str | HWDeviceType, device: str | None = None,
+                 allow_software_fallback: bool = True, options=None, **kwargs):
+        if isinstance(device_type, HWDeviceType):
+            self._device_type = device_type
+        elif isinstance(device_type, str):
+            self._device_type = int(lib.av_hwdevice_find_type_by_name(device_type))  # NOTE(review): presumably an unknown name yields NONE, which HWAccelContext treats as "any device" - confirm.
+        else:
+            raise ValueError('Unknown type for device_type')
+        self._device = device
+        self.allow_software_fallback = allow_software_fallback
+
+        if options and kwargs:  # Options come either as a dict or as keyword arguments, not both.
+            raise ValueError("accepts only one of options arg or kwargs")
+        self.options = dict(options or kwargs)  # Always stored as a fresh dict.
+
+    def create(self, Codec codec):  # Build an HWAccelContext for *codec* from these settings.
+        return HWAccelContext(
+            device_type=HWDeviceType(self._device_type),
+            device=self._device,
+            options=self.options,
+            codec=codec,
+            allow_software_fallback=self.allow_software_fallback)
+
+cdef class HWAccelContext(HWAccel):  # HWAccel settings bound to one codec, plus the device context opened for it.
+    def __init__(self, device_type, device, options, codec, allow_software_fallback, **kwargs):
+        super().__init__(device_type, device, allow_software_fallback, options, **kwargs)  # Base positional order is (device_type, device, allow_software_fallback, options).
+        if not codec:
+            raise ValueError("codec is required")
+        self.codec = codec
+        cdef HWConfig config
+        for config in codec.hardware_configs:  # Pick the first config usable through a device context.
+            if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX):
+                continue
+            if self._device_type and config.device_type != self._device_type:  # A zero device type disables this filter.
+                continue
+            break
+        else:
+            raise NotImplementedError(f"no supported hardware config for {codec}")
+        self.config = config
+        cdef char *c_device = NULL
+        if self._device:
+            device_bytes = self._device.encode()  # Keep the bytes object referenced while c_device points into it.
+            c_device = device_bytes
+        cdef _Dictionary c_options = Dictionary(self.options)
+        err_check(lib.av_hwdevice_ctx_create(&self.ptr, config.ptr.device_type, c_device, c_options.ptr, 0))
+
+    def __dealloc__(self):
+        if self.ptr:  # Release our reference to the device context, if one was created.
+            lib.av_buffer_unref(&self.ptr)
+    def create(self, *args, **kwargs):  # A context is already bound to a codec and cannot spawn another.
+        raise ValueError("cannot call HWAccelContext.create")
diff --git a/av/container/core.pxd b/av/container/core.pxd
index 1aed54b90..87bb792b3 100644
--- a/av/container/core.pxd
+++ b/av/container/core.pxd
@@ -1,5 +1,6 @@
 cimport libav as lib
 
+from av.codec.hwaccel cimport HWAccel
 from av.container.pyio cimport PyIOFile
 from av.container.streams cimport StreamContainer
 from av.dictionary cimport _Dictionary
@@ -33,6 +34,8 @@ cdef class Container:
     cdef readonly dict container_options
     cdef readonly list stream_options
 
+    cdef HWAccel hwaccel
+
     cdef readonly StreamContainer streams
     cdef readonly dict metadata
 
diff --git a/av/container/core.pyi b/av/container/core.pyi
index 227a7d32a..7310abe74 100644
--- a/av/container/core.pyi
+++ b/av/container/core.pyi
@@ -4,6 +4,7 @@ from pathlib import Path
 from types import TracebackType
 from typing import Any, Callable, ClassVar, Literal, Type, overload
 
+from av.codec.hwaccel import HWAccel
 from av.format import ContainerFormat
 
 from .input import InputContainer
@@ -43,6 +44,7 @@ class Container:
     options: dict[str, str]
     container_options: dict[str, str]
     stream_options: list[dict[str, str]]
+    hwaccel: HWAccel
     streams: StreamContainer
     metadata: dict[str, str]
     open_timeout: Real | None
@@ -73,6 +75,7 @@ def open(
     buffer_size: int = 32768,
     timeout: Real | None | tuple[Real | None, Real | None] = None,
     io_open: Callable[..., Any] | None = None,
+    hwaccel: HWAccel | None = None,
 ) -> InputContainer: ...
 @overload
 def open(
@@ -87,6 +90,7 @@ def open(
     buffer_size: int = 32768,
     timeout: Real | None | tuple[Real | None, Real | None] = None,
     io_open: Callable[..., Any] | None = None,
+    hwaccel: HWAccel | None = None,
 ) -> InputContainer: ...
 @overload
 def open(
@@ -101,6 +105,7 @@ def open(
     buffer_size: int = 32768,
     timeout: Real | None | tuple[Real | None, Real | None] = None,
     io_open: Callable[..., Any] | None = None,
+    hwaccel: HWAccel | None = None,
 ) -> OutputContainer: ...
 @overload
 def open(
@@ -115,4 +120,5 @@ def open(
     buffer_size: int = 32768,
     timeout: Real | None | tuple[Real | None, Real | None] = None,
     io_open: Callable[..., Any] | None = None,
+    hwaccel: HWAccel | None = None,
 ) -> InputContainer | OutputContainer: ...
diff --git a/av/container/core.pyx b/av/container/core.pyx
index 563c79d21..201570c62 100755
--- a/av/container/core.pyx
+++ b/av/container/core.pyx
@@ -8,6 +8,7 @@ from pathlib import Path
 
 cimport libav as lib
 
+from av.codec.hwaccel cimport HWAccel
 from av.container.core cimport timeout_info
 from av.container.input cimport InputContainer
 from av.container.output cimport OutputContainer
@@ -143,7 +144,7 @@ class Flags(Flag):
 
 cdef class Container:
     def __cinit__(self, sentinel, file_, format_name, options,
-                  container_options, stream_options,
+                  container_options, stream_options, hwaccel,
                   metadata_encoding, metadata_errors,
                   buffer_size, open_timeout, read_timeout,
                   io_open):
@@ -164,6 +165,8 @@ cdef class Container:
         self.container_options = dict(container_options or ())
         self.stream_options = [dict(x) for x in stream_options or ()]
 
+        self.hwaccel = hwaccel
+
         self.metadata_encoding = metadata_encoding
         self.metadata_errors = metadata_errors
 
@@ -296,6 +299,7 @@ def open(
     buffer_size=32768,
     timeout=None,
     io_open=None,
+    hwaccel=None
 ):
     """open(file, mode='r', **kwargs)
 
@@ -322,6 +326,7 @@ def open(
         ``url`` is the url to open, ``flags`` is a combination of AVIO_FLAG_* and
         ``options`` is a dictionary of additional options. The callable should return a
         file-like object.
+    :param HWAccel hwaccel: Optional settings for hardware-accelerated decoding.
     :rtype: Container
 
     For devices (via ``libavdevice``), pass the name of the device to ``format``,
@@ -367,7 +372,7 @@ def open(
 
     if mode.startswith("r"):
         return InputContainer(_cinit_sentinel, file, format, options,
-            container_options, stream_options, metadata_encoding, metadata_errors,
+            container_options, stream_options, hwaccel, metadata_encoding, metadata_errors,
             buffer_size, open_timeout, read_timeout, io_open,
         )
 
@@ -376,6 +381,6 @@ def open(
             "Provide stream options via Container.add_stream(..., options={})."
         )
     return OutputContainer(_cinit_sentinel, file, format, options,
-        container_options, stream_options, metadata_encoding, metadata_errors,
+        container_options, stream_options, None, metadata_encoding, metadata_errors,
         buffer_size, open_timeout, read_timeout, io_open,
     )
diff --git a/av/container/input.pyx b/av/container/input.pyx
index 7246f8245..aa9940452 100644
--- a/av/container/input.pyx
+++ b/av/container/input.pyx
@@ -77,7 +77,7 @@ cdef class InputContainer(Container):
                 codec_context = lib.avcodec_alloc_context3(codec)
                 err_check(lib.avcodec_parameters_to_context(codec_context, stream.codecpar))
                 codec_context.pkt_timebase = stream.time_base
-                py_codec_context = wrap_codec_context(codec_context, codec)
+                py_codec_context = wrap_codec_context(codec_context, codec, self.hwaccel)
             else:
                 # no decoder is available
                 py_codec_context = None
diff --git a/av/container/output.pyx b/av/container/output.pyx
index a75e47d43..e61ef2297 100644
--- a/av/container/output.pyx
+++ b/av/container/output.pyx
@@ -125,7 +125,7 @@ cdef class OutputContainer(Container):
         err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context))
 
         # Construct the user-land stream
-        cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec)
+        cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None)
         cdef Stream py_stream = wrap_stream(self, stream, py_codec_context)
         self.streams.add_stream(py_stream)
 
@@ -179,7 +179,7 @@ cdef class OutputContainer(Container):
         err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context))
 
         # Construct the user-land stream
-        cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec)
+        cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None)
         cdef Stream py_stream = wrap_stream(self, stream, py_codec_context)
         self.streams.add_stream(py_stream)
 
@@ -237,7 +237,7 @@ cdef class OutputContainer(Container):
         # Construct the user-land stream
         cdef CodecContext py_codec_context = None
         if codec_context != NULL:
-            py_codec_context = wrap_codec_context(codec_context, codec)
+            py_codec_context = wrap_codec_context(codec_context, codec, None)
 
         cdef Stream py_stream = wrap_stream(self, stream, py_codec_context)
         self.streams.add_stream(py_stream)
diff --git a/av/video/codeccontext.pxd b/av/video/codeccontext.pxd
index 9693caa9b..895ba74b1 100644
--- a/av/video/codeccontext.pxd
+++ b/av/video/codeccontext.pxd
@@ -1,3 +1,4 @@
+cimport libav as lib
 
 from av.codec.context cimport CodecContext
 from av.video.format cimport VideoFormat
@@ -5,8 +6,19 @@ from av.video.frame cimport VideoFrame
 from av.video.reformatter cimport VideoReformatter
 
 
+# The get_format callback in AVCodecContext is called by the decoder to pick a format out of a list.
+# When we want accelerated decoding, we need to figure out ahead of time what the format should be,
+# and find a way to pass that into our callback so we can return it to the decoder. We use the 'opaque'
+# user data field in AVCodecContext for that. This is the struct we store a pointer to in that field.
+cdef struct AVCodecPrivateData:
+    lib.AVPixelFormat hardware_pix_fmt
+    bint allow_software_fallback
+
+
 cdef class VideoCodecContext(CodecContext):
 
+    cdef AVCodecPrivateData _private_data
+
     cdef VideoFormat _format
     cdef _build_format(self)
 
diff --git a/av/video/codeccontext.pyx b/av/video/codeccontext.pyx
index d2f4c9e14..e19f3c6ae 100644
--- a/av/video/codeccontext.pyx
+++ b/av/video/codeccontext.pyx
@@ -2,6 +2,8 @@ cimport libav as lib
 from libc.stdint cimport int64_t
 
 from av.codec.context cimport CodecContext
+from av.codec.hwaccel cimport HWAccel, HWConfig
+from av.error cimport err_check
 from av.frame cimport Frame
 from av.packet cimport Packet
 from av.utils cimport avrational_to_fraction, to_avrational
@@ -10,13 +12,53 @@ from av.video.frame cimport VideoFrame, alloc_video_frame
 from av.video.reformatter cimport VideoReformatter
 
 
+cdef lib.AVPixelFormat _get_hw_format(lib.AVCodecContext *ctx, const lib.AVPixelFormat *pix_fmts) noexcept:
+    # In the case where we requested accelerated decoding, the decoder first calls this function
+    # with a list that includes both the hardware format and software formats.
+    # First we try to pick the hardware format if it's in the list.
+    # However, if the decoder fails to initialize the hardware, it will call this function again,
+    # with only software formats in pix_fmts. We return ctx->sw_pix_fmt regardless in this case,
+    # because that should be in the candidate list. If not, we are out of ideas anyways.
+    cdef AVCodecPrivateData* private_data = <AVCodecPrivateData*>ctx.opaque
+    cdef int i = 0  # Typed C index keeps this callback free of Python objects; the decoder may call it without the GIL held - TODO confirm.
+    while pix_fmts[i] != -1:  # The candidate list is terminated by -1.
+        if pix_fmts[i] == private_data.hardware_pix_fmt:
+            return pix_fmts[i]
+        i += 1
+    return ctx.sw_pix_fmt if private_data.allow_software_fallback else lib.AV_PIX_FMT_NONE
+
+
 cdef class VideoCodecContext(CodecContext):
+
     def __cinit__(self, *args, **kwargs):
         self.last_w = 0
         self.last_h = 0
 
-    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec):
-        CodecContext._init(self, ptr, codec)  # TODO: Can this be `super`?
+    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel):
+        CodecContext._init(self, ptr, codec, hwaccel)  # TODO: Can this be `super`?
+
+        self.ptr.pix_fmt = lib.AV_PIX_FMT_NONE
+
+        if hwaccel is not None:
+            try:
+                self.hwaccel_ctx = hwaccel.create(self.codec)
+                self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
+                self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
+                self.ptr.get_format = _get_hw_format
+                self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
+                self._private_data.allow_software_fallback = self.hwaccel.allow_software_fallback
+                self.ptr.opaque = &self._private_data
+            except NotImplementedError:
+                # Some streams may not have a hardware decoder. For example, many action
+                # cam videos have a low resolution mjpeg stream, which is usually not
+                # compatible with hardware decoders.
+                # The user may have passed in a hwaccel because they want to decode the main
+                # stream with it, so we shouldn't abort even if we find a stream that can't
+                # be HW decoded.
+                # If the user wants to make sure hwaccel is actually used, they can check with the
+                # is_hardware_accelerated() function on each stream's codec context.
+                self.hwaccel_ctx = None
+
         self._build_format()
         self.encoded_frame_count = 0
 
@@ -58,6 +100,26 @@ cdef class VideoCodecContext(CodecContext):
         cdef VideoFrame vframe = frame
         vframe._init_user_attributes()
 
+    cdef _transfer_hwframe(self, Frame frame):
+        # Return a system-memory copy of a hardware frame; any other frame is returned unchanged.
+        if self.hwaccel_ctx is None:
+            return frame
+
+        if frame.ptr.format != self.hwaccel_ctx.config.ptr.pix_fmt:
+            # If we get a software frame, that means we are in software fallback mode, and don't actually
+            # need to transfer.
+            return frame
+
+        cdef Frame frame_sw
+        frame_sw = self._alloc_next_frame()
+
+        err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0))
+
+        # Carry over pts plus the other frame properties (flags, colour info, side data), not just pts.
+        err_check(lib.av_frame_copy_props(frame_sw.ptr, frame.ptr))
+
+        return frame_sw
+
     cdef _build_format(self):
         self._format = get_video_format(<lib.AVPixelFormat>self.ptr.pix_fmt, self.ptr.width, self.ptr.height)
 
diff --git a/examples/basics/hw_decode.py b/examples/basics/hw_decode.py
new file mode 100644
index 000000000..a90e6b9ed
--- /dev/null
+++ b/examples/basics/hw_decode.py
@@ -0,0 +1,76 @@
+import os
+import time
+
+import av
+import av.datasets
+
+# What accelerator to use.
+# Recommendations:
+#   Windows:
+#       - d3d11va (Direct3D 11)
+#           * available with built-in ffmpeg in PyAV binary wheels, and gives access to
+#             all decoders, but performance may not be as good as vendor native interfaces.
+#       - cuda (NVIDIA NVDEC), qsv (Intel QuickSync)
+#           * may be faster than d3d11va, but requires custom ffmpeg built with those libraries.
+#   Linux (all options require custom FFmpeg):
+#       - vaapi (Intel, AMD)
+#       - cuda (NVIDIA)
+#   Mac:
+#       - videotoolbox
+#           * available with built-in ffmpeg in PyAV binary wheels, and gives access to
+#             all accelerators available on Macs. This is the only option on macOS.
+
+HW_DEVICE = os.environ.get('HW_DEVICE')
+
+if 'TEST_FILE_PATH' in os.environ:
+    test_file_path = os.environ['TEST_FILE_PATH']
+else:
+    test_file_path = av.datasets.curated("pexels/time-lapse-video-of-night-sky-857195.mp4")
+
+if HW_DEVICE is None:
+    av.codec.hwaccel.dump_hwdevices()
+    # SystemExit is always available (exit() comes from the site module) and signals failure.
+    raise SystemExit('Please set HW_DEVICE.')
+
+if HW_DEVICE not in av.codec.hwaccel.hwdevices_available:
+    raise SystemExit(f'{HW_DEVICE} not available.')
+
+print("Decoding in software (auto threading)...")
+
+container = av.open(test_file_path)
+container.streams.video[0].thread_type = "AUTO"
+
+start_time = time.perf_counter()
+frame_count = 0
+for packet in container.demux(video=0):
+    for _ in packet.decode():
+        frame_count += 1
+
+sw_time = time.perf_counter() - start_time
+sw_fps = frame_count / sw_time
+assert frame_count == container.streams.video[0].frames
+container.close()
+
+print(f"Decoded with software in {sw_time:.2f}s ({sw_fps:.2f} fps).")
+
+print(f"Decoding with {HW_DEVICE}")
+
+hwaccel = av.codec.hwaccel.HWAccel(
+    device_type=HW_DEVICE,
+    allow_software_fallback=False)
+
+# Note the additional argument here.
+container = av.open(test_file_path, hwaccel=hwaccel)
+
+start_time = time.perf_counter()
+frame_count = 0
+for packet in container.demux(video=0):
+    for _ in packet.decode():
+        frame_count += 1
+
+hw_time = time.perf_counter() - start_time
+hw_fps = frame_count / hw_time
+assert frame_count == container.streams.video[0].frames
+container.close()
+
+print(f"Decoded with {HW_DEVICE} in {hw_time:.2f}s ({hw_fps:.2f} fps).")
diff --git a/include/libav.pxd b/include/libav.pxd
index c793b9988..e2fe323a4 100644
--- a/include/libav.pxd
+++ b/include/libav.pxd
@@ -4,11 +4,14 @@ include "libavutil/channel_layout.pxd"
 include "libavutil/dict.pxd"
 include "libavutil/error.pxd"
 include "libavutil/frame.pxd"
+include "libavutil/hwcontext.pxd"
 include "libavutil/samplefmt.pxd"
 include "libavutil/motion_vector.pxd"
 
 include "libavcodec/avcodec.pxd"
 include "libavcodec/bsf.pxd"
+include "libavcodec/hwaccel.pxd"
+
 include "libavdevice/avdevice.pxd"
 include "libavformat/avformat.pxd"
 include "libswresample/swresample.pxd"
diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd
index 172c9cc65..bcb342373 100644
--- a/include/libavcodec/avcodec.pxd
+++ b/include/libavcodec/avcodec.pxd
@@ -213,6 +213,8 @@ cdef extern from "libavcodec/avcodec.h" nogil:
 
         AVFrame* coded_frame
 
+        void* opaque
+
         int bit_rate
         int bit_rate_tolerance
         int mb_decision
@@ -247,6 +249,7 @@ cdef extern from "libavcodec/avcodec.h" nogil:
         int coded_height
 
         AVPixelFormat pix_fmt
+        AVPixelFormat sw_pix_fmt
         AVRational sample_aspect_ratio
         int gop_size  # The number of pictures in a group of pictures, or 0 for intra_only.
         int max_b_frames
@@ -266,6 +269,11 @@ cdef extern from "libavcodec/avcodec.h" nogil:
         int get_buffer(AVCodecContext *ctx, AVFrame *frame)
         void release_buffer(AVCodecContext *ctx, AVFrame *frame)
 
+        # Hardware acceleration
+        AVHWAccel *hwaccel
+        AVBufferRef *hw_device_ctx
+        AVPixelFormat (*get_format)(AVCodecContext *s, const AVPixelFormat *fmt)
+
         # User Data
         void *opaque
 
diff --git a/include/libavcodec/hwaccel.pxd b/include/libavcodec/hwaccel.pxd
new file mode 100644
index 000000000..cb9ac41b6
--- /dev/null
+++ b/include/libavcodec/hwaccel.pxd
@@ -0,0 +1,19 @@
+cdef extern from "libavcodec/avcodec.h" nogil:
+    cdef enum:
+        AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+        AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX,
+        AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
+        AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
+    cdef struct AVCodecHWConfig:
+        AVPixelFormat pix_fmt
+        int methods
+        AVHWDeviceType device_type
+    cdef const AVCodecHWConfig* avcodec_get_hw_config(const AVCodec *codec, int index)
+    cdef enum:
+        AV_HWACCEL_CODEC_CAP_EXPERIMENTAL
+    cdef struct AVHWAccel:
+        char *name
+        AVMediaType type
+        AVCodecID id
+        AVPixelFormat pix_fmt
+        int capabilities
diff --git a/include/libavutil/buffer.pxd b/include/libavutil/buffer.pxd
index daf86105b..d4ff4cd17 100644
--- a/include/libavutil/buffer.pxd
+++ b/include/libavutil/buffer.pxd
@@ -1,9 +1,18 @@
-from libc.stdint cimport uint8_t
+from libc.stdint cimport intptr_t, uint8_t
 
 cdef extern from "libavutil/buffer.h" nogil:
-
     AVBufferRef *av_buffer_create(uint8_t *data, size_t size, void (*free)(void *opaque, uint8_t *data), void *opaque, int flags)
+    AVBufferRef* av_buffer_ref(AVBufferRef *buf)
     void av_buffer_unref(AVBufferRef **buf)
 
+    cdef struct AVBuffer:
+        uint8_t *data
+        size_t size
+        intptr_t refcount
+        void (*free)(void *opaque, uint8_t *data)
+        void *opaque
+        int flags
     cdef struct AVBufferRef:
+        AVBuffer *buffer
         uint8_t *data
+        size_t size
diff --git a/include/libavutil/hwcontext.pxd b/include/libavutil/hwcontext.pxd
new file mode 100644
index 000000000..beda15a2c
--- /dev/null
+++ b/include/libavutil/hwcontext.pxd
@@ -0,0 +1,24 @@
+cdef extern from "libavutil/hwcontext.h" nogil:
+
+    enum AVHWDeviceType:
+        AV_HWDEVICE_TYPE_NONE
+        AV_HWDEVICE_TYPE_VDPAU
+        AV_HWDEVICE_TYPE_CUDA
+        AV_HWDEVICE_TYPE_VAAPI
+        AV_HWDEVICE_TYPE_DXVA2
+        AV_HWDEVICE_TYPE_QSV
+        AV_HWDEVICE_TYPE_VIDEOTOOLBOX
+        AV_HWDEVICE_TYPE_D3D11VA
+        AV_HWDEVICE_TYPE_DRM
+        AV_HWDEVICE_TYPE_OPENCL
+        AV_HWDEVICE_TYPE_MEDIACODEC
+        AV_HWDEVICE_TYPE_VULKAN
+        AV_HWDEVICE_TYPE_D3D12VA
+
+    cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags)
+
+    cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
+    cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type)
+    cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev)
+
+    cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags)
diff --git a/scripts/build-deps b/scripts/build-deps
index 4cb90f074..de4a6e547 100755
--- a/scripts/build-deps
+++ b/scripts/build-deps
@@ -13,6 +13,31 @@ if [[ -e "$PYAV_LIBRARY_PREFIX/bin/ffmpeg" ]]; then
     exit 0
 fi
 
+# Add CUDA support if available
+CONFFLAGS_NVIDIA=""
+if [[ -e /usr/local/cuda ]]; then
+    # Get Nvidia headers for ffmpeg. The library root may not exist yet at this point.
+    mkdir -p "$PYAV_LIBRARY_ROOT" && cd "$PYAV_LIBRARY_ROOT" || exit 1
+    if [[ ! -e "$PYAV_LIBRARY_ROOT/nv-codec-headers" ]]; then
+        git clone https://github.com/FFmpeg/nv-codec-headers.git || exit 1
+        cd nv-codec-headers || exit 1
+        make -j4 || exit 1
+        make PREFIX="$PYAV_LIBRARY_PREFIX" install || exit 1
+    fi
+
+    export PKG_CONFIG_PATH="$PYAV_LIBRARY_PREFIX/lib/pkgconfig:/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH"
+    CONFFLAGS_NVIDIA="--enable-cuda \
+                      --enable-cuvid \
+                      --enable-nvenc \
+                      --enable-nonfree \
+                      --enable-libnpp \
+                      --extra-cflags=-I/usr/local/cuda/include \
+                      --extra-ldflags=-L/usr/local/cuda/lib64"
+else
+    echo "WARNING: Did not find cuda libraries in /usr/local/cuda..."
+    echo "         Building without NVIDIA NVENC/NVDEC support"
+fi
+
 
 mkdir -p "$PYAV_LIBRARY_ROOT"
 mkdir -p "$PYAV_LIBRARY_PREFIX"
@@ -44,6 +69,7 @@ echo ./configure
     --enable-sse \
     --enable-avx \
     --enable-avx2 \
+    $CONFFLAGS_NVIDIA \
     --prefix="$PYAV_LIBRARY_PREFIX" \
     || exit 2
 echo
diff --git a/tests/test_decode.py b/tests/test_decode.py
index 05f636977..66a5bdfd0 100644
--- a/tests/test_decode.py
+++ b/tests/test_decode.py
@@ -1,10 +1,43 @@
 from fractions import Fraction
+import functools
+import os
+import pathlib
 
 import av
+import numpy as np
+import pytest
 
 from .common import TestCase, fate_suite
 
 
+@functools.cache
+def make_h264_test_video(path: str) -> None:
+    """Generates a black H264 test video for testing hardware decoding."""
+
+    # We generate a file here that's designed to be as compatible as possible with hardware
+    # decoders. Hardware decoders are sometimes very picky and the errors we get are often
+    # opaque, so there is nothing much we (PyAV) can do. The user needs to figure that out
+    # if they want to use hwaccel. We only want to test the PyAV plumbing here.
+    # Our video is H264, 1280x720p (note that some decoders have a minimum resolution limit), 24fps,
+    # 8-bit yuv420p.
+    pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
+    black = np.zeros((720, 1280, 3), dtype=np.uint8)
+    # The context manager closes the output file even if encoding or muxing raises.
+    with av.open(path, "w") as output_container:
+        stream = output_container.add_stream("libx264", rate=24)
+        stream.width = 1280
+        stream.height = 720
+        stream.pix_fmt = "yuv420p"
+
+        for _ in range(24):
+            frame = av.VideoFrame.from_ndarray(black, format="rgb24")
+            for packet in stream.encode(frame):
+                output_container.mux(packet)
+
+        for packet in stream.encode():
+            output_container.mux(packet)
+
+
 class TestDecode(TestCase):
     def test_decoded_video_frame_count(self) -> None:
         container = av.open(fate_suite("h264/interlaced_crop.mp4"))
@@ -165,3 +198,32 @@ def test_side_data(self) -> None:
         container = av.open(fate_suite("mov/displaymatrix.mov"))
         frame = next(container.decode(video=0))
         assert frame.rotation == -90
+
+    def test_hardware_decode(self) -> None:
+        """Decode the generated H264 clip on the device named by HWACCEL_DEVICE_TYPE."""
+        device_type = os.environ.get("HWACCEL_DEVICE_TYPE")
+        if device_type is None:
+            pytest.skip(
+                "Set the HWACCEL_DEVICE_TYPE to run this test. "
+                f"Options are {' '.join(av.codec.hwaccel.hwdevices_available)}")
+
+        assert device_type in av.codec.hwaccel.hwdevices_available, f'{device_type} not available'
+
+        test_video_path = "tests/assets/black.mp4"
+        make_h264_test_video(test_video_path)
+
+        # Test decode. Software fallback is disabled so that a decoder quietly falling back
+        # to software cannot make this test pass.
+        hwaccel = av.codec.hwaccel.HWAccel(device_type=device_type, allow_software_fallback=False)
+
+        # Close the container even when one of the assertions below fails.
+        with av.open(test_video_path, hwaccel=hwaccel) as container:
+            video_stream = next(s for s in container.streams if s.type == "video")
+
+            assert video_stream is container.streams.video[0]
+
+            assert video_stream.codec_context.is_hardware_accelerated()
+
+            frame_count = sum(1 for _ in container.decode(video_stream))
+
+            assert frame_count == video_stream.frames