diff --git a/README.md b/README.md
index 14b32452..8643e0e3 100644
--- a/README.md
+++ b/README.md
@@ -185,7 +185,8 @@ format you want. Refer to Nvidia's GPU support matrix for more details
 
 ## Benchmark Results
 
-The following was generated by running [our benchmark script](./benchmarks/decoders/generate_readme_data.py) on a lightly loaded 56-core machine.
+The following was generated by running [our benchmark script](./benchmarks/decoders/generate_readme_data.py) on a lightly loaded 22-core machine with an Nvidia A100 with
+5 [NVDEC decoders](https://docs.nvidia.com/video-technologies/video-codec-sdk/12.1/nvdec-application-note/index.html#).
 
 ![benchmark_results](./benchmarks/decoders/benchmark_readme_chart.png)
 
diff --git a/benchmarks/decoders/benchmark_decoders_library.py b/benchmarks/decoders/benchmark_decoders_library.py
index 33d25c63..a5102fd3 100644
--- a/benchmarks/decoders/benchmark_decoders_library.py
+++ b/benchmarks/decoders/benchmark_decoders_library.py
@@ -38,6 +38,14 @@ def __init__(self):
     def get_frames_from_video(self, video_file, pts_list):
         pass
 
+    @abc.abstractmethod
+    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        pass
+
+    @abc.abstractmethod
+    def decode_and_transform(self, video_file, pts_list, height, width, device):
+        pass
+
 
 class DecordAccurate(AbstractDecoder):
     def __init__(self):
@@ -89,8 +97,10 @@ def __init__(self, backend):
         self._backend = backend
         self._print_each_iteration_time = False
         import torchvision  # noqa: F401
+        from torchvision.transforms import v2 as transforms_v2
 
         self.torchvision = torchvision
+        self.transforms_v2 = transforms_v2
 
     def get_frames_from_video(self, video_file, pts_list):
         self.torchvision.set_video_backend(self._backend)
@@ -111,6 +121,20 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
             frames.append(frame["data"].permute(1, 2, 0))
         return frames
 
+    def decode_and_transform(self, video_file, pts_list, height, width, device):
+        self.torchvision.set_video_backend(self._backend)
+        reader = self.torchvision.io.VideoReader(video_file, "video")
+        frames = []
+        for pts in pts_list:
+            reader.seek(pts)
+            frame = next(reader)
+            frames.append(frame["data"].permute(1, 2, 0))
+        frames = [
+            self.transforms_v2.functional.resize(frame.to(device), (height, width))
+            for frame in frames
+        ]
+        return frames
+
 
 class TorchCodecCore(AbstractDecoder):
     def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
@@ -239,6 +263,10 @@ def __init__(self, num_ffmpeg_threads=None, device="cpu"):
         )
         self._device = device
 
+        from torchvision.transforms import v2 as transforms_v2
+
+        self.transforms_v2 = transforms_v2
+
     def get_frames_from_video(self, video_file, pts_list):
         decoder = VideoDecoder(
             video_file, num_ffmpeg_threads=self._num_ffmpeg_threads, device=self._device
@@ -258,6 +286,14 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
                 break
         return frames
 
+    def decode_and_transform(self, video_file, pts_list, height, width, device):
+        decoder = VideoDecoder(
+            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads, device=self._device
+        )
+        frames = decoder.get_frames_played_at(pts_list)
+        frames = self.transforms_v2.functional.resize(frames.data, (height, width))
+        return frames
+
 
 @torch.compile(fullgraph=True, backend="eager")
 def compiled_seek_and_next(decoder, pts):
@@ -299,7 +335,9 @@ def __init__(self):
 
         self.torchaudio = torchaudio
 
-        pass
+        from torchvision.transforms import v2 as transforms_v2
+
+        self.transforms_v2 = transforms_v2
 
     def get_frames_from_video(self, video_file, pts_list):
         stream_reader = self.torchaudio.io.StreamReader(src=video_file)
@@ -325,6 +363,21 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
 
         return frames
 
+    def decode_and_transform(self, video_file, pts_list, height, width, device):
+        stream_reader = self.torchaudio.io.StreamReader(src=video_file)
+        stream_reader.add_basic_video_stream(frames_per_chunk=1)
+        frames = []
+        for pts in pts_list:
+            stream_reader.seek(pts)
+            stream_reader.fill_buffer()
+            clip = stream_reader.pop_chunks()
+            frames.append(clip[0][0])
+        frames = [
+            self.transforms_v2.functional.resize(frame.to(device), (height, width))
+            for frame in frames
+        ]
+        return frames
+
 
 def create_torchcodec_decoder_from_file(video_file):
     video_decoder = create_from_file(video_file)
@@ -443,7 +496,7 @@ def plot_data(df_data, plot_path):
 
         # Set the title for the subplot
         base_video = Path(video).name.removesuffix(".mp4")
-        ax.set_title(f"{base_video}\n{vcount} x {vtype}", fontsize=11)
+        ax.set_title(f"{base_video}\n{vtype}", fontsize=11)
 
         # Plot bars with error bars
         ax.barh(
@@ -486,6 +539,14 @@ class BatchParameters:
     batch_size: int
 
 
+@dataclass
+class DataLoaderInspiredWorkloadParameters:
+    batch_parameters: BatchParameters
+    resize_height: int
+    resize_width: int
+    resize_device: str
+
+
 def run_batch_using_threads(
     function,
     *args,
@@ -525,6 +586,7 @@ def run_benchmarks(
     num_sequential_frames_from_start: list[int],
     min_runtime_seconds: float,
     benchmark_video_creation: bool,
+    dataloader_parameters: DataLoaderInspiredWorkloadParameters = None,
     batch_parameters: BatchParameters = None,
 ) -> list[dict[str, str | float | int]]:
     # Ensure that we have the same seed across benchmark runs.
@@ -550,6 +612,39 @@ def run_benchmarks(
         for decoder_name, decoder in decoder_dict.items():
             print(f"video={video_file_path}, decoder={decoder_name}")
 
+            if dataloader_parameters:
+                bp = dataloader_parameters.batch_parameters
+                dataloader_result = benchmark.Timer(
+                    stmt="run_batch_using_threads(decoder.decode_and_transform, video_file, pts_list, height, width, device, batch_parameters=batch_parameters)",
+                    globals={
+                        "video_file": str(video_file_path),
+                        "pts_list": uniform_pts_list,
+                        "decoder": decoder,
+                        "run_batch_using_threads": run_batch_using_threads,
+                        "batch_parameters": dataloader_parameters.batch_parameters,
+                        "height": dataloader_parameters.resize_height,
+                        "width": dataloader_parameters.resize_width,
+                        "device": dataloader_parameters.resize_device,
+                    },
+                    label=f"video={video_file_path} {metadata_label}",
+                    sub_label=decoder_name,
+                    description=f"dataloader[threads={bp.num_threads} batch_size={bp.batch_size}] {num_samples} decode_and_transform()",
+                )
+                results.append(
+                    dataloader_result.blocked_autorange(
+                        min_run_time=min_runtime_seconds
+                    )
+                )
+                df_data.append(
+                    convert_result_to_df_item(
+                        results[-1],
+                        decoder_name,
+                        video_file_path,
+                        num_samples * dataloader_parameters.batch_parameters.batch_size,
+                        f"dataloader[threads={bp.num_threads} batch_size={bp.batch_size}] {num_samples} x decode_and_transform()",
+                    )
+                )
+
             for kind, pts_list in [
                 ("uniform", uniform_pts_list),
                 ("random", random_pts_list),
diff --git a/benchmarks/decoders/benchmark_readme_chart.png b/benchmarks/decoders/benchmark_readme_chart.png
index 28ceabe4..f433f619 100644
Binary files a/benchmarks/decoders/benchmark_readme_chart.png and b/benchmarks/decoders/benchmark_readme_chart.png differ
diff --git a/benchmarks/decoders/benchmark_readme_data.json b/benchmarks/decoders/benchmark_readme_data.json
index 9639d107..7e4354d4 100644 --- a/benchmarks/decoders/benchmark_readme_data.json +++ b/benchmarks/decoders/benchmark_readme_data.json @@ -1,225 +1,394 @@ [ + { + "decoder": "torchcodec", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 3.722043790887239, + "fps_p25": 3.722043790887239, + "fps_p75": 3.722043790887239, + "frame_count": 640, + "iqr": 0.0, + "median": 171.94854116626084, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, { "decoder": "torchcodec", "description": "uniform 10 seek()+next()", - "fps_median": 2.888099110308182, - "fps_p25": 2.9416150999827617, - "fps_p75": 2.8451474402295633, + "fps_median": 1.4397872541120416, + "fps_p25": 1.451428122051996, + "fps_p75": 1.4351890231275515, "frame_count": 10, - "iqr": 0.1152633186429739, - "median": 3.4624850526452065, + "iqr": 0.07795738987624645, + "median": 6.945470569655299, "type": "uniform seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", "description": "random 10 seek()+next()", - "fps_median": 3.679673367614735, - "fps_p25": 3.7010260633892753, - "fps_p75": 3.6730411925903006, + "fps_median": 1.736215701229184, + "fps_p25": 1.7631990988377526, + "fps_p75": 1.725260270510397, "frame_count": 10, - "iqr": 0.020586160942912102, - "median": 2.7176325181499124, + "iqr": 0.12471765535883605, + "median": 5.7596530159935355, "type": "random seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", "description": "100 next()", - "fps_median": 332.62889220665653, - "fps_p25": 335.30421912694004, - "fps_p75": 329.14228955425125, + "fps_median": 125.18633474918164, + "fps_p25": 131.35032274770882, + "fps_p75": 121.19682688127735, + "frame_count": 100, + "iqr": 0.0637812758795917, + "median": 0.7988092326559126, + "type": "100 next()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 6.71338393971077, + "fps_p25": 6.71338393971077, + "fps_p75": 6.71338393971077, + "frame_count": 640, + "iqr": 0.0, + "median": 95.33195267058909, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "uniform 10 seek()+next()", + "fps_median": 1.243389954704256, + "fps_p25": 1.2474136405315202, + "fps_p75": 1.231124865014404, + "frame_count": 10, + "iqr": 0.1060659158974886, + "median": 8.042529185768217, + "type": "uniform seek()+next()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "random 10 seek()+next()", + "fps_median": 1.5460842780696675, + "fps_p25": 1.5614610546171193, + "fps_p75": 1.5057133085494405, + "frame_count": 10, + "iqr": 0.2371121821925044, 
+ "median": 6.467952712439001, + "type": "random seek()+next()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "100 next()", + "fps_median": 178.9611538836387, + "fps_p25": 182.15441067650494, + "fps_p75": 124.20054668713973, "frame_count": 100, - "iqr": 0.005583339370787144, - "median": 0.300635339692235, + "iqr": 0.2561646499671042, + "median": 0.5587804829701781, "type": "100 next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchvision[video_reader]", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 1.5697915959359987, + "fps_p25": 1.5697915959359987, + "fps_p75": 1.5697915959359987, + "frame_count": 640, + "iqr": 0.0, + "median": 407.69743044674397, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", "description": "uniform 10 seek()+next()", - "fps_median": 0.4183496816958986, - "fps_p25": 0.42028123679840773, - "fps_p75": 0.41643579969655253, + "fps_median": 0.1978081509193199, + "fps_p25": 0.1978081509193199, + "fps_p75": 0.1978081509193199, "frame_count": 10, - "iqr": 0.21971396077424288, - "median": 23.90344832930714, + "iqr": 0.0, + "median": 50.5540340654552, "type": "uniform seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", "description": "random 10 seek()+next()", - "fps_median": 0.3201664495067429, - "fps_p25": 0.3201664495067429, - "fps_p75": 0.3201664495067429, + "fps_median": 0.14806268866290584, + "fps_p25": 0.14806268866290584, + "fps_p75": 0.14806268866290584, "frame_count": 10, "iqr": 0.0, - "median": 31.23375361599028, + "median": 67.53895995207131, "type": "random seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", "description": "100 next()", - "fps_median": 183.6059253297136, - "fps_p25": 184.88581057175523, - "fps_p75": 180.4772551158132, + "fps_median": 90.71105513124185, + "fps_p25": 93.42498301579607, + "fps_p75": 66.99561256477986, "frame_count": 100, - "iqr": 0.01321205124258995, - "median": 0.5446447320282459, + "iqr": 0.4222575547173619, + "median": 1.1024014642462134, "type": "100 next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchaudio", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 0.22995229905603712, + "fps_p25": 0.22995229905603712, + "fps_p75": 0.22995229905603712, + "frame_count": 640, + "iqr": 0.0, + "median": 2783.1859156321734, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": 
"/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", "description": "uniform 10 seek()+next()", - "fps_median": 0.5240865109999612, - "fps_p25": 0.5245880071682041, - "fps_p75": 0.5235859727575772, + "fps_median": 0.2240825403534482, + "fps_p25": 0.2240825403534482, + "fps_p75": 0.2240825403534482, "frame_count": 10, - "iqr": 0.03648180142045021, - "median": 19.080819273367524, + "iqr": 0.0, + "median": 44.62641303613782, "type": "uniform seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", "description": "random 10 seek()+next()", - "fps_median": 0.41470785541122906, - "fps_p25": 0.4150239639037595, - "fps_p75": 0.41439222808863957, + "fps_median": 0.17111174619117042, + "fps_p25": 0.17111174619117042, + "fps_p75": 0.17111174619117042, "frame_count": 10, - "iqr": 0.03673251997679472, - "median": 24.113360452465713, + "iqr": 0.0, + "median": 58.44134153611958, "type": "random seek()+next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", "description": "100 next()", - "fps_median": 189.09233561323046, - "fps_p25": 190.02373443273453, - "fps_p75": 187.75804451204894, + "fps_median": 71.79386812605445, + "fps_p25": 72.51511528517597, + "fps_p75": 71.07345912080734, "frame_count": 100, - "iqr": 0.0063502974808216095, - "median": 0.5288421642035246, + "iqr": 0.027972140349447727, + "median": 1.3928766148164868, "type": "100 next()", - "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4" + "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" + }, + { + "decoder": "torchcodec", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 67.79397429639815, + "fps_p25": 68.59028564862771, + "fps_p75": 65.71375822403033, + "frame_count": 640, + "iqr": 0.40844123042188585, + "median": 9.440367033239454, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", "description": "uniform 10 seek()+next()", - "fps_median": 33.23248091744692, - "fps_p25": 33.74429257495837, - "fps_p75": 32.677051134717146, + "fps_median": 28.46557984604316, + "fps_p25": 29.922295359471935, + "fps_p75": 26.73851645668455, "frame_count": 10, - "iqr": 0.009678754955530167, - "median": 0.3009104263037443, + "iqr": 0.03979336703196168, + "median": 0.3513014684431255, "type": "uniform seek()+next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", "description": "random 10 seek()+next()", - "fps_median": 32.0340867355421, - "fps_p25": 32.33995696335928, - "fps_p75": 31.48660833047371, + "fps_median": 29.945412645922318, + "fps_p25": 30.85213034698643, + "fps_p75": 28.594925068542125, "frame_count": 10, - "iqr": 0.008380331099033356, - "median": 0.3121674759313464, + "iqr": 0.025585678406059742, + "median": 0.3339409651234746, "type": "random seek()+next()", "video": 
"/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", "description": "100 next()", - "fps_median": 373.9297931168172, - "fps_p25": 377.8974427908539, - "fps_p75": 366.9518403987283, + "fps_median": 614.0952680137761, + "fps_p25": 628.2472414155694, + "fps_p75": 593.4204378926277, + "frame_count": 100, + "iqr": 0.00934158405289054, + "median": 0.16284118313342333, + "type": "100 next()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 14.269868091545673, + "fps_p25": 14.269868091545673, + "fps_p75": 14.269868091545673, + "frame_count": 640, + "iqr": 0.0, + "median": 44.84974884800613, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "uniform 10 seek()+next()", + "fps_median": 3.3887557630838336, + "fps_p25": 3.3996850012955604, + "fps_p75": 3.378710590173761, + "frame_count": 10, + "iqr": 0.018259971868246794, + "median": 2.950935593806207, + "type": "uniform seek()+next()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "random 10 seek()+next()", + "fps_median": 4.206317288181642, + "fps_p25": 4.215251838912759, + "fps_p75": 4.200058005638581, + "frame_count": 10, + "iqr": 0.008582000620663166, + "median": 2.3773765303194523, + "type": "random seek()+next()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, + { + "decoder": "torchcodec[cuda]", + "description": "100 next()", + "fps_median": 108.15849644090362, + "fps_p25": 108.87629308883722, + "fps_p75": 107.1563304726156, "frame_count": 100, - "iqr": 0.007893264293670654, - "median": 0.267429880797863, + "iqr": 0.014742388390004635, + "median": 0.92456906568259, "type": "100 next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, + { + "decoder": "torchvision[video_reader]", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 34.07782542607838, + "fps_p25": 34.40001338466056, + "fps_p75": 33.76161664373731, + "frame_count": 640, + "iqr": 0.35179429268464446, + "median": 18.780541070271283, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, { "decoder": "torchvision[video_reader]", "description": "uniform 10 seek()+next()", - "fps_median": 5.826583628637648, - "fps_p25": 5.869409916500439, - "fps_p75": 5.80448564059718, + "fps_median": 4.782367451226253, + "fps_p25": 4.8422619348709155, + "fps_p75": 4.588920179004961, "frame_count": 10, - "iqr": 0.01905675418674946, - "median": 1.7162715988233685, + "iqr": 0.11401132540777326, + "median": 2.091014565899968, "type": "uniform seek()+next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", "description": "random 10 seek()+next()", - "fps_median": 3.5913843964539565, - "fps_p25": 3.641605801630432, - "fps_p75": 3.557561124756978, + "fps_median": 2.9594605005777126, + "fps_p25": 3.0142969757047573, + "fps_p75": 2.939646297328533, "frame_count": 10, - "iqr": 0.06487313844263554, - "median": 
2.7844415679574013, + "iqr": 0.08424665033817291, + "median": 3.37899424508214, "type": "random seek()+next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", "description": "100 next()", - "fps_median": 220.82061460318502, - "fps_p25": 222.48418761498934, - "fps_p75": 218.84938919419412, + "fps_median": 195.10398914661542, + "fps_p25": 198.12837004507918, + "fps_p75": 179.9937080258182, "frame_count": 100, - "iqr": 0.007465103641152382, - "median": 0.452856270596385, + "iqr": 0.05085169989615679, + "median": 0.5125471828505397, "type": "100 next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, + { + "decoder": "torchaudio", + "description": "dataloader[threads=8 batch_size=64] 10 decode_and_transform()", + "fps_median": 9.635713291548546, + "fps_p25": 9.635713291548546, + "fps_p75": 9.635713291548546, + "frame_count": 640, + "iqr": 0.0, + "median": 66.41957690473646, + "type": "dataloader[threads=8 batch_size=64] 10 x decode_and_transform()", + "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" + }, { "decoder": "torchaudio", "description": "uniform 10 seek()+next()", - "fps_median": 10.54381002725746, - "fps_p25": 10.612585301638902, - "fps_p75": 10.516430888261935, + "fps_median": 9.561191528064604, + "fps_p25": 9.803393073168538, + "fps_p75": 9.349710586702399, "frame_count": 10, - "iqr": 0.008615483529865742, - "median": 0.9484237646684051, + "iqr": 0.049496835097670555, + "median": 1.0458947475999594, "type": "uniform seek()+next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", "description": "random 10 seek()+next()", - "fps_median": 7.206591092692971, - "fps_p25": 7.231744857114745, - "fps_p75": 7.073061841995869, + "fps_median": 6.485374499539733, + "fps_p25": 6.651050187497567, + "fps_p75": 6.3834662410329805, "frame_count": 10, - "iqr": 0.0310227214358747, - "median": 1.3876186218112707, + "iqr": 0.06302505941130221, + "median": 1.5419310019351542, "type": "random seek()+next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", "description": "100 next()", - "fps_median": 204.94896843488522, - "fps_p25": 206.320878222405, - "fps_p75": 203.29397516227598, + "fps_median": 258.3824937093069, + "fps_p25": 265.91286414117894, + "fps_p75": 250.26730738957093, "frame_count": 100, - "iqr": 0.007216569967567921, - "median": 0.4879263397306204, + "iqr": 0.02350972522981465, + "median": 0.38702312437817454, "type": "100 next()", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { - "cpu_count": 56, + "cpu_count": 22, + "is_cuda_available": "True", "machine": "x86_64", "processor": "x86_64", - "python_version": "3.11.10", + "python_version": "3.12.5", "system": "Linux" } ] diff --git a/benchmarks/decoders/generate_readme_data.py b/benchmarks/decoders/generate_readme_data.py index 91be59dc..3dee3913 100644 --- a/benchmarks/decoders/generate_readme_data.py +++ b/benchmarks/decoders/generate_readme_data.py @@ -10,7 +10,11 @@ import shutil from pathlib import Path +import torch + from benchmark_decoders_library import ( + BatchParameters, + DataLoaderInspiredWorkloadParameters, generate_videos, retrieve_videos, run_benchmarks, @@ -26,42 +30,45 @@ def main() -> None: """Benchmarks the performance of a few video decoders on synthetic videos""" videos_dir_path = 
"/tmp/torchcodec_benchmarking_videos" - shutil.rmtree(videos_dir_path, ignore_errors=True) - os.makedirs(videos_dir_path) + if not os.path.exists(videos_dir_path): + shutil.rmtree(videos_dir_path, ignore_errors=True) + os.makedirs(videos_dir_path) - resolutions = ["1280x720"] - encodings = ["libx264"] - patterns = ["mandelbrot"] - fpses = [60] - gop_sizes = [600] - durations = [120] - pix_fmts = ["yuv420p"] - ffmpeg_path = "ffmpeg" - generate_videos( - resolutions, - encodings, - patterns, - fpses, - gop_sizes, - durations, - pix_fmts, - ffmpeg_path, - videos_dir_path, - ) + resolutions = ["1920x1080"] + encodings = ["libx264"] + patterns = ["mandelbrot"] + fpses = [60] + gop_sizes = [600] + durations = [120] + pix_fmts = ["yuv420p"] + ffmpeg_path = "ffmpeg" + generate_videos( + resolutions, + encodings, + patterns, + fpses, + gop_sizes, + durations, + pix_fmts, + ffmpeg_path, + videos_dir_path, + ) - urls_and_dest_paths = [ - (NASA_URL, f"{videos_dir_path}/nasa_960x540_206s_30fps_yuv420p.mp4") - ] - retrieve_videos(urls_and_dest_paths) + urls_and_dest_paths = [ + (NASA_URL, f"{videos_dir_path}/nasa_960x540_206s_30fps_yuv420p.mp4") + ] + retrieve_videos(urls_and_dest_paths) decoder_dict = {} decoder_dict["torchcodec"] = TorchCodecPublic() + decoder_dict["torchcodec[cuda]"] = TorchCodecPublic(device="cuda") decoder_dict["torchvision[video_reader]"] = TorchVision("video_reader") decoder_dict["torchaudio"] = TorchAudioDecoder() # These are the number of uniform seeks we do in the seek+decode benchmark. num_samples = 10 video_files_paths = list(Path(videos_dir_path).glob("*.mp4")) + assert len(video_files_paths) == 2, "Expected exactly 2 videos" df_data = run_benchmarks( decoder_dict, video_files_paths, @@ -69,6 +76,12 @@ def main() -> None: num_sequential_frames_from_start=[100], min_runtime_seconds=30, benchmark_video_creation=False, + dataloader_parameters=DataLoaderInspiredWorkloadParameters( + batch_parameters=BatchParameters(batch_size=64, num_threads=8), + resize_height=256, + resize_width=256, + resize_device="cuda", + ), ) df_data.append( { @@ -77,6 +90,7 @@ def main() -> None: "machine": platform.machine(), "processor": platform.processor(), "python_version": str(platform.python_version()), + "is_cuda_available": str(torch.cuda.is_available()), } )