-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[torchcodec] Add support for Nvidia GPU Decoding (#137)
Summary: Pull Request resolved: #137 Pull Request resolved: #58 X-link: #58 1. Add CUDA support to VideoDecoder.cpp. This is done by checking what device is passed into the options and using CUDA if the device type is cuda. 2. Add -DENABLE_CUDA flag in cmake. 3. Check ENABLE_CUDA environment variable in setup.py and pass it down to cmake if it is present. 4. Add a unit test to demonstrate that CUDA decoding does work. This uses a different tensor than the one from CPU decoding because hardware decoding is intrinsically a bit inaccurate. I generated the reference tensor by dumping the tensor from the GPU on my devVM. It is possible that different Nvidia hardware produces different outputs. How to test this in a more robust way is TBD. 5. Add a new parameter for the cuda device index to `add_video_stream`. If this is present, we will use it to do hardware decoding on a CUDA device. There is a whole bunch of TODOs: 1. Currently GPU utilization is only 7-8% when decoding the video. We need to get this higher. 2. Speed it up compared to the CPU implementation. Currently this is slower than CPU decoding even for HD videos (probably because we can't hide the CPU to GPU memcpy). However, decode+resize is faster, as the benchmark shows. Reviewed By: scotts Differential Revision: D59121006
- Loading branch information
1 parent
361968f
commit 7826e2d
Showing
18 changed files
with
400 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import argparse | ||
import os | ||
import time | ||
|
||
import torch.utils.benchmark as benchmark | ||
|
||
import torchcodec | ||
from torchvision.transforms import Resize | ||
|
||
|
||
def transfer_and_resize_frame(frame, device):
    """Move ``frame`` to ``device`` and resize it to 256x256.

    Args:
        frame: The decoded frame to process.
        device: Target device handed to ``frame.to``.

    Returns:
        The result of applying ``Resize((256, 256))`` to the moved frame.
    """
    resize_to_256 = Resize((256, 256))
    # The transfer should be a no-op if the frame is already on the device.
    return resize_to_256(frame.to(device))
|
||
|
||
def decode_full_video(video_path, decode_device):
    """Decode every frame of stream 0 of a video and report the throughput.

    The clock starts after the decoder and the video stream have been set
    up, so only the frame-by-frame decode loop is measured.

    Args:
        video_path: Path of the video file to decode.
        decode_device: Device string forwarded to ``add_video_stream``
            (for example ``"cpu"`` or ``"cuda:0"``).

    Returns:
        A ``(frame_count, elapsed_seconds)`` tuple.
    """
    decoder = torchcodec.decoders._core.create_from_file(video_path)
    # Request a single thread whenever a CUDA device is named; otherwise
    # pass None through to add_video_stream.
    num_threads = 1 if "cuda" in decode_device else None
    torchcodec.decoders._core.add_video_stream(
        decoder, stream_index=0, device_string=decode_device, num_threads=num_threads
    )
    # perf_counter is monotonic and high resolution, unlike the wall clock,
    # which can jump while the benchmark is running.
    start_time = time.perf_counter()
    frame_count = 0
    while True:
        # Only the decode call is guarded: any exception it raises is
        # reported and ends the loop (presumably this is how the end of the
        # stream is signalled -- confirm against the decoder API).
        try:
            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
        except Exception as e:
            print("EXCEPTION", e)
            break
        # You can do a resize to simulate extra preproc work that happens
        # on the GPU by uncommenting the following line:
        # frame = transfer_and_resize_frame(frame, decode_device)
        frame_count += 1
    elapsed = time.perf_counter() - start_time
    # Guard against a zero-length interval so reporting can never raise.
    fps = frame_count / elapsed if elapsed > 0 else 0.0
    print(
        f"****** DECODED full video {decode_device=} {frame_count=} {elapsed=} {fps=}"
    )
    return frame_count, elapsed
|
||
|
||
def main():
    """Parse the command line and benchmark full-video decoding per device.

    With ``--no-use_torch_benchmark`` each device is decoded exactly once
    (cold start, no warmup). Otherwise every device is timed with
    ``torch.utils.benchmark.Timer.blocked_autorange`` and the results are
    printed as a comparison table.
    """
    # Build the default path with os.path.join so that an empty
    # dirname(__file__) yields a relative path instead of one anchored at
    # the filesystem root.
    default_video = os.path.join(
        os.path.dirname(__file__), "..", "..", "test", "resources", "nasa_13013.mp4"
    )

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--devices",
        default="cuda:0,cpu",
        type=str,
        help="Comma-separated devices to test decoding on.",
    )
    parser.add_argument(
        "--video",
        type=str,
        default=default_video,
    )
    parser.add_argument(
        "--use_torch_benchmark",
        action=argparse.BooleanOptionalAction,
        default=True,
        help=(
            "Use pytorch benchmark to measure decode time with warmup and "
            "autorange. Without this we just run one iteration without warmup "
            "to measure the cold start time."
        ),
    )
    args = parser.parse_args()
    video_path = args.video

    # Split once, tolerate spaces around the commas ("cuda:0, cpu") and drop
    # empty entries left by stray commas.
    devices = [name.strip() for name in args.devices.split(",") if name.strip()]
    if not devices:
        parser.error("--devices must name at least one device")

    if not args.use_torch_benchmark:
        for device in devices:
            print("Testing on", device)
            decode_full_video(video_path, device)
        return

    results = []
    for device in devices:
        print("device", device)
        t = benchmark.Timer(
            stmt="decode_full_video(video_path, device)",
            globals={
                "device": device,
                "video_path": video_path,
                "decode_full_video": decode_full_video,
            },
            label="Decode+Resize Time",
            sub_label=f"video={os.path.basename(video_path)}",
            description=f"decode_device={device}",
        ).blocked_autorange()
        results.append(t)
    compare = benchmark.Compare(results)
    compare.print()
|
||
|
||
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.