diff --git a/test/decoders/test_video_decoder.py b/test/decoders/test_video_decoder.py index 854ea91c..4f80cbc0 100644 --- a/test/decoders/test_video_decoder.py +++ b/test/decoders/test_video_decoder.py @@ -11,13 +11,7 @@ from torchcodec.decoders import _core, VideoDecoder -from ..utils import ( - assert_tensor_close, - assert_tensor_equal, - cpu_and_cuda, - H265_VIDEO, - NASA_VIDEO, -) +from ..utils import assert_frames_equal, cpu_and_cuda, H265_VIDEO, NASA_VIDEO class TestVideoDecoder: @@ -73,10 +67,10 @@ def test_getitem_int(self, num_ffmpeg_threads, device): ref_frame180 = NASA_VIDEO.get_frame_data_by_index(180).to(device) ref_frame_last = NASA_VIDEO.get_frame_data_by_index(289).to(device) - assert_tensor_equal(ref_frame0, decoder[0]) - assert_tensor_equal(ref_frame1, decoder[1]) - assert_tensor_equal(ref_frame180, decoder[180]) - assert_tensor_equal(ref_frame_last, decoder[-1]) + assert_frames_equal(ref_frame0, decoder[0]) + assert_frames_equal(ref_frame1, decoder[1]) + assert_frames_equal(ref_frame180, decoder[180]) + assert_frames_equal(ref_frame_last, decoder[-1]) def test_getitem_numpy_int(self): decoder = VideoDecoder(NASA_VIDEO.path) @@ -87,26 +81,26 @@ def test_getitem_numpy_int(self): ref_frame_last = NASA_VIDEO.get_frame_data_by_index(289) # test against numpy.int64 - assert_tensor_equal(ref_frame0, decoder[numpy.int64(0)]) - assert_tensor_equal(ref_frame1, decoder[numpy.int64(1)]) - assert_tensor_equal(ref_frame180, decoder[numpy.int64(180)]) - assert_tensor_equal(ref_frame_last, decoder[numpy.int64(-1)]) + assert_frames_equal(ref_frame0, decoder[numpy.int64(0)]) + assert_frames_equal(ref_frame1, decoder[numpy.int64(1)]) + assert_frames_equal(ref_frame180, decoder[numpy.int64(180)]) + assert_frames_equal(ref_frame_last, decoder[numpy.int64(-1)]) # test against numpy.int32 - assert_tensor_equal(ref_frame0, decoder[numpy.int32(0)]) - assert_tensor_equal(ref_frame1, decoder[numpy.int32(1)]) - assert_tensor_equal(ref_frame180, decoder[numpy.int32(180)]) - assert_tensor_equal(ref_frame_last, decoder[numpy.int32(-1)]) + assert_frames_equal(ref_frame0, decoder[numpy.int32(0)]) + assert_frames_equal(ref_frame1, decoder[numpy.int32(1)]) + assert_frames_equal(ref_frame180, decoder[numpy.int32(180)]) + assert_frames_equal(ref_frame_last, decoder[numpy.int32(-1)]) # test against numpy.uint64 - assert_tensor_equal(ref_frame0, decoder[numpy.uint64(0)]) - assert_tensor_equal(ref_frame1, decoder[numpy.uint64(1)]) - assert_tensor_equal(ref_frame180, decoder[numpy.uint64(180)]) + assert_frames_equal(ref_frame0, decoder[numpy.uint64(0)]) + assert_frames_equal(ref_frame1, decoder[numpy.uint64(1)]) + assert_frames_equal(ref_frame180, decoder[numpy.uint64(180)]) # test against numpy.uint32 - assert_tensor_equal(ref_frame0, decoder[numpy.uint32(0)]) - assert_tensor_equal(ref_frame1, decoder[numpy.uint32(1)]) - assert_tensor_equal(ref_frame180, decoder[numpy.uint32(180)]) + assert_frames_equal(ref_frame0, decoder[numpy.uint32(0)]) + assert_frames_equal(ref_frame1, decoder[numpy.uint32(1)]) + assert_frames_equal(ref_frame180, decoder[numpy.uint32(180)]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_getitem_slice(self, device): @@ -124,7 +118,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref0, slice0) + assert_frames_equal(ref0, slice0) ref4 = NASA_VIDEO.get_frame_data_by_range(4, 5).to(device) slice4 = decoder[4:5] @@ -136,7 +130,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref4, slice4) + assert_frames_equal(ref4, slice4) ref8 = NASA_VIDEO.get_frame_data_by_range(8, 9).to(device) slice8 = decoder[8:9] @@ -148,7 +142,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref8, slice8) + assert_frames_equal(ref8, slice8) ref180 = NASA_VIDEO.get_frame_data_by_index(180).to(device) slice180 = decoder[180:181] @@ -160,7 +154,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref180, slice180[0]) + assert_frames_equal(ref180, slice180[0]) # contiguous ranges ref0_9 = NASA_VIDEO.get_frame_data_by_range(0, 9).to(device) @@ -173,7 +167,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref0_9, slice0_9) + assert_frames_equal(ref0_9, slice0_9) ref4_8 = NASA_VIDEO.get_frame_data_by_range(4, 8).to(device) slice4_8 = decoder[4:8] @@ -185,7 +179,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref4_8, slice4_8) + assert_frames_equal(ref4_8, slice4_8) # ranges with a stride ref15_35 = NASA_VIDEO.get_frame_data_by_range(15, 36, 5).to(device) @@ -198,7 +192,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref15_35, slice15_35) + assert_frames_equal(ref15_35, slice15_35) ref0_9_2 = NASA_VIDEO.get_frame_data_by_range(0, 9, 2).to(device) slice0_9_2 = decoder[0:9:2] @@ -210,7 +204,7 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref0_9_2, slice0_9_2) + assert_frames_equal(ref0_9_2, slice0_9_2) # negative numbers in the slice ref386_389 = NASA_VIDEO.get_frame_data_by_range(386, 390).to(device) @@ -223,15 +217,15 @@ def test_getitem_slice(self, device): NASA_VIDEO.width, ] ) - assert_tensor_equal(ref386_389, slice386_389) + assert_frames_equal(ref386_389, slice386_389) # an empty range is valid! empty_frame = decoder[5:5] - assert_tensor_equal(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device)) + assert_frames_equal(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device)) # slices that are out-of-range are also valid - they return an empty tensor also_empty = decoder[10000:] - assert_tensor_equal(also_empty, NASA_VIDEO.empty_chw_tensor.to(device)) + assert_frames_equal(also_empty, NASA_VIDEO.empty_chw_tensor.to(device)) # should be just a copy all_frames = decoder[:].to(device) @@ -244,7 +238,7 @@ def test_getitem_slice(self, device): ] ) for sliced, ref in zip(all_frames, decoder): - assert_tensor_equal(sliced, ref) + assert_frames_equal(sliced, ref) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_getitem_fails(self, device): @@ -277,21 +271,21 @@ def test_iteration(self, device): # still works as expected. The underlying C++ decoder object is # actually stateful, and accessing a frame will move its internal # cursor. - assert_tensor_equal(ref_frame35, decoder[35]) + assert_frames_equal(ref_frame35, decoder[35]) for i, frame in enumerate(decoder): if i == 0: - assert_tensor_equal(ref_frame0, frame) + assert_frames_equal(ref_frame0, frame) elif i == 1: - assert_tensor_equal(ref_frame1, frame) + assert_frames_equal(ref_frame1, frame) elif i == 9: - assert_tensor_equal(ref_frame9, frame) + assert_frames_equal(ref_frame9, frame) elif i == 35: - assert_tensor_equal(ref_frame35, frame) + assert_frames_equal(ref_frame35, frame) elif i == 180: - assert_tensor_equal(ref_frame180, frame) + assert_frames_equal(ref_frame180, frame) elif i == 389: - assert_tensor_equal(ref_frame_last, frame) + assert_frames_equal(ref_frame_last, frame) def test_iteration_slow(self): decoder = VideoDecoder(NASA_VIDEO.path) @@ -302,7 +296,7 @@ def test_iteration_slow(self): # of total frames. iterations = 0 for frame in decoder: - assert_tensor_equal(ref_frame_last, decoder[-1]) + assert_frames_equal(ref_frame_last, decoder[-1]) iterations += 1 assert iterations == len(decoder) == 390 @@ -314,7 +308,7 @@ def test_get_frame_at(self, device): ref_frame9 = NASA_VIDEO.get_frame_data_by_index(9).to(device) frame9 = decoder.get_frame_at(9) - assert_tensor_equal(ref_frame9, frame9.data) + assert_frames_equal(ref_frame9, frame9.data) assert isinstance(frame9.pts_seconds, float) expected_frame_info = NASA_VIDEO.get_frame_info(9) assert frame9.pts_seconds == pytest.approx(expected_frame_info.pts_seconds) @@ -325,19 +319,19 @@ def test_get_frame_at(self, device): # test numpy.int64 frame9 = decoder.get_frame_at(numpy.int64(9)) - assert_tensor_equal(ref_frame9, frame9.data) + assert_frames_equal(ref_frame9, frame9.data) # test numpy.int32 frame9 = decoder.get_frame_at(numpy.int32(9)) - assert_tensor_equal(ref_frame9, frame9.data) + assert_frames_equal(ref_frame9, frame9.data) # test numpy.uint64 frame9 = decoder.get_frame_at(numpy.uint64(9)) - assert_tensor_equal(ref_frame9, frame9.data) + assert_frames_equal(ref_frame9, frame9.data) # test numpy.uint32 frame9 = decoder.get_frame_at(numpy.uint32(9)) - assert_tensor_equal(ref_frame9, frame9.data) + assert_frames_equal(ref_frame9, frame9.data) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frame_at_tuple_unpacking(self, device): @@ -346,7 +340,7 @@ def test_get_frame_at_tuple_unpacking(self, device): frame = decoder.get_frame_at(50) data, pts, duration = decoder.get_frame_at(50) - assert_tensor_equal(frame.data, data) + assert_frames_equal(frame.data, data) assert frame.pts_seconds == pts assert frame.duration_seconds == duration @@ -368,10 +362,10 @@ def test_get_frames_at(self, device): assert isinstance(frames, FrameBatch) - assert_tensor_equal( + assert_frames_equal( frames[0].data, NASA_VIDEO.get_frame_data_by_index(35).to(device) ) - assert_tensor_equal( + assert_frames_equal( frames[1].data, NASA_VIDEO.get_frame_data_by_index(25).to(device) ) @@ -417,13 +411,13 @@ def test_get_frame_played_at(self, device): decoder = VideoDecoder(NASA_VIDEO.path, device=device) ref_frame_played_at_6 = NASA_VIDEO.get_frame_data_by_index(180).to(device) - assert_tensor_equal( + assert_frames_equal( ref_frame_played_at_6, decoder.get_frame_played_at(6.006).data ) - assert_tensor_equal( + assert_frames_equal( ref_frame_played_at_6, decoder.get_frame_played_at(6.02).data ) - assert_tensor_equal( + assert_frames_equal( ref_frame_played_at_6, decoder.get_frame_played_at(6.039366).data ) assert isinstance(decoder.get_frame_played_at(6.02).pts_seconds, float) @@ -436,7 +430,7 @@ def test_get_frame_played_at_h265(self): # https://github.com/pytorch/torchcodec/pull/350#issuecomment-2465011730 decoder = VideoDecoder(H265_VIDEO.path) ref_frame6 = H265_VIDEO.get_frame_data_by_index(5) - assert_tensor_equal(ref_frame6, decoder.get_frame_played_at(0.5).data) + assert_frames_equal(ref_frame6, decoder.get_frame_played_at(0.5).data) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frame_played_at_fails(self, device): @@ -462,7 +456,7 @@ def test_get_frames_played_at(self, device): assert isinstance(frames, FrameBatch) for i in range(len(reference_indices)): - assert_tensor_equal( + assert_frames_equal( frames.data[i], NASA_VIDEO.get_frame_data_by_index(reference_indices[i]).to(device), ) @@ -511,7 +505,7 @@ def test_get_frames_in_range(self, stream_index, device): ).to(device) frames9 = decoder.get_frames_in_range(start=9, stop=10) - assert_tensor_equal(ref_frames9, frames9.data) + assert_frames_equal(ref_frames9, frames9.data) assert frames9.pts_seconds.device.type == "cpu" assert frames9.pts_seconds[0].item() == pytest.approx( @@ -537,14 +531,18 @@ def test_get_frames_in_range(self, stream_index, device): NASA_VIDEO.get_width(stream_index=stream_index), ] ) - assert_tensor_equal(ref_frames0_9, frames0_9.data) - assert_tensor_close( + assert_frames_equal(ref_frames0_9, frames0_9.data) + torch.testing.assert_close( NASA_VIDEO.get_pts_seconds_by_range(0, 10, stream_index=stream_index), frames0_9.pts_seconds, + atol=1e-6, + rtol=1e-6, ) - assert_tensor_close( + torch.testing.assert_close( NASA_VIDEO.get_duration_seconds_by_range(0, 10, stream_index=stream_index), frames0_9.duration_seconds, + atol=1e-6, + rtol=1e-6, ) # test steps @@ -560,32 +558,38 @@ def test_get_frames_in_range(self, stream_index, device): NASA_VIDEO.get_width(stream_index=stream_index), ] ) - assert_tensor_equal(ref_frames0_8_2, frames0_8_2.data) - assert_tensor_close( + assert_frames_equal(ref_frames0_8_2, frames0_8_2.data) + torch.testing.assert_close( NASA_VIDEO.get_pts_seconds_by_range(0, 10, 2, stream_index=stream_index), frames0_8_2.pts_seconds, + atol=1e-6, + rtol=1e-6, ) - assert_tensor_close( + torch.testing.assert_close( NASA_VIDEO.get_duration_seconds_by_range( 0, 10, 2, stream_index=stream_index ), frames0_8_2.duration_seconds, + atol=1e-6, + rtol=1e-6, ) # test numpy.int64 for indices frames0_8_2 = decoder.get_frames_in_range( start=numpy.int64(0), stop=numpy.int64(10), step=numpy.int64(2) ) - assert_tensor_equal(ref_frames0_8_2, frames0_8_2.data) + assert_frames_equal(ref_frames0_8_2, frames0_8_2.data) # an empty range is valid! empty_frames = decoder.get_frames_in_range(5, 5) - assert_tensor_equal( + assert_frames_equal( empty_frames.data, NASA_VIDEO.get_empty_chw_tensor(stream_index=stream_index).to(device), ) - assert_tensor_equal(empty_frames.pts_seconds, NASA_VIDEO.empty_pts_seconds) - assert_tensor_equal( + torch.testing.assert_close( + empty_frames.pts_seconds, NASA_VIDEO.empty_pts_seconds + ) + torch.testing.assert_close( empty_frames.duration_seconds, NASA_VIDEO.empty_duration_seconds ) @@ -653,7 +657,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): frames0_4 = decoder.get_frames_played_in_range( decoder.get_frame_at(0).pts_seconds, decoder.get_frame_at(5).pts_seconds ) - assert_tensor_equal( + assert_frames_equal( frames0_4.data, NASA_VIDEO.get_frame_data_by_range(0, 5, stream_index=stream_index).to( device @@ -665,7 +669,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): decoder.get_frame_at(0).pts_seconds, decoder.get_frame_at(4).pts_seconds + HALF_DURATION, ) - assert_tensor_equal(also_frames0_4.data, frames0_4.data) + assert_frames_equal(also_frames0_4.data, frames0_4.data) # Again, the intention here is to provide the exact values we care about. In practice, our # pts values are slightly smaller, so we nudge the start upwards. @@ -673,7 +677,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): decoder.get_frame_at(5).pts_seconds, decoder.get_frame_at(10).pts_seconds, ) - assert_tensor_equal( + assert_frames_equal( frames5_9.data, NASA_VIDEO.get_frame_data_by_range(5, 10, stream_index=stream_index).to( device @@ -687,7 +691,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): decoder.get_frame_at(6).pts_seconds, decoder.get_frame_at(6).pts_seconds + HALF_DURATION, ) - assert_tensor_equal( + assert_frames_equal( frame6.data, NASA_VIDEO.get_frame_data_by_range(6, 7, stream_index=stream_index).to( device @@ -699,7 +703,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): decoder.get_frame_at(35).pts_seconds, decoder.get_frame_at(35).pts_seconds + 1e-10, ) - assert_tensor_equal( + assert_frames_equal( frame35.data, NASA_VIDEO.get_frame_data_by_range(35, 36, stream_index=stream_index).to( device @@ -715,7 +719,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): NASA_VIDEO.get_frame_info(8, stream_index=stream_index).pts_seconds + HALF_DURATION, ) - assert_tensor_equal( + assert_frames_equal( frames7_8.data, NASA_VIDEO.get_frame_data_by_range(7, 9, stream_index=stream_index).to( device @@ -727,16 +731,18 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): NASA_VIDEO.get_frame_info(4, stream_index=stream_index).pts_seconds, NASA_VIDEO.get_frame_info(4, stream_index=stream_index).pts_seconds, ) - assert_tensor_equal( + assert_frames_equal( empty_frame.data, NASA_VIDEO.get_empty_chw_tensor(stream_index=stream_index).to(device), ) - assert_tensor_equal( - empty_frame.pts_seconds, - NASA_VIDEO.empty_pts_seconds, + torch.testing.assert_close( + empty_frame.pts_seconds, NASA_VIDEO.empty_pts_seconds, atol=0, rtol=0 ) - assert_tensor_equal( - empty_frame.duration_seconds, NASA_VIDEO.empty_duration_seconds + torch.testing.assert_close( + empty_frame.duration_seconds, + NASA_VIDEO.empty_duration_seconds, + atol=0, + rtol=0, ) # Start and stop seconds land within the first frame. @@ -745,7 +751,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): NASA_VIDEO.get_frame_info(0, stream_index=stream_index).pts_seconds + HALF_DURATION, ) - assert_tensor_equal( + assert_frames_equal( frame0.data, NASA_VIDEO.get_frame_data_by_range(0, 1, stream_index=stream_index).to( device @@ -757,7 +763,7 @@ def test_get_frames_by_pts_in_range(self, stream_index, device): all_frames = decoder.get_frames_played_in_range( decoder.metadata.begin_stream_seconds, decoder.metadata.end_stream_seconds ) - assert_tensor_equal(all_frames.data, decoder[:]) + assert_frames_equal(all_frames.data, decoder[:]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frames_by_pts_in_range_fails(self, device): diff --git a/test/decoders/test_video_decoder_ops.py b/test/decoders/test_video_decoder_ops.py index bc3d0e62..9baf6a39 100644 --- a/test/decoders/test_video_decoder_ops.py +++ b/test/decoders/test_video_decoder_ops.py @@ -37,7 +37,7 @@ ) from ..utils import ( - assert_tensor_equal, + assert_frames_equal, cpu_and_cuda, NASA_AUDIO, NASA_VIDEO, @@ -70,16 +70,16 @@ def test_seek_and_next(self, device): add_video_stream(decoder, device=device) frame0, _, _ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1) frame1, _, _ = get_next_frame(decoder) - assert_tensor_equal(frame1, reference_frame1.to(device)) + assert_frames_equal(frame1, reference_frame1.to(device)) seek_to_pts(decoder, 6.0) frame_time6, _, _ = get_next_frame(decoder) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame_time6, reference_frame_time6.to(device)) + assert_frames_equal(frame_time6, reference_frame_time6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_seek_to_negative_pts(self, device): @@ -88,11 +88,11 @@ def test_seek_to_negative_pts(self, device): add_video_stream(decoder, device=device) frame0, _, _ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) seek_to_pts(decoder, -1e-4) frame0, _, _ = get_next_frame(decoder) - assert_tensor_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frame_at_pts(self, device): @@ -104,11 +104,11 @@ def test_get_frame_at_pts(self, device): reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6.to(device)) + assert_frames_equal(frame6, reference_frame6.to(device)) frame6, _, _ = get_frame_at_pts(decoder, 6.02) - assert_tensor_equal(frame6, reference_frame6.to(device)) + assert_frames_equal(frame6, reference_frame6.to(device)) frame6, _, _ = get_frame_at_pts(decoder, 6.039366) - assert_tensor_equal(frame6, reference_frame6.to(device)) + assert_frames_equal(frame6, reference_frame6.to(device)) # Note that this timestamp is exactly on a frame boundary, so it should # return the next frame since the right boundary of the interval is # open. @@ -116,7 +116,7 @@ def test_get_frame_at_pts(self, device): if device == "cpu": # We can only compare exact equality on CPU. with pytest.raises(AssertionError): - assert_tensor_equal(next_frame, reference_frame6.to(device)) + assert_frames_equal(next_frame, reference_frame6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frame_at_index(self, device): @@ -125,13 +125,13 @@ def test_get_frame_at_index(self, device): add_video_stream(decoder, device=device) frame0, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=0) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) # The frame that is played at 6 seconds is frame 180 from a 0-based index. frame6, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=180) reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6.to(device)) + assert_frames_equal(frame6, reference_frame6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frame_with_info_at_index(self, device): @@ -144,7 +144,7 @@ def test_get_frame_with_info_at_index(self, device): reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6.to(device)) + assert_frames_equal(frame6, reference_frame6.to(device)) assert pts.item() == pytest.approx(6.006, rel=1e-3) assert duration.item() == pytest.approx(0.03337, rel=1e-3) @@ -160,8 +160,8 @@ def test_get_frames_at_indices(self, device): reference_frame180 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frames0and180[0], reference_frame0.to(device)) - assert_tensor_equal(frames0and180[1], reference_frame180.to(device)) + assert_frames_equal(frames0and180[0], reference_frame0.to(device)) + assert_frames_equal(frames0and180[1], reference_frame180.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frames_at_indices_unsorted_indices(self, device): @@ -185,15 +185,15 @@ def test_get_frames_at_indices_unsorted_indices(self, device): frame_indices=frame_indices, ) for frame, expected_frame in zip(frames, expected_frames): - assert_tensor_equal(frame, expected_frame) + assert_frames_equal(frame, expected_frame) # first and last frame should be equal, at index 2. We then modify the # first frame and assert that it's now different from the last frame. # This ensures a copy was properly made during the de-duplication logic. - assert_tensor_equal(frames[0], frames[-1]) + assert_frames_equal(frames[0], frames[-1]) frames[0] += 20 with pytest.raises(AssertionError): - assert_tensor_equal(frames[0], frames[-1]) + assert_frames_equal(frames[0], frames[-1]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frames_by_pts(self, device): @@ -215,16 +215,16 @@ def test_get_frames_by_pts(self, device): timestamps=timestamps, ) for frame, expected_frame in zip(frames, expected_frames): - assert_tensor_equal(frame, expected_frame) + assert_frames_equal(frame, expected_frame) # first and last frame should be equal, at pts=2 [+ eps]. We then modify # the first frame and assert that it's now different from the last # frame. This ensures a copy was properly made during the de-duplication # logic. - assert_tensor_equal(frames[0], frames[-1]) + assert_frames_equal(frames[0], frames[-1]) frames[0] += 20 with pytest.raises(AssertionError): - assert_tensor_equal(frames[0], frames[-1]) + assert_frames_equal(frames[0], frames[-1]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_pts_apis_against_index_ref(self, device): @@ -258,7 +258,7 @@ def test_pts_apis_against_index_ref(self, device): *[get_frame_at_pts(decoder, seconds=pts) for pts in all_pts_seconds_ref] ) pts_seconds = torch.tensor(pts_seconds) - assert_tensor_equal(pts_seconds, all_pts_seconds_ref) + torch.testing.assert_close(pts_seconds, all_pts_seconds_ref, atol=0, rtol=0) _, pts_seconds, _ = get_frames_by_pts_in_range( decoder, @@ -266,7 +266,7 @@ def test_pts_apis_against_index_ref(self, device): start_seconds=0, stop_seconds=all_pts_seconds_ref[-1] + 1e-4, ) - assert_tensor_equal(pts_seconds, all_pts_seconds_ref) + torch.testing.assert_close(pts_seconds, all_pts_seconds_ref, atol=0, rtol=0) _, pts_seconds, _ = zip( *[ @@ -280,12 +280,12 @@ def test_pts_apis_against_index_ref(self, device): ] ) pts_seconds = torch.tensor(pts_seconds) - assert_tensor_equal(pts_seconds, all_pts_seconds_ref) + torch.testing.assert_close(pts_seconds, all_pts_seconds_ref, atol=0, rtol=0) _, pts_seconds, _ = get_frames_by_pts( decoder, stream_index=stream_index, timestamps=all_pts_seconds_ref.tolist() ) - assert_tensor_equal(pts_seconds, all_pts_seconds_ref) + torch.testing.assert_close(pts_seconds, all_pts_seconds_ref, atol=0, rtol=0) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_get_frames_in_range(self, device): @@ -296,47 +296,47 @@ def test_get_frames_in_range(self, device): # ensure that the degenerate case of a range of size 1 works ref_frame0 = NASA_VIDEO.get_frame_data_by_range(0, 1) bulk_frame0, *_ = get_frames_in_range(decoder, stream_index=3, start=0, stop=1) - assert_tensor_equal(bulk_frame0, ref_frame0.to(device)) + assert_frames_equal(bulk_frame0, ref_frame0.to(device)) ref_frame1 = NASA_VIDEO.get_frame_data_by_range(1, 2) bulk_frame1, *_ = get_frames_in_range(decoder, stream_index=3, start=1, stop=2) - assert_tensor_equal(bulk_frame1, ref_frame1.to(device)) + assert_frames_equal(bulk_frame1, ref_frame1.to(device)) ref_frame389 = NASA_VIDEO.get_frame_data_by_range(389, 390) bulk_frame389, *_ = get_frames_in_range( decoder, stream_index=3, start=389, stop=390 ) - assert_tensor_equal(bulk_frame389, ref_frame389.to(device)) + assert_frames_equal(bulk_frame389, ref_frame389.to(device)) # contiguous ranges ref_frames0_9 = NASA_VIDEO.get_frame_data_by_range(0, 9) bulk_frames0_9, *_ = get_frames_in_range( decoder, stream_index=3, start=0, stop=9 ) - assert_tensor_equal(bulk_frames0_9, ref_frames0_9.to(device)) + assert_frames_equal(bulk_frames0_9, ref_frames0_9.to(device)) ref_frames4_8 = NASA_VIDEO.get_frame_data_by_range(4, 8) bulk_frames4_8, *_ = get_frames_in_range( decoder, stream_index=3, start=4, stop=8 ) - assert_tensor_equal(bulk_frames4_8, ref_frames4_8.to(device)) + assert_frames_equal(bulk_frames4_8, ref_frames4_8.to(device)) # ranges with a stride ref_frames15_35 = NASA_VIDEO.get_frame_data_by_range(15, 36, 5) bulk_frames15_35, *_ = get_frames_in_range( decoder, stream_index=3, start=15, stop=36, step=5 ) - assert_tensor_equal(bulk_frames15_35, ref_frames15_35.to(device)) + assert_frames_equal(bulk_frames15_35, ref_frames15_35.to(device)) ref_frames0_9_2 = NASA_VIDEO.get_frame_data_by_range(0, 9, 2) bulk_frames0_9_2, *_ = get_frames_in_range( decoder, stream_index=3, start=0, stop=9, step=2 ) - assert_tensor_equal(bulk_frames0_9_2, ref_frames0_9_2.to(device)) + assert_frames_equal(bulk_frames0_9_2, ref_frames0_9_2.to(device)) # an empty range is valid! empty_frame, *_ = get_frames_in_range(decoder, stream_index=3, start=5, stop=5) - assert_tensor_equal(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device)) + assert_frames_equal(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_throws_exception_at_eof(self, device): @@ -345,7 +345,7 @@ def test_throws_exception_at_eof(self, device): seek_to_pts(decoder, 12.979633) last_frame, _, _ = get_next_frame(decoder) reference_last_frame = NASA_VIDEO.get_frame_data_by_index(289) - assert_tensor_equal(last_frame, reference_last_frame.to(device)) + assert_frames_equal(last_frame, reference_last_frame.to(device)) with pytest.raises(IndexError, match="no more frames"): get_next_frame(decoder) @@ -379,8 +379,8 @@ def get_frame1_and_frame_time6(decoder): reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame0, reference_frame0.to(device)) - assert_tensor_equal(frame_time6, reference_frame_time6.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame_time6, reference_frame_time6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_class_based_compile_seek_and_next(self, device): @@ -400,8 +400,8 @@ def class_based_get_frame1_and_frame_time6( reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame0, reference_frame0.to(device)) - assert_tensor_equal(frame_time6, reference_frame_time6.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame_time6, reference_frame_time6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("create_from", ("file", "tensor", "bytes")) @@ -421,16 +421,16 @@ def test_create_decoder(self, create_from, device): add_video_stream(decoder, device=device) frame0, _, _ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0.to(device)) + assert_frames_equal(frame0, reference_frame0.to(device)) reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1) frame1, _, _ = get_next_frame(decoder) - assert_tensor_equal(frame1, reference_frame1.to(device)) + assert_frames_equal(frame1, reference_frame1.to(device)) seek_to_pts(decoder, 6.0) frame_time6, _, _ = get_next_frame(decoder) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame_time6, reference_frame_time6.to(device)) + assert_frames_equal(frame_time6, reference_frame_time6.to(device)) # Keeping the metadata tests below for now, but we should remove them # once we remove get_json_metadata(). @@ -504,16 +504,16 @@ def test_color_conversion_library(self, color_conversion_library): _add_video_stream(decoder, color_conversion_library=color_conversion_library) frame0, *_ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0) + assert_frames_equal(frame0, reference_frame0) reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1) frame1, *_ = get_next_frame(decoder) - assert_tensor_equal(frame1, reference_frame1) + assert_frames_equal(frame1, reference_frame1) seek_to_pts(decoder, 6.0) frame_time6, *_ = get_next_frame(decoder) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame_time6, reference_frame_time6) + assert_frames_equal(frame_time6, reference_frame_time6) # We choose arbitrary values for width and height scaling to get better # test coverage. Some pairs upscale the image while others downscale it. @@ -557,7 +557,7 @@ def test_color_conversion_library_with_scaling( color_conversion_library="swscale", ) swscale_frame0, _, _ = get_next_frame(swscale_decoder) - assert_tensor_equal(filtergraph_frame0, swscale_frame0) + assert_frames_equal(filtergraph_frame0, swscale_frame0) @pytest.mark.parametrize("dimension_order", ("NHWC", "NCHW")) @pytest.mark.parametrize("color_conversion_library", ("filtergraph", "swscale")) @@ -582,29 +582,29 @@ def test_color_conversion_library_with_dimension_order( decoder, stream_index=stream_index, frame_index=0 ) assert frame0.shape == expected_shape - assert_tensor_equal(frame0, frame0_ref) + assert_frames_equal(frame0, frame0_ref) frame0, *_ = get_frame_at_pts(decoder, seconds=0.0) assert frame0.shape == expected_shape - assert_tensor_equal(frame0, frame0_ref) + assert_frames_equal(frame0, frame0_ref) frames, *_ = get_frames_in_range( decoder, stream_index=stream_index, start=0, stop=3 ) assert frames.shape[1:] == expected_shape - assert_tensor_equal(frames[0], frame0_ref) + assert_frames_equal(frames[0], frame0_ref) frames, *_ = get_frames_by_pts_in_range( decoder, stream_index=stream_index, start_seconds=0, stop_seconds=1 ) assert frames.shape[1:] == expected_shape - assert_tensor_equal(frames[0], frame0_ref) + assert_frames_equal(frames[0], frame0_ref) frames, *_ = get_frames_at_indices( decoder, stream_index=stream_index, frame_indices=[0, 1, 3, 4] ) assert frames.shape[1:] == expected_shape - assert_tensor_equal(frames[0], frame0_ref) + assert_frames_equal(frames[0], frame0_ref) @pytest.mark.parametrize( "width_scaling_factor,height_scaling_factor", @@ -681,7 +681,7 @@ def test_color_conversion_library_with_generated_videos( height=target_height, ) auto_frame0, _, _ = get_next_frame(auto_decoder) - assert_tensor_equal(filtergraph_frame0, auto_frame0) + assert_frames_equal(filtergraph_frame0, auto_frame0) @needs_cuda def test_cuda_decoder(self): @@ -691,7 +691,7 @@ def test_cuda_decoder(self): frame0, pts, duration = get_next_frame(decoder) assert frame0.device.type == "cuda" reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0.to("cuda")) + assert_frames_equal(frame0, reference_frame0.to("cuda")) assert pts == torch.tensor([0]) torch.testing.assert_close( duration, torch.tensor(0.0334).double(), atol=0, rtol=1e-3 diff --git a/test/samplers/test_samplers.py b/test/samplers/test_samplers.py index 854efd9c..d5c7eb44 100644 --- a/test/samplers/test_samplers.py +++ b/test/samplers/test_samplers.py @@ -19,7 +19,7 @@ from torchcodec.samplers._index_based import _build_all_clips_indices from torchcodec.samplers._time_based import _build_all_clips_timestamps -from ..utils import assert_tensor_equal, NASA_VIDEO +from ..utils import assert_frames_equal, NASA_VIDEO def _assert_output_type_and_shapes( @@ -190,7 +190,7 @@ def test_against_ref(sampler): ) clip = sampler(decoder, num_frames_per_clip=num_frames_per_clip)[0] - assert_tensor_equal(clip.data, expected_clip_data) + assert_frames_equal(clip.data, expected_clip_data) @pytest.mark.parametrize( @@ -245,7 +245,7 @@ def test_sampling_range( ) # This context manager is used to ensure that the call to - # assert_tensor_equal() below either passes (nullcontext) or fails + # assert_frames_equal() below either passes (nullcontext) or fails # (pytest.raises) cm = ( contextlib.nullcontext() @@ -254,7 +254,7 @@ def test_sampling_range( ) with cm: for clip in clips: - assert_tensor_equal(clip.data, clips[0].data) + assert_frames_equal(clip.data, clips[0].data) @pytest.mark.parametrize("sampler", (clips_at_random_indices, clips_at_regular_indices)) @@ -282,10 +282,10 @@ def test_sampling_range_negative(sampler): # There is only one unique clip in clips_1... for clip in clips_1: - assert_tensor_equal(clip.data, clips_1[0].data) + assert_frames_equal(clip.data, clips_1[0].data) # ... and it's the same that's in clips_2 for clip in clips_2: - assert_tensor_equal(clip.data, clips_1[0].data) + assert_frames_equal(clip.data, clips_1[0].data) @pytest.mark.parametrize( @@ -436,15 +436,19 @@ def test_random_sampler_randomness(sampler): clips_2 = sampler(decoder, num_clips=num_clips) for clip_1, clip_2 in zip(clips_1, clips_2): - assert_tensor_equal(clip_1.data, clip_2.data) - assert_tensor_equal(clip_1.pts_seconds, clip_2.pts_seconds) - assert_tensor_equal(clip_1.duration_seconds, clip_2.duration_seconds) + assert_frames_equal(clip_1.data, clip_2.data) + torch.testing.assert_close( + clip_1.pts_seconds, clip_2.pts_seconds, rtol=0, atol=0 + ) + torch.testing.assert_close( + clip_1.duration_seconds, clip_2.duration_seconds, rtol=0, atol=0 + ) # Call with a different seed, expect different results torch.manual_seed(1) clips_3 = sampler(decoder, num_clips=num_clips) with pytest.raises(AssertionError, match="Tensor-likes are not"): - assert_tensor_equal(clips_1[0].data, clips_3[0].data) + assert_frames_equal(clips_1[0].data, clips_3[0].data) # Make sure we didn't alter the builtin Python RNG builtin_random_state_end = random.getstate() diff --git a/test/utils.py b/test/utils.py index d0663b0a..8ab9602f 100644 --- a/test/utils.py +++ b/test/utils.py @@ -28,7 +28,7 @@ def cpu_and_cuda(): # On other platforms (e.g. MacOS), we also allow a small tolerance. FFmpeg does # not guarantee bit-for-bit equality across systems and architectures, so we # also cannot. We currently use Linux on x86_64 as our reference system. -def assert_tensor_equal(*args, **kwargs): +def assert_frames_equal(*args, **kwargs): if sys.platform == "linux": if args[0].device.type == "cuda": # CUDA tensors are not exactly equal on Linux, so we need to use a @@ -41,15 +41,6 @@ def assert_tensor_equal(*args, **kwargs): torch.testing.assert_close(*args, **kwargs, atol=absolute_tolerance, rtol=0) -# For use with floating point metadata, or in other instances where we are not confident -# that reference and test tensors can be exactly equal. This is true for pts and duration -# in seconds, as the reference values are from ffprobe's JSON output. In that case, it is -# limiting the floating point precision when printing the value as a string. The value from -# JSON and the value we retrieve during decoding are not exactly the same. -def assert_tensor_close(*args, **kwargs): - torch.testing.assert_close(*args, **kwargs, atol=1e-6, rtol=1e-6) - - def in_fbcode() -> bool: return os.environ.get("IN_FBCODE_TORCHCODEC") == "1"