Skip to content

Commit

Permalink
Clipping subsampler refactor (#275)
Browse files Browse the repository at this point in the history
* ClippingSubsampler rewrite and bug fixes

* More refactoring of ClippingSubsampler, plus a fix to _get_clip_intervals

* Finished refactoring ClippingSubsampler

* Final code changes

* Added docstrings

* Passed tests and linting

* Made type annotations consistent with Python 3.8

* More annotation fixes

* The Python 3.8 annotation needs a lot of hand-holding, it seems

* Pylint has to cut it out, I swear to God

* No real change, just relaunching unit tests which failed due to connection timeouts

* Linting issue

* Another linting issue

---------

Co-authored-by: iejMac <[email protected]>
Co-authored-by: Romain Beaumont <[email protected]>
  • Loading branch information
3 people authored Jan 24, 2024
1 parent 7ae58f8 commit e7a4591
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 171 deletions.
10 changes: 5 additions & 5 deletions tests/test_subsamplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ClippingSubsampler,
_get_seconds,
_split_time_frame,
Streams,
FFProbeSubsampler,
ResolutionSubsampler,
FrameSubsampler,
Expand Down Expand Up @@ -45,8 +46,8 @@ def test_clipping_subsampler(clips):
min_length = 5.0 if clips == MULTI else 2.0
max_length = 999999.0 if clips == MULTI else 3.0
subsampler = ClippingSubsampler(
3,
{"video": "mp4", "audio": "mp3"},
oom_clip_count=3,
encode_formats={"video": "mp4", "audio": "mp3"},
min_length=min_length,
max_length=max_length,
max_length_strategy="all",
Expand All @@ -58,7 +59,7 @@ def test_clipping_subsampler(clips):
"clips": clips,
}

streams = {"video": [video_bytes], "audio": [audio_bytes]}
streams: Streams = {"video": [video_bytes], "audio": [audio_bytes]}
stream_fragments, meta_fragments, error_message = subsampler(streams, metadata)
video_fragments = stream_fragments["video"]
audio_fragments = stream_fragments["audio"]
Expand All @@ -84,15 +85,14 @@ def test_clipping_subsampler(clips):
s_target, e_target = clips[key_ind]
s_target, e_target = _get_seconds(s_target), _get_seconds(e_target)
expected_clips = _split_time_frame(s_target, e_target, min_length, max_length)
assert (_get_seconds(s), _get_seconds(e)) in expected_clips
assert [_get_seconds(s), _get_seconds(e)] in expected_clips
assert _get_seconds(e) - _get_seconds(s) >= min_length

s_s, e_s = _get_seconds(s), _get_seconds(e)
probe = ffmpeg.probe(tmp.name)
video_stream = [stream for stream in probe["streams"] if stream["codec_type"] == "video"][0]
frag_len = float(video_stream["duration"])

# currently some segments can be pretty inaccurate
assert abs(frag_len - (e_s - s_s)) < 5.0


Expand Down
2 changes: 1 addition & 1 deletion video2dataset/subsamplers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from .audio_rate_subsampler import AudioRateSubsampler
from .clipping_subsampler import ClippingSubsampler, _get_seconds, _split_time_frame
from .clipping_subsampler import ClippingSubsampler, _get_seconds, _split_time_frame, Streams
from .frame_subsampler import FrameSubsampler
from .ffprobe_subsampler import FFProbeSubsampler
from .noop_subsampler import NoOpSubsampler
Expand Down
Loading

0 comments on commit e7a4591

Please sign in to comment.