# Patch summary (commentary only: these lines precede the first "diff --git"
# header and belong to no hunk).
#
# Purpose: fold the former TextSubtitle class into AssSubtitle.
#
#   av/subtitles/subtitle.pyi
#     - Drops the TextSubtitle stub.
#     - AssSubtitle.type becomes Literal[b"ass", b"text"]; `ass`, `dialogue`
#       and `text` are declared as properties returning bytes.
#
#   av/subtitles/subtitle.pyx
#     - build_subtitle() returns AssSubtitle for both lib.SUBTITLE_ASS and
#       lib.SUBTITLE_TEXT; other non-bitmap types still raise ValueError.
#     - The class previously named TextSubtitle is renamed AssSubtitle and the
#       old AssSubtitle class (with its own __repr__) is removed.
#     - `ass` / `text` return the bytes behind self.ptr.ass / self.ptr.text,
#       or b"" when that pointer is NULL.
#     - `dialogue` skips everything up to and including the 8th comma of
#       `ass`, then copies the remaining bytes, replacing each `\N` pair with
#       a newline byte and omitting `{...}` spans.  A `{` that is immediately
#       followed by a backslash does not open such a span, so those blocks
#       are copied through unchanged.
#
#   tests/test_subtitles.py
#     - test_movtext gains a `-> None` annotation, an `assert isinstance`
#       check, and assertions on `type`, `text` and `dialogue`.
#
# NOTE(review): in `dialogue`, the `\N` check runs before the brace state is
# consulted, so a `\N` inside an omitted `{...}` span still emits a newline.
# Confirm that is intended.
#
# NOTE(review): the __repr__ kept on AssSubtitle is the one that belonged to
# TextSubtitle; its formatting line falls between the second and third
# subtitle.pyx hunks and is not shown here.  It may format `self.text` (b"" in
# the movtext test) rather than `self.ass` -- verify against the full file.
diff --git a/av/subtitles/subtitle.pyi b/av/subtitles/subtitle.pyi
index 2ac9195ec..2a35d0a55 100644
--- a/av/subtitles/subtitle.pyi
+++ b/av/subtitles/subtitle.pyi
@@ -27,10 +27,11 @@ class BitmapSubtitlePlane:
     index: int
     buffer_size: int
 
-class TextSubtitle(Subtitle):
-    type: Literal[b"text"]
-    text: bytes
-
 class AssSubtitle(Subtitle):
-    type: Literal[b"ass"]
-    ass: bytes
+    type: Literal[b"ass", b"text"]
+    @property
+    def ass(self) -> bytes: ...
+    @property
+    def dialogue(self) -> bytes: ...
+    @property
+    def text(self) -> bytes: ...
diff --git a/av/subtitles/subtitle.pyx b/av/subtitles/subtitle.pyx
index 7ced655f6..373bb529b 100644
--- a/av/subtitles/subtitle.pyx
+++ b/av/subtitles/subtitle.pyx
@@ -55,9 +55,7 @@ cdef Subtitle build_subtitle(SubtitleSet subtitle, int index):
 
     if ptr.type == lib.SUBTITLE_BITMAP:
         return BitmapSubtitle(subtitle, index)
-    elif ptr.type == lib.SUBTITLE_TEXT:
-        return TextSubtitle(subtitle, index)
-    elif ptr.type == lib.SUBTITLE_ASS:
+    elif ptr.type == lib.SUBTITLE_ASS or ptr.type == lib.SUBTITLE_TEXT:
         return AssSubtitle(subtitle, index)
     else:
         raise ValueError("unknown subtitle type %r" % ptr.type)
@@ -141,7 +139,10 @@ cdef class BitmapSubtitlePlane:
         PyBuffer_FillInfo(view, self, self._buffer, self.buffer_size, 0, flags)
 
 
-cdef class TextSubtitle(Subtitle):
+cdef class AssSubtitle(Subtitle):
+    """
+    Represents an ASS/Text subtitle format, as opposed to a bitmap Subtitle format.
+    """
     def __repr__(self):
         return (
             f"<{self.__class__.__module__}.{self.__class__.__name__} "
@@ -149,21 +150,55 @@ cdef class TextSubtitle(Subtitle):
         )
 
     @property
-    def text(self):
-        if self.ptr.text is not NULL:
-            return PyBytes_FromString(self.ptr.text)
+    def ass(self):
+        """
+        Returns the subtitle in the ASS/SSA format. Used by the vast majority of subtitle formats.
+        """
+        if self.ptr.ass is not NULL:
+            return PyBytes_FromString(self.ptr.ass)
         return b""
 
-
-cdef class AssSubtitle(Subtitle):
-    def __repr__(self):
-        return (
-            f"<{self.__class__.__module__}.{self.__class__.__name__} "
-            f"{self.ass!r} at 0x{id(self):x}>"
-        )
+    @property
+    def dialogue(self):
+        """
+        Extract the dialogue from the ass format. Strip comments.
+        """
+        comma_count = 0
+        i = 0
+        cdef bytes ass_text = self.ass
+        cdef bytes result = b""
+
+        while comma_count < 8 and i < len(ass_text):
+            if bytes([ass_text[i]]) == b",":
+                comma_count += 1
+            i += 1
+
+        state = False
+        while i < len(ass_text):
+            char = bytes([ass_text[i]])
+            next_char = b"" if i + 1 >= len(ass_text) else bytes([ass_text[i + 1]])
+
+            if char == b"\\" and next_char == b"N":
+                result += b"\n"
+                i += 2
+                continue
+
+            if not state:
+                if char == b"{" and next_char != b"\\":
+                    state = True
+                else:
+                    result += char
+            elif char == b"}":
+                state = False
+            i += 1
+
+        return result
 
     @property
-    def ass(self):
-        if self.ptr.ass is not NULL:
-            return PyBytes_FromString(self.ptr.ass)
+    def text(self):
+        """
+        Rarely used attribute. You're probably looking for dialogue.
+        """
+        if self.ptr.text is not NULL:
+            return PyBytes_FromString(self.ptr.text)
         return b""
diff --git a/tests/test_subtitles.py b/tests/test_subtitles.py
index fe4f2ee3d..cc1409f2b 100644
--- a/tests/test_subtitles.py
+++ b/tests/test_subtitles.py
@@ -5,7 +5,7 @@
 
 
 class TestSubtitle(TestCase):
-    def test_movtext(self):
+    def test_movtext(self) -> None:
         path = fate_suite("sub/MovText_capability_tester.mp4")
 
         subs = []
@@ -23,8 +23,12 @@ def test_movtext(self):
 
         sub = subset[0]
         self.assertIsInstance(sub, AssSubtitle)
+        assert isinstance(sub, AssSubtitle)
 
+        self.assertEqual(sub.type, b"ass")
+        self.assertEqual(sub.text, b"")
         self.assertEqual(sub.ass, b"0,0,Default,,0,0,0,,- Test 1.\\N- Test 2.")
+        self.assertEqual(sub.dialogue, b"- Test 1.\n- Test 2.")
 
     def test_vobsub(self):
         path = fate_suite("sub/vobsub.sub")