From d01f151e00f65cbcfe0d3e7135eea3ec6fe8ba22 Mon Sep 17 00:00:00 2001 From: GitBib <15717621+GitBib@users.noreply.github.com> Date: Tue, 3 Sep 2024 23:26:12 +0300 Subject: [PATCH] Add detailed docstrings to all classes and methods. Enhanced the codebase with detailed docstrings for the `Time`, `Subtitle`, and `Dialogue` classes and their methods. This includes comprehensive descriptions of parameters, return types, and examples to improve code readability and maintainability. --- pyasstosrt/dialogue.py | 55 ++++++++++++++----- pyasstosrt/pyasstosrt.py | 111 ++++++++++++++++++++++++++++----------- pyasstosrt/time.py | 45 +++++++++++++--- 3 files changed, 159 insertions(+), 52 deletions(-) diff --git a/pyasstosrt/dialogue.py b/pyasstosrt/dialogue.py index bc4bd44..7a1bfc6 100644 --- a/pyasstosrt/dialogue.py +++ b/pyasstosrt/dialogue.py @@ -2,19 +2,43 @@ class Dialogue: - start: Time - end: Time - text: str - index: int + """ + Represents a dialogue entry in a subtitle file. + + This class encapsulates a single dialogue entry, including its index, + start and end times, and text content. + + :param index: The position of the dialogue in the subtitle file + :type index: int + :param start: The start time of the dialogue + :type start: str + :param end: The end time of the dialogue + :type end: str + :param text: The text content of the dialogue + :type text: str + + :ivar start: The start time of the dialogue + :type start: :class:`~pyasstosrt.time.Time` + :ivar end: The end time of the dialogue + :type end: :class:`~pyasstosrt.time.Time` + :ivar text: The text content of the dialogue + :type text: str + :ivar index: The position of the dialogue in the subtitle file + :type index: int + """ def __init__(self, index: int, start: str, end: str, text: str): """ - Dialogue structure. + Initialize a Dialogue instance. 
- :param index: Must contain a position of dialogue - :param start: Start time of the dialogue - :param end: End of dialog time - :param text: Contains the text of the dialogue + :param index: The position of the dialogue in the subtitle file + :type index: int + :param start: The start time of the dialogue + :type start: str + :param end: The end time of the dialogue + :type end: str + :param text: The text content of the dialogue + :type text: str """ self.index = index self.start = Time(start) @@ -23,16 +47,21 @@ def __init__(self, index: int, start: str, end: str, text: str): def get_timestamp(self) -> str: """ - Format a time line for srt. + Format the timestamp for SRT format. + + Generates a formatted string representation of the dialogue's start and end times + in the SRT timestamp format. - :return: Let's finish the line of the beginning and end of the dialogue... + :return: A formatted string representing the start and end times of the dialogue + :rtype: str """ return f"{self.start} --> {self.end}" def __str__(self) -> str: """ - Formatting the dialogue in string. + Format the dialogue as a string in SRT format. - :return: We get a dialogue in string + :return: A formatted string representation of the dialogue, including index, timestamp, and text + :rtype: str """ return f"{self.index}\n{self.get_timestamp()}\n{self.text}\n\n" diff --git a/pyasstosrt/pyasstosrt.py b/pyasstosrt/pyasstosrt.py index 33bb915..a7ff20c 100644 --- a/pyasstosrt/pyasstosrt.py +++ b/pyasstosrt/pyasstosrt.py @@ -9,10 +9,39 @@ class Subtitle: """ - Converting ass to srt. - - :type filepath: Path to a file that contains text in Advanced SubStation Alpha format - :type removing_effects: Whether to remove effects from the text + Converting ASS (Advanced SubStation Alpha) subtitles to SRT format. + + This class provides functionality to read an ASS subtitle file, convert its contents + to SRT format, and export the result either as a file or as a list of dialogues. 
+ :param filepath: Path to a file that contains text in Advanced SubStation Alpha format + :type filepath: Union[str, os.PathLike] + :param removing_effects: Whether to remove effects from the text + :type removing_effects: bool + :param remove_duplicates: Whether to remove and merge consecutive duplicate dialogues + :type remove_duplicates: bool + + :raises FileNotFoundError: If the specified file does not exist + + :ivar filepath: The path to the input ASS file + :type filepath: AnyStr + :ivar file: The stem (filename without extension) of the input file + :type file: AnyStr + :ivar raw_text: The raw content of the input file + :type raw_text: AnyStr + :ivar dialogues: List of :class:`~pyasstosrt.dialogue.Dialogue` objects representing the subtitles + :type dialogues: List[Dialogue] + :ivar removing_effects: Flag indicating whether to remove effects from the text + :type removing_effects: bool + :ivar is_remove_duplicates: Flag indicating whether to remove and merge consecutive duplicate dialogues + :type is_remove_duplicates: bool + + :Example: + + >>> from pyasstosrt import Subtitle + >>> sub = Subtitle("path/to/subtitle.ass", removing_effects=True, remove_duplicates=True) + >>> # export() runs convert() itself, so no separate convert() call is needed + >>> sub.export("output/directory", encoding="utf-8") """ dialog_mask = re.compile(r"Dialogue: \d+?,(\d:\d{2}:\d{2}.\d{2}),(\d:\d{2}:\d{2}.\d{2}),.*?,\d+,\d+,\d+,.*?,(.*)") @@ -33,22 +62,25 @@ def __init__( self.filepath: AnyStr = filepath self.file: AnyStr = Path(filepath).stem self.raw_text: AnyStr = self.get_text() - self.dialogues: List = [] - self.removing_effects = removing_effects - self.is_remove_duplicates = remove_duplicates + self.dialogues: List[Dialogue] = [] + self.removing_effects: bool = removing_effects + self.is_remove_duplicates: bool = remove_duplicates def get_text(self) -> str: """ - Reads the file and returns the complete contents - :return: File contents + Reads the file and returns the complete contents. 
+ + :return: File contents as a string + :rtype: str """ return Path(self.filepath).read_text(encoding="utf8") def convert(self): """ - Convert the format ass subtitles to srt. + Convert the ASS subtitles to SRT format. - :return: + This method processes the raw text, applies any necessary filters (like removing effects), + and prepares the dialogues for formatting. """ cleaning_old_format = re.compile(r"{.*?}") dialogs = re.findall(self.dialog_mask, re.sub(cleaning_old_format, "", self.raw_text)) @@ -61,23 +93,26 @@ def convert(self): @staticmethod def text_clearing(raw_text: str) -> str: """ - We're clearing the text from unnecessary tags. + Clear the text from unnecessary tags and format line breaks. - :param raw_text: Dialog text with whitespace characters - :return: Dialog text without whitespaces and with the right move to a new line + :param raw_text: Dialog text with whitespace characters and ASS format tags + :type raw_text: str + :return: Cleaned dialog text without whitespaces and with proper line breaks + :rtype: str """ - text = raw_text.replace(r"\h", "\xa0").strip() line_text = text.split(r"\N") return "\n".join(item.strip() for item in line_text).strip() @staticmethod - def merged_dialogues(dialogues: List) -> List[Tuple[str, str, str]]: + def merged_dialogues(dialogues: List[Tuple[str, str, str]]) -> List[Tuple[str, str, str]]: """ Group consecutive dialogues with the same text into a single dialogue with a merged time range. - :return: A generator that iterates over the input dialogues and groups consecutive dialogues - with the same text into a single dialogue with a merged time range. 
+ :param dialogues: List of dialogue tuples (start_time, end_time, text) + :type dialogues: List[Tuple[str, str, str]] + :return: Generator yielding merged dialogues + :rtype: Iterator[Tuple[str, str, str]] """ curr_dialogue = None for start, end, text in dialogues: @@ -91,20 +126,26 @@ def merged_dialogues(dialogues: List) -> List[Tuple[str, str, str]]: if curr_dialogue is not None: yield curr_dialogue - def remove_duplicates(self, dialogues: List): + def remove_duplicates(self, dialogues: List[Tuple[str, str, str]]) -> List[Tuple[str, str, str]]: """ Remove consecutive duplicate dialogues in the given list and merge their time ranges. + :param dialogues: A list of dialogues, where each dialogue is a tuple (start, end, text) + :type dialogues: List[Tuple[str, str, str]] :return: A list of dialogues with consecutive duplicates removed and time ranges merged + :rtype: List[Tuple[str, str, str]] """ return list(self.merged_dialogues(dialogues)) - def subtitle_formatting(self, dialogues: List): + def subtitle_formatting(self, dialogues: List[Tuple[str, str, str]]): """ - Formatting ass into srt. + Format ASS dialogues into SRT format. - :param dialogues: Prepared dialogues - :return: Prepared dialogue sheet + This method processes the dialogues, removes duplicates if necessary, and creates + :class:`~pyasstosrt.dialogue.Dialogue` objects for each subtitle entry. 
+ + :param dialogues: Prepared dialogues as tuples (start_time, end_time, text) + :type dialogues: List[Tuple[str, str, str]] """ cleaned_dialogues = self.remove_duplicates(dialogues) if self.is_remove_duplicates else dialogues @@ -115,17 +156,23 @@ def subtitle_formatting(self, dialogues: List): self.dialogues.append(dialogue) def export( - self, output_dir: AnyStr = None, encoding: AnyStr = "utf8", output_dialogues: bool = False - ) -> Optional[List]: + self, output_dir: Optional[AnyStr] = None, encoding: AnyStr = "utf8", output_dialogues: bool = False + ) -> Optional[List[Dialogue]]: """ - If ret_dialogues parameter is False exports the subtitles to a file. - - :param output_dir: Export path SubRip file - :param encoding: In which encoding you should save the file - :param output_dialogues: Whereas it should return a list of dialogues not creating a SubRip file - :return: List of dialogues + Export the subtitles either to a file or as a list of dialogues. + + If `output_dialogues` is False, this method exports the subtitles to an SRT file. + Otherwise, it returns a list of :class:`~pyasstosrt.dialogue.Dialogue` objects. + + :param output_dir: Export path for the SRT file (optional) + :type output_dir: Optional[AnyStr] + :param encoding: Encoding to use when saving the file (default is UTF-8) + :type encoding: AnyStr + :param output_dialogues: Whether to return a list of dialogues instead of creating an SRT file + :type output_dialogues: bool + :return: List of :class:`~pyasstosrt.dialogue.Dialogue` objects if `output_dialogues` is True, otherwise None + :rtype: Optional[List[Dialogue]] """ - self.convert() if output_dialogues: diff --git a/pyasstosrt/time.py b/pyasstosrt/time.py index 8780504..0aca588 100644 --- a/pyasstosrt/time.py +++ b/pyasstosrt/time.py @@ -1,4 +1,14 @@ class Time: + """ + Represents a time structure for subtitle timestamps. + + Attributes: + hour (int): The hour component of the time. + minute (int): The minute component of the time. 
+ second (int): The second component of the time. + millisecond (int): The millisecond component of the time. + """ + hour: int minute: int second: int @@ -6,9 +16,15 @@ class Time: def __init__(self, text: str): """ - Time data structure. + Initialize a Time object from a string representation. + + Args: + text (str): A string representing time in the format '0:00:00.00'. - :param text: format time '0:00:00.00' + Example: + >>> time = Time("1:23:45.67") + >>> print(time) + 01:23:45,670 """ s = text.split(":") self.hour, self.minute = [int(sr) for sr in s[:-1]] @@ -18,10 +34,19 @@ def __init__(self, text: str): def __sub__(self, other: "Time") -> float: """ - We get the duration of the subtitles. + Calculate the duration between two :class:`Time` objects. - :param other: Another time structure - :return: The difference between the beginning and end of subtitles + Args: + other (:class:`Time`): Another Time object to subtract from this one. + + Returns: + float: The difference in seconds between the two :class:`Time` objects. + + Example: + >>> t1 = Time("0:00:10.00") + >>> t2 = Time("0:00:05.00") + >>> print(t1 - t2) + 5.0 """ return ( (self.hour - other.hour) * 3600 @@ -32,8 +57,14 @@ def __sub__(self, other: "Time") -> float: def __str__(self) -> str: """ - Format the time for str subtitles. + Format the :class:`Time` object as a string for SRT subtitles. + + Returns: + str: A string representation of the time in the format '00:00:00,000'. - :return: We get the format string '0:00:00,000' + Example: + >>> time = Time("1:23:45.67") + >>> str(time) + '01:23:45,670' """ return f"{self.hour:02d}:{self.minute:02d}:{self.second:02d},{self.millisecond:03d}"