Skip to content

Commit

Permalink
Add detailed docstrings to all classes and methods.
Browse files (browse the repository at this point in the history)
Enhanced the codebase with detailed docstrings for the `Time`, `Subtitle`, and `Dialogue` classes and their methods. This includes comprehensive descriptions of parameters, return types, and examples to improve code readability and maintainability.
  • Loading branch information
GitBib committed Sep 3, 2024
1 parent 19ab83a commit d01f151
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 52 deletions.
55 changes: 42 additions & 13 deletions pyasstosrt/dialogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,43 @@


class Dialogue:
start: Time
end: Time
text: str
index: int
"""
Represents a dialogue entry in a subtitle file.
This class encapsulates a single dialogue entry, including its index,
start and end times, and text content.
:param index: The position of the dialogue in the subtitle file
:type index: int
:param start: The start time of the dialogue
:type start: str
:param end: The end time of the dialogue
:type end: str
:param text: The text content of the dialogue
:type text: str
:ivar start: The start time of the dialogue
:type start: :class:`~pyasstosrt.time.Time`
:ivar end: The end time of the dialogue
:type end: :class:`~pyasstosrt.time.Time`
:ivar text: The text content of the dialogue
:type text: str
:ivar index: The position of the dialogue in the subtitle file
:type index: int
"""

def __init__(self, index: int, start: str, end: str, text: str):
"""
Dialogue structure.
Initialize a Dialogue instance.
:param index: Must contain a position of dialogue
:param start: Start time of the dialogue
:param end: End of dialog time
:param text: Contains the text of the dialogue
:param index: The position of the dialogue in the subtitle file
:type index: int
:param start: The start time of the dialogue
:type start: str
:param end: The end time of the dialogue
:type end: str
:param text: The text content of the dialogue
:type text: str
"""
self.index = index
self.start = Time(start)
Expand All @@ -23,16 +47,21 @@ def __init__(self, index: int, start: str, end: str, text: str):

def get_timestamp(self) -> str:
    """
    Build the SRT timing line for this dialogue.

    The start and end times are rendered through their own string
    conversion and joined with the ``-->`` separator.

    :return: The start and end times of the dialogue separated by `` --> ``
    :rtype: str
    """
    begin, finish = self.start, self.end
    return "{} --> {}".format(begin, finish)

def __str__(self) -> str:
"""
Formatting the dialogue in string.
Format the dialogue as a string in SRT format.
:return: We get a dialogue in string
:return: A formatted string representation of the dialogue, including index, timestamp, and text
:rtype: str
"""
return f"{self.index}\n{self.get_timestamp()}\n{self.text}\n\n"
111 changes: 79 additions & 32 deletions pyasstosrt/pyasstosrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,39 @@

class Subtitle:
"""
Converting ass to srt.
:type filepath: Path to a file that contains text in Advanced SubStation Alpha format
:type removing_effects: Whether to remove effects from the text
Converting ASS (Advanced SubStation Alpha) subtitles to SRT format.
This class provides functionality to read an ASS subtitle file, convert its contents
to SRT format, and export the result either as a file or as a list of dialogues.
:param filepath: Path to a file that contains text in Advanced SubStation Alpha format
:type filepath: Union[str, os.PathLike]
:param removing_effects: Whether to remove effects from the text
:type removing_effects: bool
:param remove_duplicates: Whether to remove and merge consecutive duplicate dialogues
:type remove_duplicates: bool
:raises FileNotFoundError: If the specified file does not exist
:ivar filepath: The path to the input ASS file
:type filepath: AnyStr
:ivar file: The stem (filename without extension) of the input file
:type file: AnyStr
:ivar raw_text: The raw content of the input file
:type raw_text: AnyStr
:ivar dialogues: List of :class:`~pyasstosrt.dialogue.Dialogue` objects representing the subtitles
:type dialogues: List[Dialogue]
:ivar removing_effects: Flag indicating whether to remove effects from the text
:type removing_effects: bool
:ivar is_remove_duplicates: Flag indicating whether to remove and merge consecutive duplicate dialogues
:type is_remove_duplicates: bool
:Example:
>>> from pyasstosrt import Subtitle
>>> sub = Subtitle("path/to/subtitle.ass", removing_effects=True, remove_duplicates=True)
>>> sub.convert()
>>> sub.export("output/directory", encoding="utf-8")
"""

dialog_mask = re.compile(r"Dialogue: \d+?,(\d:\d{2}:\d{2}.\d{2}),(\d:\d{2}:\d{2}.\d{2}),.*?,\d+,\d+,\d+,.*?,(.*)")
Expand All @@ -33,22 +62,25 @@ def __init__(
self.filepath: AnyStr = filepath
self.file: AnyStr = Path(filepath).stem
self.raw_text: AnyStr = self.get_text()
self.dialogues: List = []
self.removing_effects = removing_effects
self.is_remove_duplicates = remove_duplicates
self.dialogues: List[Dialogue] = []
self.removing_effects: bool = removing_effects
self.is_remove_duplicates: bool = remove_duplicates

def get_text(self) -> str:
    """
    Read the whole input file as text.

    The file located at ``self.filepath`` is decoded as UTF-8.

    :return: File contents as a string
    :rtype: str
    """
    source = Path(self.filepath)
    with source.open(encoding="utf8") as handle:
        return handle.read()

def convert(self):
"""
Convert the format ass subtitles to srt.
Convert the ASS subtitles to SRT format.
:return:
This method processes the raw text, applies any necessary filters (like removing effects),
and prepares the dialogues for formatting.
"""
cleaning_old_format = re.compile(r"{.*?}")
dialogs = re.findall(self.dialog_mask, re.sub(cleaning_old_format, "", self.raw_text))
Expand All @@ -61,23 +93,26 @@ def convert(self):
@staticmethod
def text_clearing(raw_text: str) -> str:
"""
We're clearing the text from unnecessary tags.
Clear the text from unnecessary tags and format line breaks.
:param raw_text: Dialog text with whitespace characters
:return: Dialog text without whitespaces and with the right move to a new line
:param raw_text: Dialog text with whitespace characters and ASS format tags
:type raw_text: str
:return: Cleaned dialog text without whitespaces and with proper line breaks
:rtype: str
"""

text = raw_text.replace(r"\h", "\xa0").strip()
line_text = text.split(r"\N")
return "\n".join(item.strip() for item in line_text).strip()

@staticmethod
def merged_dialogues(dialogues: List) -> List[Tuple[str, str, str]]:
def merged_dialogues(dialogues: List[Tuple[str, str, str]]) -> List[Tuple[str, str, str]]:
"""
Group consecutive dialogues with the same text into a single dialogue with a merged time range.
:return: A generator that iterates over the input dialogues and groups consecutive dialogues
with the same text into a single dialogue with a merged time range.
:param dialogues: List of dialogue tuples (start_time, end_time, text)
:type dialogues: List[Tuple[str, str, str]]
:return: Generator yielding merged dialogues
:rtype: List[Tuple[str, str, str]]
"""
curr_dialogue = None
for start, end, text in dialogues:
Expand All @@ -91,20 +126,26 @@ def merged_dialogues(dialogues: List) -> List[Tuple[str, str, str]]:
if curr_dialogue is not None:
yield curr_dialogue

def remove_duplicates(self, dialogues: List[Tuple[str, str, str]]) -> List[Tuple[str, str, str]]:
    """
    Collapse consecutive duplicate dialogues into single entries.

    The grouping itself is delegated to ``merged_dialogues``; this method
    only collects what it yields into a list.

    :param dialogues: A list of dialogues, where each dialogue is a tuple (start, end, text)
    :type dialogues: List[Tuple[str, str, str]]
    :return: A list of dialogues with consecutive duplicates removed and time ranges merged
    :rtype: List[Tuple[str, str, str]]
    """
    merged = self.merged_dialogues(dialogues)
    return [*merged]

def subtitle_formatting(self, dialogues: List):
def subtitle_formatting(self, dialogues: List[Tuple[str, str, str]]):
"""
Formatting ass into srt.
Format ASS dialogues into SRT format.
:param dialogues: Prepared dialogues
:return: Prepared dialogue sheet
This method processes the dialogues, removes duplicates if necessary, and creates
:class:`~pyasstosrt.dialogue.Dialogue` objects for each subtitle entry.
:param dialogues: Prepared dialogues as tuples (start_time, end_time, text)
:type dialogues: List[Tuple[str, str, str]]
"""
cleaned_dialogues = self.remove_duplicates(dialogues) if self.is_remove_duplicates else dialogues

Expand All @@ -115,17 +156,23 @@ def subtitle_formatting(self, dialogues: List):
self.dialogues.append(dialogue)

def export(
self, output_dir: AnyStr = None, encoding: AnyStr = "utf8", output_dialogues: bool = False
) -> Optional[List]:
self, output_dir: Optional[AnyStr] = None, encoding: AnyStr = "utf8", output_dialogues: bool = False
) -> Optional[List[Dialogue]]:
"""
If ret_dialogues parameter is False exports the subtitles to a file.
:param output_dir: Export path SubRip file
:param encoding: In which encoding you should save the file
:param output_dialogues: Whereas it should return a list of dialogues not creating a SubRip file
:return: List of dialogues
Export the subtitles either to a file or as a list of dialogues.
If `output_dialogues` is False, this method exports the subtitles to an SRT file.
Otherwise, it returns a list of :class:`~pyasstosrt.dialogue.Dialogue` objects.
:param output_dir: Export path for the SRT file (optional)
:type output_dir: Optional[AnyStr]
:param encoding: Encoding to use when saving the file (default is UTF-8)
:type encoding: AnyStr
:param output_dialogues: Whether to return a list of dialogues instead of creating an SRT file
:type output_dialogues: bool
:return: List of :class:`~pyasstosrt.dialogue.Dialogue` objects if `output_dialogues` is True, otherwise None
:rtype: Optional[List[Dialogue]]
"""

self.convert()

if output_dialogues:
Expand Down
45 changes: 38 additions & 7 deletions pyasstosrt/time.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
class Time:
"""
Represents a time structure for subtitle timestamps.
Attributes:
hour (int): The hour component of the time.
minute (int): The minute component of the time.
second (int): The second component of the time.
millisecond (int): The millisecond component of the time.
"""

hour: int
minute: int
second: int
millisecond: int

def __init__(self, text: str):
"""
Time data structure.
Initialize a Time object from a string representation.
Args:
text (str): A string representing time in the format '0:00:00.00'.
:param text: format time '0:00:00.00'
Example:
>>> time = Time("1:23:45.67")
>>> print(time)
01:23:45,670
"""
s = text.split(":")
self.hour, self.minute = [int(sr) for sr in s[:-1]]
Expand All @@ -18,10 +34,19 @@ def __init__(self, text: str):

def __sub__(self, other: "Time") -> float:
"""
We get the duration of the subtitles.
Calculate the duration between two :class:`Time` objects.
:param other: Another time structure
:return: The difference between the beginning and end of subtitles
Args:
other (:class:`Time`): Another Time object to subtract from this one.
Returns:
float: The difference in seconds between the two :class:`Time` objects.
Example:
>>> t1 = Time("0:00:10.00")
>>> t2 = Time("0:00:05.00")
>>> print(t1 - t2)
5.0
"""
return (
(self.hour - other.hour) * 3600
Expand All @@ -32,8 +57,14 @@ def __sub__(self, other: "Time") -> float:

def __str__(self) -> str:
"""
Format the time for str subtitles.
Format the :class:`Time` object as a string for SRT subtitles.
Returns:
str: A string representation of the time in the format '00:00:00,000'.
:return: We get the format string '0:00:00,000'
Example:
>>> time = Time("1:23:45.67")
>>> str(time)
'01:23:45,670'
"""
return f"{self.hour:02d}:{self.minute:02d}:{self.second:02d},{self.millisecond:03d}"

0 comments on commit d01f151

Please sign in to comment.