Skip to content

Commit

Permalink
Update readme_translator.py (#768)
Browse files Browse the repository at this point in the history
1. Add Logging: Instead of using print statements for errors, we can use
the logging module for better error tracking.

2. Refactor Code: Break down the translate_segment function into smaller
functions for clarity.

3. Add Type Hints: Improve type hints for better code understanding.

4. Handle Empty Translations: Add a check to handle cases where the
translation might be empty.

5. Use Context Manager for File Operations: Ensure files are properly
closed after operations.
<!-- ELLIPSIS_HIDDEN -->

----

> [!IMPORTANT]
> Enhance `readme_translator.py` with logging, refactoring, type hints,
improved error handling, and better file management.
> 
>   - **Logging**:
>     - Replace print statements with logging in `translate_sub_segment()`
>       and `main()` for error tracking and process updates.
>   - **Refactoring**:
>     - Break down `translate_segment()` into smaller functions:
>       `is_html_tag()`, `is_special_character()`, and
>       `translate_sub_segment()`.
>   - **Type Hints**:
>     - Add type hints to functions for better code understanding.
>   - **Error Handling**:
>     - Add check in `translate_sub_segment()` to handle empty translations.
>   - **File Management**:
>     - Use context manager in `save_translated_readme()` to ensure files
>       are closed properly.
> 
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup>
for 9d1e0cf. It will automatically
update as commits are pushed.</sup>

<!-- ELLIPSIS_HIDDEN -->
  • Loading branch information
DhruvKadam-git authored Oct 29, 2024
1 parent 5c380d4 commit 2096347
Showing 1 changed file with 28 additions and 12 deletions.
40 changes: 28 additions & 12 deletions scripts/readme_translator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import re
import logging
from typing import List
from pathlib import Path
from functools import partial
from deep_translator import GoogleTranslator
import parmapper

# Configure logging
# Runs once at import time: records at INFO level and above are emitted, each
# prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# One HTML tag: "<", one or more characters that are not ">", then ">".
# NOTE(review): the capturing group is presumably there so that re.split()
# keeps the matched tags in its output -- the split call is not visible in
# this view; confirm against translate_segment().
HTML_TAGS_PATTERN = r"(<[^>]+>)"
# Either a fenced code block (triple backticks, shortest possible match,
# newlines allowed inside via [\s\S]) or a single newline character.
CODEBLOCK_PATTERN = r"(```[\s\S]*?```|\n)"

Expand All @@ -14,6 +18,23 @@ def create_translator(target: str) -> GoogleTranslator:
"""
return GoogleTranslator(source="en", target=target)

def is_html_tag(segment: str) -> bool:
    """Tell whether ``segment`` is, in its entirety, a single HTML tag.

    The complete string has to match ``HTML_TAGS_PATTERN``; text that only
    contains a tag somewhere inside it does not qualify.
    """
    whole_match = re.fullmatch(HTML_TAGS_PATTERN, segment)
    # A match object is always truthy, so bool() mirrors an "is not None" test.
    return bool(whole_match)

def is_special_character(segment: str) -> bool:
    """Tell whether ``segment`` is made up solely of punctuation symbols.

    Every character must belong to the symbol set spelled out in the
    pattern below (the backslash character is not part of that set), and
    at least one character is required, so an empty string gives False.
    """
    symbols_only = r'^[!"#$%&\'()*+,\-./:;<=>?@[\]^_`{|}~]+$'
    return bool(re.fullmatch(symbols_only, segment))

def translate_sub_segment(translator: "GoogleTranslator", sub_segment: str) -> str:
    """Translate one piece of text, falling back to the input on any failure.

    Args:
        translator: Object whose ``translate(text)`` method performs the
            translation.
        sub_segment: The text to translate.

    Returns:
        The translated text. ``sub_segment`` is returned unchanged when it
        is empty or whitespace-only, when the translator yields an empty
        result, or when the translation call raises.
    """
    # Blank pieces contain nothing to translate; returning early avoids a
    # pointless call to the translator and a possible spurious error log.
    if not sub_segment.strip():
        return sub_segment

    try:
        translated = translator.translate(sub_segment)
    except Exception as e:
        # Deliberate best-effort: one failed piece must not abort the whole
        # document, so report it and keep the untranslated text.
        logging.error(f"Error translating segment '{sub_segment}': {e}")
        return sub_segment

    # An empty/None translation would drop content; keep the source text.
    return translated if translated else sub_segment

def translate_segment(translator: GoogleTranslator, segment: str) -> str:
"""
Translate a given raw HTML content using the provided translator, preserving HTML tags and newlines.
Expand All @@ -25,19 +46,12 @@ def translate_segment(translator: GoogleTranslator, segment: str) -> str:
translated_segments = []

for sub_segment in segments:
if re.fullmatch(HTML_TAGS_PATTERN, sub_segment):
if is_html_tag(sub_segment):
translated_segments.append(sub_segment)
elif is_special_character(sub_segment):
translated_segments.append(sub_segment)
else:
try:
if re.fullmatch(r'^[!"#$%&\'()*+,\-./:;<=>?@[\]^_`{|}~]+$', sub_segment):
translated_segments.append(sub_segment)
continue

translated = translator.translate(sub_segment)
translated_segments.append(translated if translated else sub_segment)
except Exception as e:
print(f"Error translating segment '{sub_segment}': {e}")
translated_segments.append(sub_segment)
translated_segments.append(translate_sub_segment(translator, sub_segment))

return "".join(translated_segments)

Expand Down Expand Up @@ -68,8 +82,10 @@ def main() -> None:
destination_langs = ["zh-CN", "ja", "fr"]

for lang in destination_langs:
logging.info(f"Translating to {lang}...")
translated_readme = translate_readme(source_file, lang)
save_translated_readme(translated_readme, lang)
logging.info(f"Saved translated README for {lang}.")

if __name__ == "__main__":
main()
main()

0 comments on commit 2096347

Please sign in to comment.