import logging
import re

HTML_TAGS_PATTERN = r"(<[^>]+>)"

# One or more ASCII punctuation characters. No ^/$ anchors are needed because
# the pattern is only ever used with fullmatch(). Note that the backslash
# itself is not part of the set: ``\]`` below is an escaped ``]``.
_SPECIAL_CHARACTERS_RE = re.compile(r'[!"#$%&\'()*+,\-./:;<=>?@[\]^_`{|}~]+')


def is_html_tag(segment: str) -> bool:
    """Return True if *segment* is exactly one ``<...>`` tag.

    The whole string must match ``HTML_TAGS_PATTERN``; text that merely
    contains a tag (for example ``"<p>x</p>"``) is not considered a tag.
    """
    return re.fullmatch(HTML_TAGS_PATTERN, segment) is not None


def is_special_character(segment: str) -> bool:
    """Return True if *segment* is a non-empty run of ASCII punctuation only.

    The empty string, and any string containing a letter, digit, whitespace
    or backslash, yields False.
    """
    return _SPECIAL_CHARACTERS_RE.fullmatch(segment) is not None


def translate_sub_segment(translator: "GoogleTranslator", sub_segment: str) -> str:
    """Translate one piece of text, falling back to the original on failure.

    Args:
        translator: Object exposing ``translate(text)``. The annotation is
            quoted so that this helper does not need the class to be
            importable at definition time.
        sub_segment: The text to translate.

    Returns:
        The translated text. *sub_segment* is returned unchanged when it is
        empty or whitespace-only (the translator is not called at all), when
        the translator returns a falsy result, or when the translator raises.
    """
    # Nothing to translate: skip the call instead of sending blank text to
    # the translator.
    if not sub_segment.strip():
        return sub_segment

    try:
        translated = translator.translate(sub_segment)
    except Exception as e:
        # Deliberately best-effort: one failed piece must not abort the whole
        # document, so log it and keep the untranslated text.
        logging.error("Error translating segment '%s': %s", sub_segment, e)
        return sub_segment

    return translated if translated else sub_segment
@@ -25,19 +46,12 @@ def translate_segment(translator: GoogleTranslator, segment: str) -> str: translated_segments = [] for sub_segment in segments: - if re.fullmatch(HTML_TAGS_PATTERN, sub_segment): + if is_html_tag(sub_segment): + translated_segments.append(sub_segment) + elif is_special_character(sub_segment): translated_segments.append(sub_segment) else: - try: - if re.fullmatch(r'^[!"#$%&\'()*+,\-./:;<=>?@[\]^_`{|}~]+$', sub_segment): - translated_segments.append(sub_segment) - continue - - translated = translator.translate(sub_segment) - translated_segments.append(translated if translated else sub_segment) - except Exception as e: - print(f"Error translating segment '{sub_segment}': {e}") - translated_segments.append(sub_segment) + translated_segments.append(translate_sub_segment(translator, sub_segment)) return "".join(translated_segments) @@ -68,8 +82,10 @@ def main() -> None: destination_langs = ["zh-CN", "ja", "fr"] for lang in destination_langs: + logging.info(f"Translating to {lang}...") translated_readme = translate_readme(source_file, lang) save_translated_readme(translated_readme, lang) + logging.info(f"Saved translated README for {lang}.") if __name__ == "__main__": - main() \ No newline at end of file + main()