Skip to content

Commit

Permalink
pre-commit changes
Browse files Browse the repository at this point in the history
  • Loading branch information
MauroDruwel committed Dec 23, 2024
1 parent 46d0829 commit 6ecc4f4
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ class DocxConverter(HtmlConverter):
def sanitize_filename(self, name: str) -> str:
"""Sanitizes a string to make it a valid file name."""
# Normalize whitespace
- name = re.sub(r'\s+', " ", name.strip())
+ name = re.sub(r"\s+", " ", name.strip())
# Replace invalid characters with underscores
return re.sub(r'[\\/*?:"<>|]', "_", name)

Expand All @@ -720,7 +720,9 @@ def convert_image(self, image, output_dir: str) -> dict:
# Return an empty src if saving fails
return {"src": ""}

- def convert(self, local_path: str, **kwargs) -> Union[None, DocumentConverterResult]:
+ def convert(
+ self, local_path: str, **kwargs
+ ) -> Union[None, DocumentConverterResult]:
# Bail if not a DOCX
extension = kwargs.get("file_extension", "")
if extension.lower() != ".docx":
Expand All @@ -734,7 +736,9 @@ def convert(self, local_path: str, **kwargs) -> Union[None, DocumentConverterRes
mammoth_result = convert_to_html(
docx_file,
style_map=style_map,
- convert_image=images.inline(lambda img: self.convert_image(img, image_output_dir)),
+ convert_image=images.inline(
+ lambda img: self.convert_image(img, image_output_dir)
+ ),
)

html_content = mammoth_result.value
Expand Down

0 comments on commit 6ecc4f4

Please sign in to comment.