Skip to content

Commit

Permalink
show a warning if there are complex tables that couldn't be converted
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Oct 7, 2024
1 parent c2effdf commit 9e2e2ea
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
9 changes: 1 addition & 8 deletions src/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from pathlib import Path
import subprocess

import pypandoc

import common
import intermediate_format as imf
import markdown_lib.common
Expand Down Expand Up @@ -141,12 +139,7 @@ def convert_file(self, file_: Path, parent: imf.Notebook):
# Remove unnecessarily added lines if needed.
note_body = "\n".join(note_body_splitted[:-2])
case _:
note_body = pypandoc.convert_file(
file_,
markdown_lib.common.PANDOC_OUTPUT_FORMAT,
# somehow the temp folder is needed to create the resources properly
extra_args=[f"--extract-media={common.get_temp_folder()}"],
)
note_body = markdown_lib.common.file_to_markdown(file_)

resources, note_links = self.handle_markdown_links(note_body, file_.parent)
note_imf = imf.Note(
Expand Down
20 changes: 19 additions & 1 deletion src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

from dataclasses import dataclass, field
import logging
from pathlib import Path
import re

import markdown
from markdown.treeprocessors import Treeprocessor
from markdown.extensions import Extension
import pypandoc

import common


LOGGER = logging.getLogger("jimmy")

Expand Down Expand Up @@ -208,7 +211,22 @@ def get_inline_tags(text: str, start_characters: list[str]) -> list[str]:


def markup_to_markdown(text: str, format_: str = "html") -> str:
    """Convert markup text to Markdown with pandoc.

    Args:
        text: Markup source to convert.
        format_: Pandoc input format name describing ``text``.

    Returns:
        The converted text in ``PANDOC_OUTPUT_FORMAT``. The result is returned
        unchanged even when a warning is logged.
    """
    text_md = pypandoc.convert_text(text, PANDOC_OUTPUT_FORMAT, format=format_)
    # A "[TABLE]" marker left in the output is treated as a table that could
    # not be rendered. Only warn; the caller still gets the converted text.
    if "[TABLE]" in text_md:
        LOGGER.warning("Table is too complex and can't be converted to markdown.")
    return text_md


def file_to_markdown(file_: Path) -> str:
    """Convert the document at ``file_`` to Markdown with pandoc.

    Args:
        file_: Path of the source document handed to pandoc.

    Returns:
        The converted text in ``PANDOC_OUTPUT_FORMAT``. The result is returned
        unchanged even when a warning is logged.
    """
    # somehow the temp folder is needed to create the resources properly
    media_option = f"--extract-media={common.get_temp_folder()}"
    converted = pypandoc.convert_file(
        file_,
        PANDOC_OUTPUT_FORMAT,
        extra_args=[media_option],
    )
    # A "[TABLE]" marker in the output is reported as an unconverted table.
    has_table_marker = "[TABLE]" in converted
    if has_table_marker:
        LOGGER.warning("Table is too complex and can't be converted to markdown.")
    return converted


# Problem: "//" is part of many URI (between scheme and host).
Expand Down

0 comments on commit 9e2e2ea

Please sign in to comment.