Skip to content

Commit

Permalink
Improve Nimbus Note conversion (by adding HTML preprocessors)
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Oct 18, 2024
1 parent ae77229 commit d28bdc9
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
32 changes: 30 additions & 2 deletions src/formats/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,33 @@
import base64
from pathlib import Path

from bs4 import BeautifulSoup

import common
import converter
import intermediate_format as imf
import markdown_lib.common


def clean_tables(soup):
    """Remove ``div`` and ``span`` wrappers inside every table of *soup*.

    Each matching element is unwrapped, i.e. the tag itself is dropped while
    its children stay in place. The parsed document is modified in place and
    nothing is returned.
    """

    def unwrap_all(parent, tag_name):
        # find_all() hands back a materialised result list, so unwrapping
        # the nodes while looping over it is safe.
        for node in parent.find_all(tag_name):
            node.unwrap()

    for table in soup.find_all("table"):
        unwrap_all(table, "div")
        unwrap_all(table, "span")


def assign_lists(_soup):
    """Placeholder for list preprocessing; the document is left untouched.

    The argument is currently unused and the function always returns ``None``.
    """
    # TODO: Notes for the future implementation:
    # - All lists are exported as unnumbered lists (ul).
    # - The list type is stored in the class attribute
    #   (list-item-number, list-item-bullet, list-item-checkbox).
    # - The indentation level is stored in the class attribute (indent-0).
    #
    # Possible starting point: iterate over
    # soup.find_all("ul", class_="checklist") to handle task lists.
    return None


class Converter(converter.BaseConverter):
accept_folder = True

Expand Down Expand Up @@ -55,8 +76,15 @@ def convert(self, file_or_folder: Path):
common.extract_zip(file_, temp_folder=temp_folder_note)

# The HTML note always seems to be named "note.html".
note_body_html = (temp_folder_note / "note.html").read_text("utf-8")
note_body_markdown = markdown_lib.common.markup_to_markdown(note_body_html)
note_body_html = (temp_folder_note / "note.html").read_text(
encoding="utf-8"
)

soup = BeautifulSoup(note_body_html, "html.parser")
clean_tables(soup)
assign_lists(soup)

note_body_markdown = markdown_lib.common.markup_to_markdown(str(soup))
resources = self.handle_markdown_links(note_body_markdown, temp_folder_note)
note_imf = imf.Note(
title,
Expand Down

0 comments on commit d28bdc9

Please sign in to comment.