Merge pull request #25 from marph91/standard-notes-support-super-format

Support the standard notes "Super" format
marph91 · Nov 24, 2024 · ad67d8f
2 parents c3b11fb + a5188b6
commit ad67d8f
Show file tree

Hide file tree

Showing 10 changed files with 277 additions and 36 deletions.
diff --git a/docs/formats/standard_notes.md b/docs/formats/standard_notes.md
@@ -12,6 +12,10 @@ This page describes how to convert notes from Standard Notes to Markdown.
 3. Convert to Markdown. Example: `jimmy-cli-linux "Standard Notes Backup - Sun Apr 28 2024 12_56_55 GMT+0200.zip" --format standard_notes`
 4. [Import to your app](../import_instructions.md)
 
+## Import Structure
+
+Notes in "Super" format are converted to Markdown. Other notes are taken as-is.
+
 ## Known Limitations
 
 Note links, attachments and folders are not implemented yet, since they require a subscription.
diff --git a/readme.md b/readme.md
@@ -2,11 +2,11 @@
 
 Free your notes by converting them to Markdown.
 
-Download: [**Linux**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-linux) | [**Windows**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-windows.exe) | [**MacOS**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-darwin-arm64)  [![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/marph91/jimmy/total)](https://github.com/marph91/jimmy/releases/latest)
+:floppy_disk: Download: [**Linux**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-linux) | [**Windows**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-windows.exe) | [**MacOS**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-darwin-arm64)  [![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/marph91/jimmy/total)](https://github.com/marph91/jimmy/releases/latest)
 
-For detailed information, take a look at the [Documentation](https://marph91.github.io/jimmy/).
+:blue_book: For detailed information, take a look at the [Documentation](https://marph91.github.io/jimmy/).
 
-If this app is useful for you, feel free to star it on [github](https://github.com/marph91/jimmy).
+:star: If this app is useful for you, feel free to star it on [github](https://github.com/marph91/jimmy).
 
 [![build](https://github.com/marph91/jimmy/actions/workflows/build.yml/badge.svg)](https://github.com/marph91/jimmy/actions/workflows/build.yml)
 [![lint](https://github.com/marph91/jimmy/actions/workflows/lint.yml/badge.svg)](https://github.com/marph91/jimmy/actions/workflows/lint.yml)

diff --git a/src/formats/cherrytree.py b/src/formats/cherrytree.py
@@ -228,7 +228,7 @@ def convert_to_markdown(self, node, root_notebook):
                     note_body += resource_md
                     note_imf.resources.append(resource_imf)
                 case "table":
-                    note_body += convert_table(child)
+                    note_body += "\n" + convert_table(child) + "\n"
                 case _:
                     self.logger.warning(f"ignoring tag {child.tag}")
 

diff --git a/src/formats/standard_notes.py b/src/formats/standard_notes.py
@@ -2,28 +2,235 @@
 
 from collections import defaultdict
 import datetime as dt
+import enum
+import logging
 import json
 from pathlib import Path
 
 import converter
 import intermediate_format as imf
+import markdown_lib
+
+
+LOGGER = logging.getLogger("jimmy")
+
+
+class Format(enum.Flag):
+    """
+    Represents a super text format:
+    https://github.com/standardnotes/app/blob/5c23a11b5a4555e809ecc7ca7775e49bc0ccda0f/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts#L10
+    https://github.com/facebook/lexical/blob/fe4f5b8cb408476f39d6689ae8886be1d1322df1/packages/lexical/src/LexicalConstants.ts#L38-L46
+    """
+
+    BOLD = 1
+    ITALIC = 1 << 1
+    STRIKETHROUGH = 1 << 2
+    UNDERLINE = 1 << 3
+    CODE = 1 << 4
+    SUBSCRIPT = 1 << 5
+    SUPERSCRIPT = 1 << 6
+    HIGHLIGHT = 1 << 7
+
+
+def format_text(text: str, format_: Format) -> str:
+    for item in Format:
+        if item in format_:
+            match item:
+                case Format.BOLD:
+                    text = f"**{text}**"
+                case Format.ITALIC:
+                    text = f"*{text}*"
+                case Format.STRIKETHROUGH:
+                    text = f"~~{text}~~"
+                case Format.UNDERLINE:
+                    text = f"++{text}++"
+                case Format.CODE:
+                    text = f"`{text}`"
+                case Format.SUBSCRIPT:
+                    text = f"~{text}~"
+                case Format.SUPERSCRIPT:
+                    text = f"^{text}^"
+                case Format.HIGHLIGHT:
+                    text = f"=={text}=="
+                case _:
+                    LOGGER.debug(f"Unknown format: {item}")
+    return text
+
+
+class SuperToMarkdown:
+    def __init__(self):
+        self.last_list = None
+        self.md = []
+
+    def add_newlines(self, count: int):
+        """Add as many newlines as needed. Consider preceding newlines."""
+        # TODO: check for simpler implementation
+        if not self.md:
+            return
+        # how many newlines are at the end already?
+        index = 0
+        while index < len(self.md) and index < count and self.md[-index - 1] == "\n":
+            index += 1
+        # insert missing newlines
+        if index > 0:
+            self.md[-index:] = ["\n"] * count
+        else:
+            self.md.extend(["\n"] * count)
+
+    def add_text(self, text: str | list[str], quote_level: int):
+        if not text:
+            return
+        if isinstance(text, str):
+            text = [text]
+        if quote_level == 0:
+            self.md.extend(text)
+            return
+        if self.md and self.md[-1] == "\n":
+            self.md.append("> " * quote_level)
+        for item in text:
+            self.md.append(item)
+            if item == "\n":
+                self.md.append("> " * quote_level)
+
+    def parse_table(self, block: dict):
+        md_before = self.md
+        table = markdown_lib.common.MarkdownTable()
+        for row in block["children"]:
+            # assert row["type"] == "tablerow"
+            row_cells = []
+            is_header_row = False
+            for cell in row["children"]:
+                # assert cell["type"] == "tablecell"
+                if cell["headerState"] == 1 and not table.header_rows:
+                    is_header_row = True
+                self.md = []
+                self.parse_block(cell)
+                # newlines aren't allowed in Markdown tables
+                row_cells.extend([md for md in self.md if md != "\n"])
+            if is_header_row:
+                table.header_rows.append(row_cells)
+            else:
+                table.data_rows.append(row_cells)
+        self.md = md_before
+        self.add_text(table.create_md(), 0)
+
+    def parse_block(self, block: dict, quote_level: int = 0):
+        # TODO: "indent" is ignored
+        # https://stackoverflow.com/a/6046472/7410886
+        # assert block["version"] == 1
+        newlines = 0
+        append = []
+        skip_children = False
+        match block["type"]:
+            case "autolink" | "link":
+                link = markdown_lib.common.MarkdownLink(
+                    block["children"][0]["text"],
+                    block.get("url", ""),
+                    block.get("title", ""),
+                )
+                self.add_text(link.reformat(), quote_level)
+                skip_children = True
+            case "code":
+                self.add_text([f"```{block.get("language", "")}", "\n"], quote_level)
+                newlines = 1
+                append = ["```", "\n", "\n"]
+            case "collapsible-container":
+                # There is no collapse in Markdown.
+                # Convert it to bold (collapsible-title) + text (collapsible-content).
+                pass
+            case "collapsible-title":
+                self.add_text(f"**{block["children"][0]["text"]}**", quote_level)
+                skip_children = True
+                newlines = 2
+            case "collapsible-content":
+                pass
+                # self.add_text(block["text"], quote_level)
+                # newlines = 2
+            case "heading":
+                self.add_text(f"{"#" * int(block["tag"][-1])} ", quote_level)
+                newlines = 2
+            case "horizontalrule":
+                self.add_newlines(2)
+                self.add_text("---", quote_level)
+                newlines = 2
+            case "linebreak":
+                self.add_newlines(1)
+            case "list":
+                # TODO: nested list
+                self.last_list = block["listType"]
+                newlines = 2
+            case "listitem":
+                # TODO: indent
+                match self.last_list:
+                    case "bullet":
+                        bullet = "- "
+                    case "number":
+                        bullet = "1. "
+                    case "check":
+                        bullet = "- [x] " if block.get("checked", False) else "- [ ] "
+                self.add_text(bullet, quote_level)
+                newlines = 1
+            case "paragraph":
+                self.add_newlines(1)
+                newlines = 2
+            case "root":
+                pass
+            case "snfile":
+                # TODO: Is the uuid relevant for note links?
+                # print("snfile", block["version"], block["fileUuid"])
+                pass
+            case "table":
+                self.add_newlines(2)
+                self.parse_table(block)
+                newlines = 2
+                skip_children = True
+            case "tablerow" | "tablecell":
+                pass  # handled in parse_table()
+            case "code-highlight" | "text":
+                self.add_text(
+                    format_text(block["text"], Format(block["format"])), quote_level
+                )
+            case "quote":
+                quote_level += 1
+                newlines = 2
+            case _:
+                LOGGER.debug(f"Unknown block type: {block["type"]}")
+        if not skip_children:
+            for child in block.get("children", []):
+                self.parse_block(child, quote_level)
+        self.add_newlines(newlines)
+        self.add_text(append, quote_level)
+
+    def convert(self, super_json: str) -> str:
+        super_dict = json.loads(super_json)
+        self.md = []
+        self.parse_block(super_dict["root"])
+        return "".join(self.md)
 
 
 class Converter(converter.BaseConverter):
     accepted_extensions = [".zip"]
 
     def convert(self, file_or_folder: Path):
-        input_json = json.loads(
-            (self.root_path / "Standard Notes Backup and Import File.txt").read_text(
-                encoding="utf-8"
-            )
-        )
+        target_file = None
+        for file_ in [
+            "Standard Notes Backup and Import File.txt",
+            "Standard Notes Backup and Import File txt.txt",
+        ]:
+            if (self.root_path / file_).is_file():
+                target_file = self.root_path / file_
+                break
+        if target_file is None:
+            self.logger.error("Couldn't find text file in zip.")
+            return
+
+        input_json = json.loads(target_file.read_text(encoding="utf-8"))
 
         # first pass: get all tags
         # In the export, notes are assigned to tags. We need tags assigned to notes.
         note_id_tag_map = defaultdict(list)
         for item in input_json["items"]:
-            if item["content_type"] != "Tag" or item["deleted"]:
+            if item["content_type"] != "Tag" or item.get("deleted", False):
                 continue
             tag = imf.Tag(
                 item["content"]["title"],
@@ -38,26 +245,34 @@ def convert(self, file_or_folder: Path):
 
         # second pass: get all notes and assign tags to notes
         for item in input_json["items"]:
-            if item["content_type"] != "Note" or item["deleted"]:
+            if item["content_type"] != "Note" or item.get("deleted", False):
                 continue
             title = item["content"]["title"]
             self.logger.debug(f'Converting note "{title}"')
-
-            tags = note_id_tag_map.get(item["uuid"], [])
-            if item["content"].get("starred", False):
-                tags.append(imf.Tag("standard_notes-starred"))
-
             note_imf = imf.Note(
                 title,
-                # TODO: "noteType" is ignored for now.
-                item["content"]["text"],
                 created=dt.datetime.fromisoformat(item["created_at"]),
                 updated=dt.datetime.fromisoformat(item["updated_at"]),
                 source_application=self.format,
-                # Tags don't have a separate id. Just use the name as id.
-                tags=tags,
                 original_id=item["uuid"],
             )
+
+            note_imf.tags.extend(note_id_tag_map.get(item["uuid"], []))
+            if item["content"].get("starred", False):
+                note_imf.tags.append(imf.Tag("standard_notes-starred"))
+
+            match item["content"].get("noteType", "plain-text"):
+                case "plain-text":
+                    note_imf.body = item["content"]["text"]
+                case "super":
+                    super_converter = SuperToMarkdown()
+                    note_imf.body = super_converter.convert(item["content"]["text"])
+                case _:
+                    note_imf.body = item["content"]["text"]
+                    self.logger.debug(
+                        f"Unsupported note type \"{item["content"]["noteType"]}\""
+                    )
+
             if item["content"].get("trashed", False):
                 parent = trash_notebook
             elif item["content"]["appData"]["org.standardnotes.sn"].get(
@@ -67,3 +282,6 @@ def convert(self, file_or_folder: Path):
             else:
                 parent = self.root_notebook
             parent.child_notes.append(note_imf)
+
+        # Don't export empty notebooks
+        self.remove_empty_notebooks()
diff --git a/src/formats/tomboy_ng.py b/src/formats/tomboy_ng.py
@@ -28,7 +28,7 @@ def parse_content(self, node):
             ):
                 md_content.append(f"**{child.text}**")
             elif child.tag.endswith("highlight"):
-                md_content.append(child.text)  # TODO
+                md_content.append(f"=={child.text}==")
             elif child.tag.endswith("italic"):
                 md_content.append(f"*{child.text}*")
             elif child.tag.endswith("list"):

diff --git a/src/markdown_lib/common.py b/src/markdown_lib/common.py
@@ -52,21 +52,22 @@ def create_md_row(cells: list[str]) -> str:
             rows_md.append(create_md_row(row))
 
         caption = self.caption + "\n\n" if self.caption else ""
-        return caption + "\n".join(rows_md) + "\n"
+        return caption + "\n".join(rows_md)
 
 
 @dataclass
 class MarkdownLink:
     """
-    Represents a markdown:
+    Represents a markdown link:
     - link: https://www.markdownguide.org/basic-syntax/#links
     - image: https://www.markdownguide.org/basic-syntax/#images-1
     """
 
-    text: str
-    url: str
-    # TODO: ignored for now
-    # title: str = ""
+    # TODO: doctest
+
+    text: str = ""
+    url: str = ""
+    title: str = ""
     is_image: bool = False
 
     @property
@@ -80,7 +81,17 @@ def is_mail_link(self) -> bool:
 
     def __str__(self) -> str:
         prefix = "!" if self.is_image else ""
-        return f"{prefix}[{self.text}]({self.url})"
+        title = "" if not self.title else f' "{self.title}"'
+        return f"{prefix}[{self.text}]({self.url}{title})"
+
+    def reformat(self) -> str:
+        if not self.url:
+            return f"<{self.text}>"
+        if self.is_web_link and self.text == self.url:
+            return f"<{self.url}>"
+        prefix = "!" if self.is_image else ""
+        title = "" if not self.title else f' "{self.title}"'
+        return f"{prefix}[{self.text}]({self.url}{title})"
 
 
 class LinkExtractor(Treeprocessor):