Skip to content

Commit

Permalink
Merge pull request #25 from marph91/standard-notes-support-super-format
Browse files Browse the repository at this point in the history
Support the standard notes "Super" format
  • Loading branch information
marph91 authored Nov 24, 2024
2 parents c3b11fb + a5188b6 commit ad67d8f
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 36 deletions.
4 changes: 4 additions & 0 deletions docs/formats/standard_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ This page describes how to convert notes from Standard Notes to Markdown.
3. Convert to Markdown. Example: `jimmy-cli-linux "Standard Notes Backup - Sun Apr 28 2024 12_56_55 GMT+0200.zip" --format standard_notes`
4. [Import to your app](../import_instructions.md)

## Import Structure

Notes in "Super" format are converted to Markdown. Other notes are taken as-is.

## Known Limitations

Note links, attachments and folders are not implemented yet, since they require a subscription.
6 changes: 3 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

Free your notes by converting them to Markdown.

Download: [**Linux**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-linux) | [**Windows**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-windows.exe) | [**MacOS**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-darwin-arm64) [![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/marph91/jimmy/total)](https://github.com/marph91/jimmy/releases/latest)
:floppy_disk: Download: [**Linux**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-linux) | [**Windows**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-windows.exe) | [**MacOS**](https://github.com/marph91/jimmy/releases/latest/download/jimmy-cli-darwin-arm64) [![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/marph91/jimmy/total)](https://github.com/marph91/jimmy/releases/latest)

For detailed information, take a look at the [Documentation](https://marph91.github.io/jimmy/).
:blue_book: For detailed information, take a look at the [Documentation](https://marph91.github.io/jimmy/).

If this app is useful for you, feel free to star it on [github](https://github.com/marph91/jimmy).
:star: If this app is useful for you, feel free to star it on [github](https://github.com/marph91/jimmy).

[![build](https://github.com/marph91/jimmy/actions/workflows/build.yml/badge.svg)](https://github.com/marph91/jimmy/actions/workflows/build.yml)
[![lint](https://github.com/marph91/jimmy/actions/workflows/lint.yml/badge.svg)](https://github.com/marph91/jimmy/actions/workflows/lint.yml)
Expand Down
2 changes: 1 addition & 1 deletion src/formats/cherrytree.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def convert_to_markdown(self, node, root_notebook):
note_body += resource_md
note_imf.resources.append(resource_imf)
case "table":
note_body += convert_table(child)
note_body += "\n" + convert_table(child) + "\n"
case _:
self.logger.warning(f"ignoring tag {child.tag}")

Expand Down
250 changes: 234 additions & 16 deletions src/formats/standard_notes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,235 @@

from collections import defaultdict
import datetime as dt
import enum
import logging
import json
from pathlib import Path

import converter
import intermediate_format as imf
import markdown_lib


LOGGER = logging.getLogger("jimmy")


class Format(enum.Flag):
"""
Represents a super text format:
https://github.com/standardnotes/app/blob/5c23a11b5a4555e809ecc7ca7775e49bc0ccda0f/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts#L10
https://github.com/facebook/lexical/blob/fe4f5b8cb408476f39d6689ae8886be1d1322df1/packages/lexical/src/LexicalConstants.ts#L38-L46
"""

BOLD = 1
ITALIC = 1 << 1
STRIKETHROUGH = 1 << 2
UNDERLINE = 1 << 3
CODE = 1 << 4
SUBSCRIPT = 1 << 5
SUPERSCRIPT = 1 << 6
HIGHLIGHT = 1 << 7


# Markdown delimiter for each supported text format.
# The delimiter is put before and after the formatted text.
_FORMAT_MARKERS = {
    Format.BOLD: "**",
    Format.ITALIC: "*",
    Format.STRIKETHROUGH: "~~",
    Format.UNDERLINE: "++",
    Format.CODE: "`",
    Format.SUBSCRIPT: "~",
    Format.SUPERSCRIPT: "^",
    Format.HIGHLIGHT: "==",
}


def format_text(text: str, format_: Format) -> str:
    """
    Wrap the text in the Markdown delimiters of all formats set in "format_".

    The formats are applied in the definition order of "Format". The first
    format is the innermost and the last format is the outermost delimiter.
    """
    for item in Format:
        if item not in format_:
            continue
        marker = _FORMAT_MARKERS.get(item)
        if marker is None:
            # Only reachable if a format is added to the enum, but not to the table.
            LOGGER.debug(f"Unknown format: {item}")
            continue
        text = f"{marker}{text}{marker}"
    return text


class SuperToMarkdown:
def __init__(self):
self.last_list = None
self.md = []

def add_newlines(self, count: int):
"""Add as many newlines as needed. Consider preceding newlines."""
# TODO: check for simpler implementation
if not self.md:
return
# how many newlines are at the end already?
index = 0
while index < len(self.md) and index < count and self.md[-index - 1] == "\n":
index += 1
# insert missing newlines
if index > 0:
self.md[-index:] = ["\n"] * count
else:
self.md.extend(["\n"] * count)

def add_text(self, text: str | list[str], quote_level: int):
if not text:
return
if isinstance(text, str):
text = [text]
if quote_level == 0:
self.md.extend(text)
return
if self.md and self.md[-1] == "\n":
self.md.append("> " * quote_level)
for item in text:
self.md.append(item)
if item == "\n":
self.md.append("> " * quote_level)

def parse_table(self, block: dict):
md_before = self.md
table = markdown_lib.common.MarkdownTable()
for row in block["children"]:
# assert row["type"] == "tablerow"
row_cells = []
is_header_row = False
for cell in row["children"]:
# assert cell["type"] == "tablecell"
if cell["headerState"] == 1 and not table.header_rows:
is_header_row = True
self.md = []
self.parse_block(cell)
# newlines aren't allowed in Markdown tables
row_cells.extend([md for md in self.md if md != "\n"])
if is_header_row:
table.header_rows.append(row_cells)
else:
table.data_rows.append(row_cells)
self.md = md_before
self.add_text(table.create_md(), 0)

def parse_block(self, block: dict, quote_level: int = 0):
# TODO: "indent" is ignored
# https://stackoverflow.com/a/6046472/7410886
# assert block["version"] == 1
newlines = 0
append = []
skip_children = False
match block["type"]:
case "autolink" | "link":
link = markdown_lib.common.MarkdownLink(
block["children"][0]["text"],
block.get("url", ""),
block.get("title", ""),
)
self.add_text(link.reformat(), quote_level)
skip_children = True
case "code":
self.add_text([f"```{block.get("language", "")}", "\n"], quote_level)
newlines = 1
append = ["```", "\n", "\n"]
case "collapsible-container":
# There is no collapse in Markdown.
# Convert it to bold (collapsible-title) + text (collapsible-content).
pass
case "collapsible-title":
self.add_text(f"**{block["children"][0]["text"]}**", quote_level)
skip_children = True
newlines = 2
case "collapsible-content":
pass
# self.add_text(block["text"], quote_level)
# newlines = 2
case "heading":
self.add_text(f"{"#" * int(block["tag"][-1])} ", quote_level)
newlines = 2
case "horizontalrule":
self.add_newlines(2)
self.add_text("---", quote_level)
newlines = 2
case "linebreak":
self.add_newlines(1)
case "list":
# TODO: nested list
self.last_list = block["listType"]
newlines = 2
case "listitem":
# TODO: indent
match self.last_list:
case "bullet":
bullet = "- "
case "number":
bullet = "1. "
case "check":
bullet = "- [x] " if block.get("checked", False) else "- [ ] "
self.add_text(bullet, quote_level)
newlines = 1
case "paragraph":
self.add_newlines(1)
newlines = 2
case "root":
pass
case "snfile":
# TODO: Is the uuid relevant for note links?
# print("snfile", block["version"], block["fileUuid"])
pass
case "table":
self.add_newlines(2)
self.parse_table(block)
newlines = 2
skip_children = True
case "tablerow" | "tablecell":
pass # handled in parse_table()
case "code-highlight" | "text":
self.add_text(
format_text(block["text"], Format(block["format"])), quote_level
)
case "quote":
quote_level += 1
newlines = 2
case _:
LOGGER.debug(f"Unknown block type: {block["type"]}")
if not skip_children:
for child in block.get("children", []):
self.parse_block(child, quote_level)
self.add_newlines(newlines)
self.add_text(append, quote_level)

def convert(self, super_json: str) -> str:
super_dict = json.loads(super_json)
self.md = []
self.parse_block(super_dict["root"])
return "".join(self.md)


class Converter(converter.BaseConverter):
accepted_extensions = [".zip"]

def convert(self, file_or_folder: Path):
input_json = json.loads(
(self.root_path / "Standard Notes Backup and Import File.txt").read_text(
encoding="utf-8"
)
)
target_file = None
for file_ in [
"Standard Notes Backup and Import File.txt",
"Standard Notes Backup and Import File txt.txt",
]:
if (self.root_path / file_).is_file():
target_file = self.root_path / file_
break
if target_file is None:
self.logger.error("Couldn't find text file in zip.")
return

input_json = json.loads(target_file.read_text(encoding="utf-8"))

# first pass: get all tags
# In the export, notes are assigned to tags. We need tags assigned to notes.
note_id_tag_map = defaultdict(list)
for item in input_json["items"]:
if item["content_type"] != "Tag" or item["deleted"]:
if item["content_type"] != "Tag" or item.get("deleted", False):
continue
tag = imf.Tag(
item["content"]["title"],
Expand All @@ -38,26 +245,34 @@ def convert(self, file_or_folder: Path):

# second pass: get all notes and assign tags to notes
for item in input_json["items"]:
if item["content_type"] != "Note" or item["deleted"]:
if item["content_type"] != "Note" or item.get("deleted", False):
continue
title = item["content"]["title"]
self.logger.debug(f'Converting note "{title}"')

tags = note_id_tag_map.get(item["uuid"], [])
if item["content"].get("starred", False):
tags.append(imf.Tag("standard_notes-starred"))

note_imf = imf.Note(
title,
# TODO: "noteType" is ignored for now.
item["content"]["text"],
created=dt.datetime.fromisoformat(item["created_at"]),
updated=dt.datetime.fromisoformat(item["updated_at"]),
source_application=self.format,
# Tags don't have a separate id. Just use the name as id.
tags=tags,
original_id=item["uuid"],
)

note_imf.tags.extend(note_id_tag_map.get(item["uuid"], []))
if item["content"].get("starred", False):
note_imf.tags.append(imf.Tag("standard_notes-starred"))

match item["content"].get("noteType", "plain-text"):
case "plain-text":
note_imf.body = item["content"]["text"]
case "super":
super_converter = SuperToMarkdown()
note_imf.body = super_converter.convert(item["content"]["text"])
case _:
note_imf.body = item["content"]["text"]
self.logger.debug(
f"Unsupported note type \"{item["content"]["noteType"]}\""
)

if item["content"].get("trashed", False):
parent = trash_notebook
elif item["content"]["appData"]["org.standardnotes.sn"].get(
Expand All @@ -67,3 +282,6 @@ def convert(self, file_or_folder: Path):
else:
parent = self.root_notebook
parent.child_notes.append(note_imf)

# Don't export empty notebooks
self.remove_empty_notebooks()
2 changes: 1 addition & 1 deletion src/formats/tomboy_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def parse_content(self, node):
):
md_content.append(f"**{child.text}**")
elif child.tag.endswith("highlight"):
md_content.append(child.text) # TODO
md_content.append(f"=={child.text}==")
elif child.tag.endswith("italic"):
md_content.append(f"*{child.text}*")
elif child.tag.endswith("list"):
Expand Down
25 changes: 18 additions & 7 deletions src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,22 @@ def create_md_row(cells: list[str]) -> str:
rows_md.append(create_md_row(row))

caption = self.caption + "\n\n" if self.caption else ""
return caption + "\n".join(rows_md) + "\n"
return caption + "\n".join(rows_md)


@dataclass
class MarkdownLink:
"""
Represents a markdown:
Represents a markdown link:
- link: https://www.markdownguide.org/basic-syntax/#links
- image: https://www.markdownguide.org/basic-syntax/#images-1
"""

text: str
url: str
# TODO: ignored for now
# title: str = ""
# TODO: doctest

text: str = ""
url: str = ""
title: str = ""
is_image: bool = False

@property
Expand All @@ -80,7 +81,17 @@ def is_mail_link(self) -> bool:

def __str__(self) -> str:
prefix = "!" if self.is_image else ""
return f"{prefix}[{self.text}]({self.url})"
title = "" if not self.title else f' "{self.title}"'
return f"{prefix}[{self.text}]({self.url}{title})"

def reformat(self) -> str:
if not self.url:
return f"<{self.text}>"
if self.is_web_link and self.text == self.url:
return f"<{self.url}>"
prefix = "!" if self.is_image else ""
title = "" if not self.title else f' "{self.title}"'
return f"{prefix}[{self.text}]({self.url}{title})"


class LinkExtractor(Treeprocessor):
Expand Down
Loading

0 comments on commit ad67d8f

Please sign in to comment.