Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into standard-notes-supp…
Browse files Browse the repository at this point in the history
…ort-super-format
  • Loading branch information
marph91 committed Nov 24, 2024
2 parents 160e3bc + c3b11fb commit 284e52d
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 64 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ on:
paths-ignore:
- "**.md"
pull_request:
paths-ignore:
- "**.md"
workflow_dispatch:

jobs:
Expand All @@ -22,6 +24,13 @@ jobs:
# "macos-latest" is ARM based
# "macos-13" is Intel based
os: ["windows-latest", "ubuntu-latest", "macos-latest", "macos-13"]
# build "macos-13" executable only at release
# https://github.com/orgs/community/discussions/26253?sort=top#discussioncomment-3250989
is_release:
- ${{ startsWith(github.ref, 'refs/tags/v') }}
exclude:
- is_release: false
os: "macos-13"
steps:
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ on:
paths-ignore:
- "**.md"
pull_request:
paths-ignore:
- "**.md"
workflow_dispatch:

jobs:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ on:
paths-ignore:
- "**.md"
pull_request:
paths-ignore:
- "**.md"
workflow_dispatch:

jobs:
Expand Down
134 changes: 71 additions & 63 deletions src/formats/cherrytree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Convert cherrytree notes to the intermediate format."""

import base64
import logging
from pathlib import Path
import re
import xml.etree.ElementTree as ET # noqa: N817

import common
Expand All @@ -10,6 +12,9 @@
import markdown_lib


LOGGER = logging.getLogger("jimmy")


def convert_table(node):
table_md = markdown_lib.common.MarkdownTable()
for row_index, row in enumerate(node):
Expand Down Expand Up @@ -37,102 +42,114 @@ def fix_inline_formatting(md_content: str) -> str:
for checked_checkbox in ("☑", "☒"):
md_content = md_content.replace(checked_checkbox, "- [x]")

# unnumbered list
# numbered list
for number in range(10):
for bullet in (")", "-", ">"):
md_content = md_content.replace(f"{number}{bullet}", f"{number}.")

# unnumbered list
for bullet in ("•", "◇", "▪", "→", "⇒"):
for bullet in ("•", "◇", "▪", "▸", "→", "⇒"):
md_content = md_content.replace(bullet, "-")
return md_content


def convert_rich_text(rich_text, logger):
# Splits a string into (leading whitespace, core text, trailing whitespace).
# DOTALL is required so the lazy core group can span line breaks; without it
# any text containing an inner newline does not match at all.
WHITESPACE_RE = re.compile(r"^(\s*)(.+?)(\s*)$", re.DOTALL)


def separate_whitespace(string: str) -> tuple[str, str, str]:
    r"""Split a string into leading whitespace, core text and trailing whitespace.

    Markdown formatting markers must be applied directly to the text without
    surrounding whitespace, so the whitespace is split off and can be
    re-attached by the caller afterwards.

    Returns a tuple ``(leading, core, trailing)`` whose concatenation is
    always the original string. An empty string does not match the pattern
    and is returned as ``("", "", "")``.

    >>> separate_whitespace("  foo bar\n")
    ('  ', 'foo bar', '\n')
    >>> separate_whitespace(" foo\nbar ")
    (' ', 'foo\nbar', ' ')
    >>> separate_whitespace("")
    ('', '', '')
    """
    match_ = WHITESPACE_RE.match(string)
    if match_ is None:
        # Only reachable for the empty string, since the core needs >= 1 char.
        return ("", string, "")
    # All three groups always participate in a successful match.
    leading, core, trailing = match_.groups(default="")
    return (leading, core, trailing)


def convert_rich_text(rich_text):
if rich_text.text is None:
return "", []
if not rich_text.text.strip():
return rich_text.text, [] # keep whitespaces but don't format them
# TODO: is this fine with mixed text and child tags?
note_links = []
md_content = ""

# formatting needs to be applied directly to the string without spaces
leading, md_content, trailing = separate_whitespace(rich_text.text)
for attrib, attrib_value in rich_text.attrib.items():
match attrib:
case "background" | "foreground" | "justification":
if rich_text.text is not None:
md_content += rich_text.text
logger.debug(
LOGGER.debug(
f"ignoring {attrib}={attrib_value} "
"as it's not supported in markdown"
)
case "family":
match attrib_value:
case "monospace":
md_content = f"`{rich_text.text}`"
md_content = f"`{md_content}`"
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case "link":
url = attrib_value
if url.startswith("webs "):
# web links
url = url.lstrip("webs ")
if rich_text.text == url:
md_content = f"<{url}>"
md_content = f"<{md_content}>"
else:
md_content = f"[{rich_text.text}]({url})"
md_content = f"[{md_content}]({url})"
elif url.startswith("node "):
# internal node links
url = url.lstrip("node ")
md_content = f"[{rich_text.text}]({url})"
note_links.append(imf.NoteLink(md_content, url, rich_text.text))
text = md_content
md_content = f"[{text}]({url})"
# Split the note ID from the optional title. It can look like:
# "36 h2-3" or just "36".
# TODO: Anchors are not supported.
original_id = url.split(" ", maxsplit=1)[0]
note_links.append(imf.NoteLink(md_content, original_id, text))
else:
# ?
md_content = f"[{rich_text.text}]({url})"
md_content = f"[{md_content}]({url})"
case "scale":
match attrib_value:
case "sup":
md_content = f"^{rich_text.text}^"
md_content = f"^{md_content}^"
case "sub":
md_content = f"~{rich_text.text}~"
case "h1":
md_content = f"# {rich_text.text}"
case "h2":
md_content = f"## {rich_text.text}"
case "h3":
md_content = f"### {rich_text.text}"
case "h4":
md_content = f"#### {rich_text.text}"
case "h5":
md_content = f"##### {rich_text.text}"
case "h6":
md_content = f"###### {rich_text.text}"
md_content = f"~{md_content}~"
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
leading = f"{"#" * int(attrib_value[-1])} " + leading
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case "strikethrough":
match attrib_value:
case "true":
md_content = f"~~{rich_text.text}~~"
md_content = f"~~{md_content}~~"
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case "style":
match attrib_value:
case "italic":
md_content = f"*{rich_text.text}*"
md_content = f"*{md_content}*"
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case "underline":
match attrib_value:
case "single":
md_content = f"++{rich_text.text}++"
md_content = f"++{md_content}++"
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case "weight":
match attrib_value:
case "heavy":
md_content = f"**{rich_text.text}**"
md_content = f"**{md_content}**"
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
case _:
logger.warning(f"ignoring {attrib}={attrib_value}")
LOGGER.warning(f"ignoring {attrib}={attrib_value}")
md_content = leading + md_content + trailing
if not md_content:
# TODO: make this more robust
md_content += "" if rich_text.text is None else rich_text.text
md_content = rich_text.text
if not rich_text.attrib:
# TODO: make this more robust
# Make sure not to break links.
Expand Down Expand Up @@ -171,17 +188,17 @@ def convert_to_markdown(self, node, root_notebook):
# TODO
# pylint: disable=too-many-locals
title = node.attrib.get("name", "")
self.logger.debug(f'Converting note "{title}", parent "{root_notebook.title}"')
note_imf = imf.Note(title, source_application=self.format)

new_root_notebook = None # only needed if there are sub notes
resources = []
note_links = []
note_body = ""
for child in node:
match child.tag:
case "rich_text":
content_md, note_links_imf = convert_rich_text(child, self.logger)
content_md, note_links_imf = convert_rich_text(child)
note_body += content_md
note_links.extend(note_links_imf)
note_imf.note_links.extend(note_links_imf)
case "node":
# there are sub notes -> create notebook with same name as note
if new_root_notebook is None:
Expand All @@ -192,9 +209,6 @@ def convert_to_markdown(self, node, root_notebook):
f"parent: {root_notebook.title}"
)
self.convert_to_markdown(child, new_root_notebook)
if new_root_notebook.is_empty():
# Delete the notebook if it's empty.
del root_notebook.child_notebooks[-1]
case "codebox":
language = child.attrib.get("syntax_highlighting", "")
note_body += f"\n```{language}\n{child.text}\n```\n"
Expand All @@ -212,31 +226,22 @@ def convert_to_markdown(self, node, root_notebook):
# but we do it later with the common function.
resource_md, resource_imf = convert_png(child, self.root_path)
note_body += resource_md
resources.append(resource_imf)
note_imf.resources.append(resource_imf)
case "table":
note_body += "\n" + convert_table(child) + "\n"
case _:
self.logger.warning(f"ignoring tag {child.tag}")

self.logger.debug(f'Converting note "{title}", parent "{root_notebook.title}"')
note_imf.body = note_body

tags = []
# cherrytree bookmark -> tag
unique_id = node.attrib["unique_id"]
if unique_id in self.bookmarked_nodes:
tags.append("cherrytree-bookmarked")
note_imf.original_id = node.attrib["unique_id"]
if note_imf.original_id in self.bookmarked_nodes:
note_imf.tags.append(imf.Tag("cherrytree-bookmarked"))
if tags_str := node.attrib.get("tags", ""):
tags.extend(tags_str.strip().split(" "))

note_imf = imf.Note(
title,
note_body,
source_application=self.format,
tags=[imf.Tag(tag) for tag in tags],
resources=resources,
note_links=note_links,
original_id=unique_id,
)
note_imf.tags.extend(
imf.Tag(t) for t in tags_str.strip().split(" ") if t.strip()
)

if (created_time := node.attrib.get("ts_creation")) is not None:
note_imf.created = common.timestamp_to_datetime(int(created_time))
Expand All @@ -261,3 +266,6 @@ def convert(self, file_or_folder: Path):
self.convert_to_markdown(child, self.root_notebook)
case _:
self.logger.warning(f"ignoring tag {child.tag}")

# Don't export empty notebooks
self.remove_empty_notebooks()
2 changes: 1 addition & 1 deletion test/data
Submodule data updated 57 files
+1 −1 reference_data/cherrytree/test_2/Files.md
+3 −3 reference_data/cherrytree/test_2/Files/Command Line.md
+3 −3 reference_data/cherrytree/test_2/Files/Creating and Opening.md
+3 −3 reference_data/cherrytree/test_2/Files/Exporting.md
+2 −2 reference_data/cherrytree/test_2/Files/Importing.md
+13 −13 reference_data/cherrytree/test_2/Files/Saving.md
+3 −3 reference_data/cherrytree/test_2/Installation.md
+3 −3 reference_data/cherrytree/test_2/Installation/Fedora.md
+4 −4 reference_data/cherrytree/test_2/Installation/Ubuntu.md
+1 −1 reference_data/cherrytree/test_2/Installation/Windows.md
+103 −103 reference_data/cherrytree/test_2/Introduction.md
+6 −6 reference_data/cherrytree/test_2/Introduction/Get Involved.md
+3 −3 reference_data/cherrytree/test_2/Nodes.md
+4 −4 reference_data/cherrytree/test_2/Nodes/Creating Nodes.md
+3 −3 reference_data/cherrytree/test_2/Nodes/Managing Nodes.md
+2 −2 reference_data/cherrytree/test_2/Objects.md
+7 −7 reference_data/cherrytree/test_2/Objects/Anchors.md
+16 −16 reference_data/cherrytree/test_2/Objects/Codebox.md
+5 −5 reference_data/cherrytree/test_2/Objects/Files.md
+2 −2 reference_data/cherrytree/test_2/Objects/Horizontal Rule.md
+10 −10 reference_data/cherrytree/test_2/Objects/Images.md
+8 −8 reference_data/cherrytree/test_2/Objects/Links.md
+2 −2 reference_data/cherrytree/test_2/Objects/Special Characters.md
+3 −3 reference_data/cherrytree/test_2/Objects/Table of Contents.md
+32 −32 reference_data/cherrytree/test_2/Objects/Tables.md
+3 −3 reference_data/cherrytree/test_2/Objects/Timestamps.md
+8 −8 reference_data/cherrytree/test_2/Quick Start.md
+2 −2 reference_data/cherrytree/test_2/Quick Start/Export a Document.md
+3 −3 reference_data/cherrytree/test_2/Quick Start/Insert Objects.md
+11 −11 reference_data/cherrytree/test_2/Quick Start/Interface.md
+4 −4 reference_data/cherrytree/test_2/Quick Start/Node Management.md
+2 −2 reference_data/cherrytree/test_2/Quick Start/Save Your Notes.md
+3 −3 reference_data/cherrytree/test_2/Running from Source.md
+1 −1 reference_data/cherrytree/test_2/Running from Source/Additional Resources.md
+1 −1 reference_data/cherrytree/test_2/Running from Source/Ubuntu.md
+11 −11 reference_data/cherrytree/test_2/Running from Source/Windows.md
+1 −1 reference_data/cherrytree/test_2/Running from Source/macOS (Not Tested).md
+1 −1 reference_data/cherrytree/test_2/Settings.md
+4 −4 reference_data/cherrytree/test_2/Settings/Keyboard Shortcuts.md
+4 −4 reference_data/cherrytree/test_2/Settings/Links.md
+20 −20 reference_data/cherrytree/test_2/Settings/Miscellaneous.md
+16 −16 reference_data/cherrytree/test_2/Settings/Plain Text and Code.md
+11 −11 reference_data/cherrytree/test_2/Settings/Rich Text.md
+13 −13 reference_data/cherrytree/test_2/Settings/Text and Code.md
+3 −3 reference_data/cherrytree/test_2/Settings/Text.md
+7 −7 reference_data/cherrytree/test_2/Settings/Toolbar.md
+10 −10 reference_data/cherrytree/test_2/Settings/Tree 1.md
+5 −5 reference_data/cherrytree/test_2/Settings/Tree 2.md
+2 −2 reference_data/cherrytree/test_2/Text.md
+6 −6 reference_data/cherrytree/test_2/Text/Automatic Syntax Highlighting.md
+6 −6 reference_data/cherrytree/test_2/Text/Editing.md
+9 −9 reference_data/cherrytree/test_2/Text/Formatting.md
+23 −23 reference_data/cherrytree/test_2/Text/Search.md
+1 −1 reference_data/cherrytree/test_3/OSCP-OS-XXXX-Lab-Report/3.0 Methodologies/3.2 Penetration.md
+7 −7 reference_data/cherrytree/test_3/OSCP-OS-XXXX-Lab-Report/3.0 Methodologies/3.2 Penetration/xxxxx template.md
+1 −1 reference_data/cherrytree/test_4/192.168.x.249.md
+1 −1 reference_data/cherrytree/test_4/192.168.x.249/Credential Harvesting.md

0 comments on commit 284e52d

Please sign in to comment.