From 5b89a4e385361d29e2e70968174807dc5f328ffb Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 15:50:57 +0200 Subject: [PATCH 1/6] Optimize block parsing when syntax tree isn't available. If the syntax tree isn't available, use String.indexOf to parse the blocks. --- src/editor/block/block.js | 92 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/src/editor/block/block.js b/src/editor/block/block.js index bb86ecbd..623c16ed 100644 --- a/src/editor/block/block.js +++ b/src/editor/block/block.js @@ -1,7 +1,7 @@ import { ViewPlugin, EditorView, Decoration, WidgetType, lineNumbers } from "@codemirror/view" import { layer, RectangleMarker } from "@codemirror/view" import { EditorState, RangeSetBuilder, StateField, Facet , StateEffect, RangeSet} from "@codemirror/state"; -import { syntaxTree, ensureSyntaxTree } from "@codemirror/language" +import { syntaxTree, ensureSyntaxTree, syntaxTreeAvailable } from "@codemirror/language" import { Note, Document, NoteDelimiter } from "../lang-heynote/parser.terms.js" import { IterMode } from "@lezer/common"; import { heynoteEvent, LANGUAGE_CHANGE } from "../annotation.js"; @@ -10,12 +10,28 @@ import { mathBlock } from "./math.js" import { emptyBlockSelected } from "./select-all.js"; +function startTimer() { + const timeStart = performance.now(); + return function () { + return Math.round(performance.now() - timeStart); + }; +} + + // tracks the size of the first delimiter let firstBlockDelimiterSize -function getBlocks(state, timeout=50) { +/** + * Return a list of blocks in the document from the syntax tree. + * syntaxTreeAvailable() should have been called before this function to ensure the syntax tree is available. 
+ * @param {*} state + * @param {*} timeout + * @returns + */ +function getBlocksFromSyntaxTree(state, timeout=50) { + //const timer = startTimer() const blocks = []; - const tree = ensureSyntaxTree(state, state.doc.length, timeout) + const tree = syntaxTree(state, state.doc.length, timeout) if (tree) { tree.iterate({ enter: (type) => { @@ -52,9 +68,79 @@ function getBlocks(state, timeout=50) { }); firstBlockDelimiterSize = blocks[0]?.delimiter.to } + //console.log("getBlocksSyntaxTree took", timer(), "ms") return blocks } +/** + * Get the blocks for the document state. + * If the syntax tree is available, we'll extract the blocks from that. Otherwise + * the blocks are parsed from the string contents of the document, which is much faster + * than waiting for the tree parsing to finnish. + */ +function getBlocks(state) { + if (syntaxTreeAvailable(state, state.doc.length)) { + return getBlocksFromSyntaxTree(state) + } + //const timer = startTimer() + const blocks = [] + const doc = state.doc + if (doc.length === 0) { + return []; + } + const content = doc.sliceString(0, doc.length) + const delim = "\n∞∞∞" + let pos = 0 + while (pos < doc.length) { + const blockStart = content.indexOf(delim, pos); + if (blockStart != pos) { + console.error("Error parsing blocks, expected delimiter at", pos) + break; + } + const langStart = blockStart + delim.length; + const delimiterEnd = content.indexOf("\n", langStart) + if (delimiterEnd < 0) { + console.error("Error parsing blocks. 
Delimiter didn't end with newline") + break + } + const langFull = content.substring(langStart, delimiterEnd); + let auto = false; + let lang = langFull; + if (langFull.endsWith("-a")) { + auto = true; + lang = langFull.substring(0, langFull.length - 2); + } + const contentFrom = delimiterEnd + 1; + let blockEnd = content.indexOf(delim, contentFrom); + if (blockEnd < 0) { + blockEnd = doc.length; + } + + const block = { + language: { + name: lang, + auto: auto, + }, + content: { + from: contentFrom, + to: blockEnd, + }, + delimiter: { + from: blockStart, + to: delimiterEnd + 1, + }, + range: { + from: blockStart, + to: blockEnd, + }, + }; + blocks.push(block); + pos = blockEnd; + } + //console.log("getBlocks (string parsing) took", timer(), "ms") + return blocks; +} + export const blockState = StateField.define({ create(state) { return getBlocks(state, 1000); From 145f0ba6862c77aaff121fd9586b07980053169c Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 16:04:54 +0200 Subject: [PATCH 2/6] Remove timeout parameter for getBlocksFromSyntaxTree() since we now expect the syntax tree to already be available. --- src/editor/block/block.js | 133 ++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/src/editor/block/block.js b/src/editor/block/block.js index 623c16ed..c93d98f8 100644 --- a/src/editor/block/block.js +++ b/src/editor/block/block.js @@ -24,14 +24,11 @@ let firstBlockDelimiterSize /** * Return a list of blocks in the document from the syntax tree. * syntaxTreeAvailable() should have been called before this function to ensure the syntax tree is available. 
- * @param {*} state - * @param {*} timeout - * @returns */ -function getBlocksFromSyntaxTree(state, timeout=50) { +function getBlocksFromSyntaxTree(state) { //const timer = startTimer() const blocks = []; - const tree = syntaxTree(state, state.doc.length, timeout) + const tree = syntaxTree(state, state.doc.length) if (tree) { tree.iterate({ enter: (type) => { @@ -73,7 +70,70 @@ function getBlocksFromSyntaxTree(state, timeout=50) { } /** - * Get the blocks for the document state. + * Parse blocks from document's string contents using String.indexOf() + */ +function getBlocksFromString(state) { + //const timer = startTimer() + const blocks = [] + const doc = state.doc + if (doc.length === 0) { + return []; + } + const content = doc.sliceString(0, doc.length) + const delim = "\n∞∞∞" + let pos = 0 + while (pos < doc.length) { + const blockStart = content.indexOf(delim, pos); + if (blockStart != pos) { + console.error("Error parsing blocks, expected delimiter at", pos) + break; + } + const langStart = blockStart + delim.length; + const delimiterEnd = content.indexOf("\n", langStart) + if (delimiterEnd < 0) { + console.error("Error parsing blocks. Delimiter didn't end with newline") + break + } + const langFull = content.substring(langStart, delimiterEnd); + let auto = false; + let lang = langFull; + if (langFull.endsWith("-a")) { + auto = true; + lang = langFull.substring(0, langFull.length - 2); + } + const contentFrom = delimiterEnd + 1; + let blockEnd = content.indexOf(delim, contentFrom); + if (blockEnd < 0) { + blockEnd = doc.length; + } + + const block = { + language: { + name: lang, + auto: auto, + }, + content: { + from: contentFrom, + to: blockEnd, + }, + delimiter: { + from: blockStart, + to: delimiterEnd + 1, + }, + range: { + from: blockStart, + to: blockEnd, + }, + }; + blocks.push(block); + pos = blockEnd; + } + //console.log("getBlocksFromString() took", timer(), "ms") + return blocks; +} + +/** + * Get the blocks from the document state. 
* If the syntax tree is available, we'll extract the blocks from that. Otherwise * the blocks are parsed from the string contents of the document, which is much faster * than waiting for the tree parsing to finnish. @@ -81,69 +141,14 @@ function getBlocksFromSyntaxTree(state, timeout=50) { function getBlocks(state) { if (syntaxTreeAvailable(state, state.doc.length)) { return getBlocksFromSyntaxTree(state) + } else { + return getBlocksFromString(state) } - //const timer = startTimer() - const blocks = [] - const doc = state.doc - if (doc.length === 0) { - return []; - } - const content = doc.sliceString(0, doc.length) - const delim = "\n∞∞∞" - let pos = 0 - while (pos < doc.length) { - const blockStart = content.indexOf(delim, pos); - if (blockStart != pos) { - console.error("Error parsing blocks, expected delimiter at", pos) - break; - } - const langStart = blockStart + delim.length; - const delimiterEnd = content.indexOf("\n", langStart) - if (delimiterEnd < 0) { - console.error("Error parsing blocks. 
Delimiter didn't end with newline") - break - } - const langFull = content.substring(langStart, delimiterEnd); - let auto = false; - let lang = langFull; - if (langFull.endsWith("-a")) { - auto = true; - lang = langFull.substring(0, langFull.length - 2); - } - const contentFrom = delimiterEnd + 1; - let blockEnd = content.indexOf(delim, contentFrom); - if (blockEnd < 0) { - blockEnd = doc.length; - } - - const block = { - language: { - name: lang, - auto: auto, - }, - content: { - from: contentFrom, - to: blockEnd, - }, - delimiter: { - from: blockStart, - to: delimiterEnd + 1, - }, - range: { - from: blockStart, - to: blockEnd, - }, - }; - blocks.push(block); - pos = blockEnd; - } - //console.log("getBlocks (string parsing) took", timer(), "ms") - return blocks; } export const blockState = StateField.define({ create(state) { - return getBlocks(state, 1000); + return getBlocks(state); }, update(blocks, transaction) { // if blocks are empty it likely means we didn't get a parsed syntax tree, and then we want to update From c0ac62312b914d47b41e868a134e388fb172d420 Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 19:29:17 +0200 Subject: [PATCH 3/6] Typo --- src/editor/block/block.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/editor/block/block.js b/src/editor/block/block.js index c93d98f8..2afd1cb9 100644 --- a/src/editor/block/block.js +++ b/src/editor/block/block.js @@ -136,7 +136,7 @@ function getBlocksFromString(state) { * Get the blocks from the document state. * If the syntax tree is available, we'll extract the blocks from that. Otherwise * the blocks are parsed from the string contents of the document, which is much faster - * than waiting for the tree parsing to finnish. + * than waiting for the tree parsing to finish. 
*/ function getBlocks(state) { if (syntaxTreeAvailable(state, state.doc.length)) { From aa8b2fc729e66b7842ae04c18f7b72b86f063b4b Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 21:58:43 +0200 Subject: [PATCH 4/6] Update prettier to latest version --- package-lock.json | 8 ++++---- package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index d3a68a05..c42ab1ea 100644 --- a/package-lock.json +++ b/package-lock.json @@ -47,7 +47,7 @@ "electron-store": "^8.1.0", "electron-updater": "^6.1.7", "fs-jetpack": "^5.1.0", - "prettier": "^3.1.1", + "prettier": "^3.3.2", "rollup-plugin-license": "^3.0.1", "sass": "^1.57.1", "typescript": "^4.9.4", @@ -4996,9 +4996,9 @@ } }, "node_modules/prettier": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.1.1.tgz", - "integrity": "sha512-22UbSzg8luF4UuZtzgiUOfcGM8s4tjBv6dJRT7j275NXsy2jb4aJa4NNveul5x4eqlF1wuhuR2RElK71RvmVaw==", + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.2.tgz", + "integrity": "sha512-rAVeHYMcv8ATV5d508CFdn+8/pHPpXeIid1DdrPwXnaAdH7cqjVbpJaT5eq4yRAFU/lsbwYwSF/n5iNrdJHPQA==", "dev": true, "bin": { "prettier": "bin/prettier.cjs" diff --git a/package.json b/package.json index 65577ead..e4eef4e7 100644 --- a/package.json +++ b/package.json @@ -66,7 +66,7 @@ "electron-store": "^8.1.0", "electron-updater": "^6.1.7", "fs-jetpack": "^5.1.0", - "prettier": "^3.1.1", + "prettier": "^3.3.2", "rollup-plugin-license": "^3.0.1", "sass": "^1.57.1", "typescript": "^4.9.4", From 3926bcfa0d997b45a60465f1a71d694f734652c6 Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 21:59:47 +0200 Subject: [PATCH 5/6] Update imports so that they also work in playwright tests --- src/editor/languages.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/editor/languages.js b/src/editor/languages.js index a7d85ea9..804e62f1 100644 --- 
a/src/editor/languages.js +++ b/src/editor/languages.js @@ -27,13 +27,13 @@ import { groovy } from "@codemirror/legacy-modes/mode/groovy" import { diff } from "@codemirror/legacy-modes/mode/diff"; import { powerShell } from "@codemirror/legacy-modes/mode/powershell"; -import typescriptPlugin from "prettier/plugins/typescript.mjs" -import babelPrettierPlugin from "prettier/plugins/babel.mjs" -import htmlPrettierPlugin from "prettier/esm/parser-html.mjs" -import cssPrettierPlugin from "prettier/esm/parser-postcss.mjs" -import markdownPrettierPlugin from "prettier/esm/parser-markdown.mjs" -import yamlPrettierPlugin from "prettier/plugins/yaml.mjs" -import * as prettierPluginEstree from "prettier/plugins/estree.mjs"; +import typescriptPlugin from "prettier/plugins/typescript" +import babelPrettierPlugin from "prettier/plugins/babel" +import htmlPrettierPlugin from "prettier/plugins/html" +import cssPrettierPlugin from "prettier/plugins/postcss" +import markdownPrettierPlugin from "prettier/plugins/markdown" +import yamlPrettierPlugin from "prettier/plugins/yaml" +import * as prettierPluginEstree from "prettier/plugins/estree"; class Language { From 38fa21ef5bfd77e66d858b2e2da392d03bad8631 Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sun, 7 Jul 2024 22:01:19 +0200 Subject: [PATCH 6/6] Add test that ensures that getBlocksFromSyntaxTree() and getBlocksFromString() functions produce the same results --- src/editor/block/block.js | 6 +++--- tests/block-parsing.spec.js | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 tests/block-parsing.spec.js diff --git a/src/editor/block/block.js b/src/editor/block/block.js index 2afd1cb9..57e2bb11 100644 --- a/src/editor/block/block.js +++ b/src/editor/block/block.js @@ -25,7 +25,7 @@ let firstBlockDelimiterSize * Return a list of blocks in the document from the syntax tree. 
* syntaxTreeAvailable() should have been called before this function to ensure the syntax tree is available. */ -function getBlocksFromSyntaxTree(state) { +export function getBlocksFromSyntaxTree(state) { //const timer = startTimer() const blocks = []; const tree = syntaxTree(state, state.doc.length) @@ -72,7 +72,7 @@ function getBlocksFromSyntaxTree(state) { /** * Parse blocks from document's string contents using String.indexOf() */ -function getBlocksFromString(state) { +export function getBlocksFromString(state) { //const timer = startTimer() const blocks = [] const doc = state.doc @@ -138,7 +138,7 @@ function getBlocksFromString(state) { * the blocks are parsed from the string contents of the document, which is much faster * than waiting for the tree parsing to finish. */ -function getBlocks(state) { +export function getBlocks(state) { if (syntaxTreeAvailable(state, state.doc.length)) { return getBlocksFromSyntaxTree(state) } else { diff --git a/tests/block-parsing.spec.js b/tests/block-parsing.spec.js new file mode 100644 index 00000000..ee554789 --- /dev/null +++ b/tests/block-parsing.spec.js @@ -0,0 +1,28 @@ +import { expect, test } from "@playwright/test" +import { EditorState } from "@codemirror/state" + +import { heynoteLang } from "../src/editor/lang-heynote/heynote.js" +import { getBlocksFromSyntaxTree, getBlocksFromString } from "../src/editor/block/block.js" + +test("parse blocks from both syntax tree and string contents", async ({page}) => { + const contents = ` +∞∞∞text +Text Block A +∞∞∞text-a +Text Block B +∞∞∞json-a +{ +"key": "value" +} +∞∞∞python +print("Hello, World!") +` + const state = EditorState.create({ + doc: contents, + extensions: heynoteLang(), + }) + const treeBlocks = getBlocksFromSyntaxTree(state) + const stringBlocks = getBlocksFromString(state) + + expect(treeBlocks).toEqual(stringBlocks) +})