Add support for objects in certain text markup

orgapp · Jul 18, 2021 · d39ae90 · d39ae90
1 parent a8b8d36
commit d39ae90
Show file tree

Hide file tree

Showing 14 changed files with 302 additions and 80 deletions.
diff --git a/packages/orga/src/parse/__tests__/paragraph.spec.ts b/packages/orga/src/parse/__tests__/paragraph.spec.ts
@@ -1,4 +1,5 @@
 import {
+  inlineFootnote,
   link,
   paragraph,
   pos,
@@ -7,10 +8,15 @@ import {
   testParseSection,
   text,
   textBold,
+  textBoldC,
   textCode,
   textItalic,
+  textItalicC,
   textStrikethrough,
+  textStrikethroughC,
   textUnderline,
+  textUnderlineC,
+  textVerbatim,
 } from './util';
 
 describe('Parse Paragraph', () => {
@@ -119,6 +125,57 @@ the round pegs in the +round+ square holes...
     testParseSection(testName, text, [paragraph(...rest)]);
   };
 
+  describe("code and verbatim only contain text", () => {
+    for (const [mup, mf] of [["~", textCode], ["=", textVerbatim]] as const) {
+      describe(`markup: ${mup}`, () => {
+        testParseParagraph("does not allow links", `${mup}[[link]]${mup}`, [mf("[[link]]")]);
+        testParseParagraph("does not allow bold markup", `${mup}*bold*${mup}`, [mf("*bold*")]);
+      });
+    }
+  });
+
+  describe("bold italic strike-through and underline support object contents", () => {
+    for (const [mup, mf] of [["*", textBoldC], ["/", textItalicC], ["+", textStrikethroughC], ["_", textUnderlineC]] as const) {
+      describe(`markup: ${mup}`, () => {
+        testParseParagraph("allows links", `${mup}[[https://duckduckgo.com]]${mup}`, [mf([link("https://duckduckgo.com")])]);
+        testParseParagraph("allows bold markup", `${mup}*bold*${mup}`, [mf([textBold("bold")])]);
+        testParseParagraph("allows footnote references", `${mup}[fn:name:Test]${mup}`, [mf([inlineFootnote("name", [text("Test")])])]);
+      });
+    }
+    testParseParagraph("nested markup example",
+      "*Test1 _test2_ /test3/ te*st =test4=*",
+      [textBoldC([text("Test1 "), textUnderline("test2"), text(" "), textItalic("test3"), text(" te*st "), textVerbatim("test4")])]);
+  });
+
+  const markupCharsWithFns = [
+    ["*", textBold],
+    ["=", textVerbatim],
+    ["/", textItalic],
+    ["+", textStrikethrough],
+    ["_", textUnderline],
+    ["~", textCode]
+  ] as const;
+  const markupChars = markupCharsWithFns.map(x => x[0]);
+
+  describe("markup with non-whitespace", () => {
+    for (const [mup, mf] of markupCharsWithFns) {
+      describe(`markup: ${mup}`, () => {
+        // NOTE: Org parser 2.4.4 treats __Test_ as a subscript rather than underline (2021-07-18)
+        testParseParagraph(`preceded by self (Org parser 2.4.4)`, `${mup}${mup}Test${mup}`, [mf(`${mup}Test`)]);
+        testParseParagraph(`followed by self (Org parser 2.4.4)`, `${mup}Test${mup}${mup}`, [mf(`Test${mup}`)]);
+        const excluded = markupChars.filter(x => x !== mup);
+        for (const excl of excluded) {
+          testParseParagraph(`followed by ${excl} (Org parser 2.4.4)`, `${mup}Test${mup}${excl}`, [text(`${mup}Test${mup}${excl}`)]);
+        }
+      });
+    }
+  });
+  testParseParagraph("underline mixed markup example (Org parser 2.4.4)", "_Test1 _test2_ /test3/ =test4=_", [textUnderline("Test1 _test2"), text(" "), textItalic("test3"), text(" =test4=_")]);
+
+  testParseParagraph("bold empty (Org parser 2.4.4)", "**", [text("**")]);
+  testParseParagraph("bold bold (Org parser 2.4.4)", "****", [textBold("**")]);
+  testParseParagraph("bold in bold (Org parser 2.4.4)", "**Test**", [textBoldC([textBold("Test")])]);
+
   testParseParagraph("pure markup", "_Test1_", [textUnderline("Test1")]);
   testParseParagraph("markup followed by newline", "_Test1_\n", [textUnderline("Test1")]);
   testParseParagraph("markup preceded by newline", "\n_Test1_", [textUnderline("Test1")]);
@@ -129,9 +186,9 @@ the round pegs in the +round+ square holes...
       testParseParagraph("marker cannot be first in line (end)", "_Test1\n_", [text("_Test1"), text(" "), text("_")]);
     });
 
-    testParseParagraph("marker with next line ending", "_Test1\nTest2_", [textUnderline("Test1\nTest2")]);
+    testParseParagraph("marker with next line ending", "_Test1\nTest2_", [textUnderlineC([text("Test1"), text(" "), text("Test2")])]);
 
-    testParseParagraph("marker with next line ending and spaces", "_Test1\n  Test2_", [textUnderline("Test1\n  Test2")]);
+    testParseParagraph("marker with next line ending and spaces", "_Test1\n  Test2_", [textUnderlineC([text("Test1"), text(" "), text("  Test2")])]);
 
     testParseParagraph("cannot span more than 3 lines (spec v2021.07.03)", "_Test1\nTest2\nTest3\nTest4_", [text("_Test1"), text(" "), text("Test2"), text(" "), text("Test3"), text(" "), text("Test4_")]);
 

diff --git a/packages/orga/src/parse/__tests__/util.ts b/packages/orga/src/parse/__tests__/util.ts
@@ -26,6 +26,8 @@ import {
   Table,
   TableCell,
   TableRow,
+  TextMarkupComplex,
+  TextMarkupSimple,
   Timestamp,
   Token,
   VerseBlock,
@@ -73,21 +75,36 @@ export const greaterBlock = (name: GreaterBlock['name'], children: GreaterBlock[
 export const specialBlock = (name: SpecialBlock['name'], children: SpecialBlock['children'], extra: ExtraP<SpecialBlock, 'name'> = {}): SpecialBlock =>
   ast.specialBlock(name, children, mkExtra(extra));
 
-export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => (text: string, extra: Extra<StyledText, 'value'> = {}): StyledText & { type: TextTy } =>
-  ast.styledText(type)(text, mkExtra(extra));
+export const simpleStyledText = <TextTy extends TextMarkupSimple['type']>(type: TextTy) => (text: string, extra: Extra<TextMarkupSimple, 'value'> = {}): TextMarkupSimple & { type: TextTy } => ast.simpleStyledText(type)(text, mkExtra(extra));
+
+export const simpleStyledTextComplex = <TextTy extends TextMarkupComplex['type']>(type: TextTy) => (value: string, extra: ExtraP<TextMarkupComplex> = {}): TextMarkupComplex & { type: TextTy } => ast.simpleStyledTextComplex(type)(value, mkExtra(extra));
+
+export const complexTextMarkup = <TextTy extends TextMarkupComplex['type']>(type: TextTy) => (children: TextMarkupComplex['children'], extra: ExtraP<TextMarkupComplex> = {}): TextMarkupComplex & { type: TextTy } => ast.complexTextMarkup(type)(children, mkExtra(extra));
+
+export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => type === 'text.plain' || type === 'text.code' || type === 'text.verbatim' ? simpleStyledText(type) : simpleStyledTextComplex(type);
 
 export const text = styledText('text.plain');
 
 export const textBold = styledText('text.bold');
 
+export const textBoldC = complexTextMarkup('text.bold');
+
 export const textCode = styledText('text.code');
 
 export const textItalic = styledText('text.italic');
 
+export const textItalicC = complexTextMarkup('text.italic');
+
 export const textStrikethrough = styledText('text.strikeThrough');
 
+export const textStrikethroughC = complexTextMarkup('text.strikeThrough');
+
 export const textUnderline = styledText('text.underline');
 
+export const textUnderlineC = complexTextMarkup('text.underline');
+
+export const textVerbatim = styledText('text.verbatim');
+
 import { FootnoteRef, FootnoteInline, FootnoteAnon } from '../utils';
 
 export const footnoteReference = (label: string, extra: ExtraP<FootnoteRef, 'label'> = {}): FootnoteRef =>

diff --git a/packages/orga/src/parse/textMarkup.ts b/packages/orga/src/parse/textMarkup.ts
@@ -1,14 +1,50 @@
 import { Lexer } from '../tokenize'
-import { StyledText } from '../types'
-import { isStyledText } from '../utils';
+import { PhrasingContent, StyledText } from '../types'
+import utils, { complexTextMarkup, manyEndBy, oneOf, simpleStyledText, text } from './utils';
+import phrasingContent from './phrasingContent';
+
+const MARKERS = {
+  '*': 'text.bold',
+  '/': 'text.italic',
+  '+': 'text.strikeThrough',
+  '_': 'text.underline',
+} as const;
 
 export default (lexer: Lexer): StyledText | undefined => {
   const { peek, eat } = lexer
+  const { returning, tryTo } = utils(lexer);
 
   const token = peek()
 
-  if (token && isStyledText(token)) {
-    eat()
-    return token;
+  if (token) {
+    if (token.type === 'text.plain' || token.type === 'text.code' || token.type === 'text.verbatim') {
+      // simple cases - these cannot contain objects
+      eat();
+      return simpleStyledText(token.type)(token.value, { position: token.position })
+    } else if (token.type === 'token.complexStyleChar') {
+      // "CONTENTS can contain any object encountered in a paragraph
+      // when markup is “bold”, “italic”, “strike-through” or
+      // “underline”." - spec v2021.07.03
+      eat();
+      const matchChar = token.char;
+      const newline = () => {
+        const token = peek();
+        if (token && token.type === 'newline') {
+          eat();
+          return (text(' ', { position: token.position }));
+        }
+      };
+      const phrasingContentOrNewline = oneOf([newline, phrasingContent]);
+      const toks = returning(tryTo(manyEndBy(phrasingContentOrNewline, () => {
+        const t = peek();
+        if (t && t.type === 'token.complexStyleChar' && t.char === matchChar) {
+          eat();
+          return t;
+        }
+      })))();
+      if (!toks) return;
+      const toksButLast = toks.slice(0, toks.length - 1) as PhrasingContent[];
+      return complexTextMarkup(MARKERS[matchChar])(toksButLast, { position: { start: token.position.start, end: toks[toks.length - 1].position.end } });
+    }
   }
 }
diff --git a/packages/orga/src/parse/utils.ts b/packages/orga/src/parse/utils.ts
@@ -23,6 +23,8 @@ import {
   Table,
   TableCell,
   TableRow,
+  TextMarkupComplex,
+  TextMarkupSimple,
   Timestamp,
   Token,
   VerseBlock,
@@ -186,12 +188,18 @@ export const manyOf = <T>(parse: TokenParser<T>): TokenParser<T[]> => {
   };
 }
 
-/** Parse zero or more occurences of `p` ended by `end`. */
+/**
+ * Parse zero or more occurrences of `p` ended by `end`.
+ *
+ * This tries `end` before each occurrence of `p`, and thus you should
+ * use `manyEndBy` instead if you need nesting.
+ */
 export const manyTill = <T, End>(p: TokenParser<T>, end: TokenParser<End>): TokenParser<[...T[], End]> => {
   return (lexer: Lexer) => {
     const { returning, tryTo } = lexActions(lexer);
     const res: T[] = [];
     while (true) {
+      if (!lexer.peek()) return;
       const last = returning(tryTo(end))();
       if (last) {
         return [...res, last];
@@ -206,6 +214,29 @@ export const manyTill = <T, End>(p: TokenParser<T>, end: TokenParser<End>): Toke
   };
 }
 
+/**
+ * Parse zero or more occurrences of `p` ended by `end`.
+ *
+ * This tries `end` _after_ each occurrence of `p`, so can be used for nesting.
+ */
+export const manyEndBy = <T, End>(p: TokenParser<T>, end: TokenParser<End>): TokenParser<[...T[], End]> => {
+  return (lexer: Lexer) => {
+    const { returning, tryTo } = lexActions(lexer);
+    const res: T[] = [];
+    while (true) {
+      if (!lexer.peek()) return;
+      const next = returning(tryTo(p))();
+      if (next) {
+        res.push(next);
+      }
+      const last = returning(tryTo(end))();
+      if (last) {
+        return [...res, last];
+      }
+    }
+  };
+}
+
 /** All of the given `ps` in sequence. */
 export const seq = <T, N extends number>(ps: TokenParser<T>[] & { length: N }): TokenParser<T[] & { length: N }> => {
   return (lexer: Lexer) => {
@@ -314,30 +345,52 @@ export const specialBlock = (name: SpecialBlock['name'], children: SpecialBlock[
   ...extra
 });
 
-/** Build an AST {@link StyledText} object. */
-export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => (text: string, extra: Extra<StyledText, 'value'>): StyledText & { type: TextTy } => ({
+export const simpleStyledText = <TextTy extends TextMarkupSimple['type']>(type: TextTy) => (text: string, extra: Extra<TextMarkupSimple, 'value'>): TextMarkupSimple & { type: TextTy } => ({
   type: type,
   value: text,
   ...extra
 });
 
 /** Build an AST plain text object. */
-export const text = styledText('text.plain');
-
-/** Build an AST text bold object. */
-export const textBold = styledText('text.bold');
+export const text = simpleStyledText('text.plain');
 
 /** Build an AST text code object. */
-export const textCode = styledText('text.code');
+export const textCode = simpleStyledText('text.code');
+
+export const simpleStyledTextComplex = <TextTy extends TextMarkupComplex['type']>(type: TextTy) => (value: string, extra: ExtraP<TextMarkupComplex>): TextMarkupComplex & { type: TextTy } => {
+  const start = extra.position?.start;
+  const end = extra.position?.end;
+  const innerPos = start && end ? { start: { line: start.line, column: start.column + 1 }, end: { line: end.line, column: end.column - 1 } } : undefined;
+  return {
+    type: type,
+    // some trickery here... we know that the start markup char can't
+    // be immediately followed by a newline, and the end char can't be
+    // immediately preceded by a newline, so we can manipulate the
+    // position
+    children: [text(value, innerPos ? { position: innerPos } : {} as Extra<TextMarkupSimple, 'value'>)],
+    ...extra,
+  };
+};
+
+export const complexTextMarkup = <TextTy extends TextMarkupComplex['type']>(type: TextTy) => (children: TextMarkupComplex['children'], extra: ExtraP<TextMarkupComplex>): TextMarkupComplex & { type: TextTy } => ({
+  type: type,
+  children,
+  ...extra
+});
+
+export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => type === 'text.plain' || type === 'text.code' || type === 'text.verbatim' ? simpleStyledText(type) : simpleStyledTextComplex(type);
+
+/** Build an AST text bold object. */
+export const textBold = simpleStyledTextComplex('text.bold');
 
 /** Build an AST text italic object. */
-export const textItalic = styledText('text.italic');
+export const textItalic = simpleStyledTextComplex('text.italic');
 
 /** Build an AST text strikethrough object. */
-export const textStrikethrough = styledText('text.strikeThrough');
+export const textStrikethrough = simpleStyledTextComplex('text.strikeThrough');
 
 /** Build an AST text underline object. */
-export const textUnderline = styledText('text.underline');
+export const textUnderline = simpleStyledTextComplex('text.underline');
 
 /** Footnote reference has empty `children`. */
 export type FootnoteRef = FootnoteReference & { children: [] };

diff --git a/packages/orga/src/reader.ts b/packages/orga/src/reader.ts
@@ -1,4 +1,4 @@
-import { Char, read as _read } from 'text-kit'
+import { Char, TextKit, read as _read } from 'text-kit'
 import { Point, Position } from 'unist'
 import { isGreaterOrEqual } from './position';
 
@@ -80,6 +80,7 @@ export const read = (text: string) => {
     eol,
     jump,
     match: (pattern: RegExp, position: Position = { start: now(), end: eol() }) => match(pattern, position),
+    shift,
   }
   return reader
 }
@@ -100,4 +101,5 @@ export interface Reader {
     captures: string[],
     position: Position;
   } | undefined;
+  shift: TextKit['shift'];
 }
diff --git a/packages/orga/src/tokenize/__tests__/block.spec.ts b/packages/orga/src/tokenize/__tests__/block.spec.ts
@@ -69,15 +69,15 @@ function () {}
     testVerseBlock("inner block with markup", `#+BEGIN_EXAMPLE *text*
 more text
 #+END_EXAMPLE`, [
-      tokText("#+BEGIN_EXAMPLE "), tokTextBold("text"),
+      tokText("#+BEGIN_EXAMPLE "), ...tokTextBold("text"),
       tokNewline(),
       tokText("more text"),
       tokNewline(),
       tokText("#+END_EXAMPLE"),
     ]);
 
     testVerseBlock("heading with markup", "* Heading *with markup*", [
-      tokText("* Heading "), tokTextBold("with markup"),
+      tokText("* Heading "), ...tokTextBold("with markup"),
     ]);
 
     testVerseBlock("lists not tokenized", `- this is not lexed

diff --git a/packages/orga/src/tokenize/__tests__/headline.spec.ts b/packages/orga/src/tokenize/__tests__/headline.spec.ts
@@ -22,7 +22,7 @@ describe("tokenize headline", () => {
 
   testLexerMulti("knows headlines", [
     testHeadline("** a headline", 2, [tokText("a headline")]),
-    testHeadline("** _headline_", 2, [tokTextUnderline("headline")]),
+    testHeadline("** _headline_", 2, tokTextUnderline("headline")),
     testHeadline("**   a headline", 2, [tokText("a headline")]),
     testHeadline("***** a headline", 5, [tokText("a headline")]),
     testHeadline("* a 😀line", 1, [tokText("a 😀line")]),
@@ -35,7 +35,7 @@ describe("tokenize headline", () => {
   testLexerMulti("knows these are not headlines", [
     ["*not a headline", [tokText("*not a headline")]],
     [" * not a headline", [tokText("* not a headline")]],
-    ["*_* not a headline", [tokTextBold("_"), tokText(" not a headline")]],
+    ["*_* not a headline", [...tokTextBold("_"), tokText(" not a headline")]],
     ["not a headline", [tokText("not a headline")]],
   ]);