Skip to content

Commit

Permalink
Add support for objects in certain text markup
Browse files Browse the repository at this point in the history
  • Loading branch information
GuiltyDolphin committed Jul 18, 2021
1 parent a8b8d36 commit d39ae90
Show file tree
Hide file tree
Showing 14 changed files with 302 additions and 80 deletions.
61 changes: 59 additions & 2 deletions packages/orga/src/parse/__tests__/paragraph.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {
inlineFootnote,
link,
paragraph,
pos,
Expand All @@ -7,10 +8,15 @@ import {
testParseSection,
text,
textBold,
textBoldC,
textCode,
textItalic,
textItalicC,
textStrikethrough,
textStrikethroughC,
textUnderline,
textUnderlineC,
textVerbatim,
} from './util';

describe('Parse Paragraph', () => {
Expand Down Expand Up @@ -119,6 +125,57 @@ the round pegs in the +round+ square holes...
testParseSection(testName, text, [paragraph(...rest)]);
};

// Code (`~`) and verbatim (`=`) markup keep their contents as literal text:
// anything that looks like a link or nested markup must come back unparsed.
describe("code and verbatim only contain text", () => {
  const literalMarkup = [["~", textCode], ["=", textVerbatim]] as const;
  literalMarkup.forEach(([marker, build]) => {
    // Surround `inner` with the marker under test.
    const wrap = (inner: string) => `${marker}${inner}${marker}`;
    describe(`markup: ${marker}`, () => {
      testParseParagraph("does not allow links", wrap("[[link]]"), [build("[[link]]")]);
      testParseParagraph("does not allow bold markup", wrap("*bold*"), [build("*bold*")]);
    });
  });
});

// Bold, italic, strike-through and underline may contain other objects
// (links, nested markup, footnotes), so the expected nodes carry children.
describe("bold italic strike-through and underline support object contents", () => {
  const objectMarkup = [
    ["*", textBoldC],
    ["/", textItalicC],
    ["+", textStrikethroughC],
    ["_", textUnderlineC],
  ] as const;
  objectMarkup.forEach(([marker, build]) => {
    // Surround `inner` with the marker under test.
    const wrap = (inner: string) => `${marker}${inner}${marker}`;
    describe(`markup: ${marker}`, () => {
      testParseParagraph("allows links", wrap("[[https://duckduckgo.com]]"), [build([link("https://duckduckgo.com")])]);
      testParseParagraph("allows bold markup", wrap("*bold*"), [build([textBold("bold")])]);
      testParseParagraph("allows footnote references", wrap("[fn:name:Test]"), [build([inlineFootnote("name", [text("Test")])])]);
    });
  });
  // One bold span holding plain text, underline, italic and verbatim
  // children, plus a stray `*` inside a word that must stay literal.
  testParseParagraph(
    "nested markup example",
    "*Test1 _test2_ /test3/ te*st =test4=*",
    [
      textBoldC([
        text("Test1 "),
        textUnderline("test2"),
        text(" "),
        textItalic("test3"),
        text(" te*st "),
        textVerbatim("test4"),
      ]),
    ],
  );
});

// Each markup marker paired with the helper that builds the expected node
// for that marker from a plain string.
const markupCharsWithFns = [
  ["*", textBold],
  ["=", textVerbatim],
  ["/", textItalic],
  ["+", textStrikethrough],
  ["_", textUnderline],
  ["~", textCode],
] as const;
// Only the marker characters, in the same order as the table above.
const markupChars = markupCharsWithFns.map(([marker]) => marker);

// Checks how each marker behaves when it is directly adjacent to another
// marker character (itself or a different one) instead of whitespace.
describe("markup with non-whitespace", () => {
  markupCharsWithFns.forEach(([marker, build]) => {
    describe(`markup: ${marker}`, () => {
      // NOTE: Org parser 2.4.4 treats __Test_ as a subscript rather than underline (2021-07-18)
      const doubledOpen = `${marker}${marker}Test${marker}`;
      testParseParagraph("preceded by self (Org parser 2.4.4)", doubledOpen, [build(`${marker}Test`)]);

      const doubledClose = `${marker}Test${marker}${marker}`;
      testParseParagraph("followed by self (Org parser 2.4.4)", doubledClose, [build(`Test${marker}`)]);

      // A *different* marker right after the closing one: the whole input
      // is expected back as plain text.
      markupChars
        .filter(other => other !== marker)
        .forEach(other => {
          const input = `${marker}Test${marker}${other}`;
          testParseParagraph(`followed by ${other} (Org parser 2.4.4)`, input, [text(input)]);
        });
    });
  });
});
testParseParagraph("underline mixed markup example (Org parser 2.4.4)", "_Test1 _test2_ /test3/ =test4=_", [textUnderline("Test1 _test2"), text(" "), textItalic("test3"), text(" =test4=_")]);

testParseParagraph("bold empty (Org parser 2.4.4)", "**", [text("**")]);
testParseParagraph("bold bold (Org parser 2.4.4)", "****", [textBold("**")]);
testParseParagraph("bold in bold (Org parser 2.4.4)", "**Test**", [textBoldC([textBold("Test")])]);

testParseParagraph("pure markup", "_Test1_", [textUnderline("Test1")]);
testParseParagraph("markup followed by newline", "_Test1_\n", [textUnderline("Test1")]);
testParseParagraph("markup preceded by newline", "\n_Test1_", [textUnderline("Test1")]);
Expand All @@ -129,9 +186,9 @@ the round pegs in the +round+ square holes...
testParseParagraph("marker cannot be first in line (end)", "_Test1\n_", [text("_Test1"), text(" "), text("_")]);
});

testParseParagraph("marker with next line ending", "_Test1\nTest2_", [textUnderline("Test1\nTest2")]);
testParseParagraph("marker with next line ending", "_Test1\nTest2_", [textUnderlineC([text("Test1"), text(" "), text("Test2")])]);

testParseParagraph("marker with next line ending and spaces", "_Test1\n Test2_", [textUnderline("Test1\n Test2")]);
testParseParagraph("marker with next line ending and spaces", "_Test1\n Test2_", [textUnderlineC([text("Test1"), text(" "), text(" Test2")])]);

testParseParagraph("cannot span more than 3 lines (spec v2021.07.03)", "_Test1\nTest2\nTest3\nTest4_", [text("_Test1"), text(" "), text("Test2"), text(" "), text("Test3"), text(" "), text("Test4_")]);

Expand Down
21 changes: 19 additions & 2 deletions packages/orga/src/parse/__tests__/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import {
Table,
TableCell,
TableRow,
TextMarkupComplex,
TextMarkupSimple,
Timestamp,
Token,
VerseBlock,
Expand Down Expand Up @@ -73,21 +75,36 @@ export const greaterBlock = (name: GreaterBlock['name'], children: GreaterBlock[
export const specialBlock = (name: SpecialBlock['name'], children: SpecialBlock['children'], extra: ExtraP<SpecialBlock, 'name'> = {}): SpecialBlock =>
ast.specialBlock(name, children, mkExtra(extra));

export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => (text: string, extra: Extra<StyledText, 'value'> = {}): StyledText & { type: TextTy } =>
ast.styledText(type)(text, mkExtra(extra));
/**
 * Test helper: curried builder for simple (string-valued) text markup nodes.
 *
 * Delegates to `ast.simpleStyledText(type)`, passing `extra` through
 * `mkExtra` first. `extra` defaults to an empty object.
 */
export const simpleStyledText = <TextTy extends TextMarkupSimple['type']>(type: TextTy) =>
  (text: string, extra: Extra<TextMarkupSimple, 'value'> = {}): TextMarkupSimple & { type: TextTy } => {
    const build = ast.simpleStyledText(type);
    return build(text, mkExtra(extra));
  };

/**
 * Test helper: curried builder for complex text markup nodes described by
 * a single string value.
 *
 * Delegates to `ast.simpleStyledTextComplex(type)`, passing `extra` through
 * `mkExtra` first. `extra` defaults to an empty object.
 */
export const simpleStyledTextComplex = <TextTy extends TextMarkupComplex['type']>(type: TextTy) =>
  (value: string, extra: ExtraP<TextMarkupComplex> = {}): TextMarkupComplex & { type: TextTy } => {
    const build = ast.simpleStyledTextComplex(type);
    return build(value, mkExtra(extra));
  };

/**
 * Test helper: curried builder for complex text markup nodes given an
 * explicit list of children.
 *
 * Delegates to `ast.complexTextMarkup(type)`, passing `extra` through
 * `mkExtra` first. `extra` defaults to an empty object.
 */
export const complexTextMarkup = <TextTy extends TextMarkupComplex['type']>(type: TextTy) =>
  (children: TextMarkupComplex['children'], extra: ExtraP<TextMarkupComplex> = {}): TextMarkupComplex & { type: TextTy } => {
    const build = ast.complexTextMarkup(type);
    return build(children, mkExtra(extra));
  };

/**
 * Test helper: pick the string-taking builder that fits `type`.
 *
 * Plain, code and verbatim text use `simpleStyledText`; every other
 * styled-text type uses `simpleStyledTextComplex`.
 */
export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => {
  if (type === 'text.plain' || type === 'text.code' || type === 'text.verbatim') {
    return simpleStyledText(type);
  }
  return simpleStyledTextComplex(type);
};

// String-taking builders come from `styledText`; the `*C` variants come from
// `complexTextMarkup` and take an explicit list of children instead.

/** Plain text node built from a string. */
export const text = styledText('text.plain');

/** Bold node built from a string. */
export const textBold = styledText('text.bold');

/** Bold node built from a list of children. */
export const textBoldC = complexTextMarkup('text.bold');

/** Code node built from a string. */
export const textCode = styledText('text.code');

/** Italic node built from a string. */
export const textItalic = styledText('text.italic');

/** Italic node built from a list of children. */
export const textItalicC = complexTextMarkup('text.italic');

/** Strike-through node built from a string. */
export const textStrikethrough = styledText('text.strikeThrough');

/** Strike-through node built from a list of children. */
export const textStrikethroughC = complexTextMarkup('text.strikeThrough');

/** Underline node built from a string. */
export const textUnderline = styledText('text.underline');

/** Underline node built from a list of children. */
export const textUnderlineC = complexTextMarkup('text.underline');

/** Verbatim node built from a string. */
export const textVerbatim = styledText('text.verbatim');

import { FootnoteRef, FootnoteInline, FootnoteAnon } from '../utils';

export const footnoteReference = (label: string, extra: ExtraP<FootnoteRef, 'label'> = {}): FootnoteRef =>
Expand Down
46 changes: 41 additions & 5 deletions packages/orga/src/parse/textMarkup.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,50 @@
import { Lexer } from '../tokenize'
import { StyledText } from '../types'
import { isStyledText } from '../utils';
import { PhrasingContent, StyledText } from '../types'
import utils, { complexTextMarkup, manyEndBy, oneOf, simpleStyledText, text } from './utils';
import phrasingContent from './phrasingContent';

const MARKERS = {
'*': 'text.bold',
'/': 'text.italic',
'+': 'text.strikeThrough',
'_': 'text.underline',
} as const;

export default (lexer: Lexer): StyledText | undefined => {
const { peek, eat } = lexer
const { returning, tryTo } = utils(lexer);

const token = peek()

if (token && isStyledText(token)) {
eat()
return token;
if (token) {
if (token.type === 'text.plain' || token.type === 'text.code' || token.type === 'text.verbatim') {
// simple cases - these cannot contain objects
eat();
return simpleStyledText(token.type)(token.value, { position: token.position })
} else if (token.type === 'token.complexStyleChar') {
// "CONTENTS can contain any object encountered in a paragraph
// when markup is “bold”, “italic”, “strike-through” or
// “underline”." - spec v2021.07.03
eat();
const matchChar = token.char;
const newline = () => {
const token = peek();
if (token && token.type === 'newline') {
eat();
return (text(' ', { position: token.position }));
}
};
const phrasingContentOrNewline = oneOf([newline, phrasingContent]);
const toks = returning(tryTo(manyEndBy(phrasingContentOrNewline, () => {
const t = peek();
if (t && t.type === 'token.complexStyleChar' && t.char === matchChar) {
eat();
return t;
}
})))();
if (!toks) return;
const toksButLast = toks.slice(0, toks.length - 1) as PhrasingContent[];
return complexTextMarkup(MARKERS[matchChar])(toksButLast, { position: { start: token.position.start, end: toks[toks.length - 1].position.end } });
}
}
}
75 changes: 64 additions & 11 deletions packages/orga/src/parse/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import {
Table,
TableCell,
TableRow,
TextMarkupComplex,
TextMarkupSimple,
Timestamp,
Token,
VerseBlock,
Expand Down Expand Up @@ -186,12 +188,18 @@ export const manyOf = <T>(parse: TokenParser<T>): TokenParser<T[]> => {
};
}

/** Parse zero or more occurrences of `p` ended by `end`. */
/**
* Parse zero or more occurrences of `p` ended by `end`.
*
* This tries `end` before each occurrence of `p`, and thus you should
* use `manyEndBy` instead if you need nesting.
*/
export const manyTill = <T, End>(p: TokenParser<T>, end: TokenParser<End>): TokenParser<[...T[], End]> => {
return (lexer: Lexer) => {
const { returning, tryTo } = lexActions(lexer);
const res: T[] = [];
while (true) {
if (!lexer.peek()) return;
const last = returning(tryTo(end))();
if (last) {
return [...res, last];
Expand All @@ -206,6 +214,29 @@ export const manyTill = <T, End>(p: TokenParser<T>, end: TokenParser<End>): Toke
};
}

/**
 * Parse zero or more occurrences of `p` ended by `end`.
 *
 * This tries `end` _after_ each occurrence of `p`, so can be used for nesting.
 *
 * @param p - parser for the repeated items; each truthy result is collected.
 * @param end - parser for the terminator; its result is appended last.
 * @returns a parser yielding `[...items, endResult]`, or `undefined` when
 *   the lexer runs out of tokens before `end` matches, or when neither `p`
 *   nor `end` matches at the current position.
 */
export const manyEndBy = <T, End>(p: TokenParser<T>, end: TokenParser<End>): TokenParser<[...T[], End]> => {
  return (lexer: Lexer) => {
    const { returning, tryTo } = lexActions(lexer);
    const res: T[] = [];
    while (true) {
      // Ran out of input without ever seeing `end`.
      if (!lexer.peek()) return;
      const next = returning(tryTo(p))();
      if (next) {
        res.push(next);
      }
      const last = returning(tryTo(end))();
      if (last) {
        return [...res, last];
      }
      // Neither parser matched on this pass. Assuming `tryTo` rewinds the
      // lexer on failure (TODO confirm), the next pass would see exactly the
      // same state and loop forever, so treat this as a failed parse.
      if (!next) return;
    }
  };
}

/** All of the given `ps` in sequence. */
export const seq = <T, N extends number>(ps: TokenParser<T>[] & { length: N }): TokenParser<T[] & { length: N }> => {
return (lexer: Lexer) => {
Expand Down Expand Up @@ -314,30 +345,52 @@ export const specialBlock = (name: SpecialBlock['name'], children: SpecialBlock[
...extra
});

/** Build an AST {@link StyledText} object. */
export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => (text: string, extra: Extra<StyledText, 'value'>): StyledText & { type: TextTy } => ({
/**
 * Build an AST node for simple text markup, i.e. markup whose content is a
 * plain string stored in `value`.
 *
 * `extra` is spread last, so any field it carries ends up on the node.
 */
export const simpleStyledText = <TextTy extends TextMarkupSimple['type']>(type: TextTy) =>
  (text: string, extra: Extra<TextMarkupSimple, 'value'>): TextMarkupSimple & { type: TextTy } => {
    return { type, value: text, ...extra };
  };

/** Build an AST plain text object. */
export const text = styledText('text.plain');

/** Build an AST text bold object. */
export const textBold = styledText('text.bold');
export const text = simpleStyledText('text.plain');

/** Build an AST text code object. */
export const textCode = styledText('text.code');
export const textCode = simpleStyledText('text.code');

/**
 * Build a complex text markup node from a single string: the node gets one
 * plain-text child holding `value`.
 *
 * When `extra.position` has both a start and an end, the child is given a
 * position one column inside the parent's on each side; otherwise the child
 * is built without a position.
 */
export const simpleStyledTextComplex = <TextTy extends TextMarkupComplex['type']>(type: TextTy) =>
  (value: string, extra: ExtraP<TextMarkupComplex>): TextMarkupComplex & { type: TextTy } => {
    const from = extra.position?.start;
    const to = extra.position?.end;

    let childExtra: Extra<TextMarkupSimple, 'value'>;
    if (from && to) {
      // Some trickery here: the start markup char can't be immediately
      // followed by a newline, and the end markup char can't be immediately
      // preceded by one, so shifting the columns by one stays on the same
      // lines.
      childExtra = {
        position: {
          start: { line: from.line, column: from.column + 1 },
          end: { line: to.line, column: to.column - 1 },
        },
      };
    } else {
      childExtra = {} as Extra<TextMarkupSimple, 'value'>;
    }

    return {
      type,
      children: [text(value, childExtra)],
      ...extra,
    };
  };

/**
 * Build a complex text markup node from an explicit list of children.
 *
 * `extra` is spread last, so any field it carries ends up on the node.
 */
export const complexTextMarkup = <TextTy extends TextMarkupComplex['type']>(type: TextTy) =>
  (children: TextMarkupComplex['children'], extra: ExtraP<TextMarkupComplex>): TextMarkupComplex & { type: TextTy } => {
    return { type, children, ...extra };
  };

/**
 * Pick the string-taking builder that fits `type`.
 *
 * Plain, code and verbatim text are built with `simpleStyledText`; every
 * other styled-text type is built with `simpleStyledTextComplex`.
 */
export const styledText = <TextTy extends StyledText['type']>(type: TextTy) => {
  if (type === 'text.plain' || type === 'text.code' || type === 'text.verbatim') {
    return simpleStyledText(type);
  }
  return simpleStyledTextComplex(type);
};

/** Build an AST text bold object. */
export const textBold = simpleStyledTextComplex('text.bold');

/** Build an AST text italic object. */
export const textItalic = styledText('text.italic');
export const textItalic = simpleStyledTextComplex('text.italic');

/** Build an AST text strikethrough object. */
export const textStrikethrough = styledText('text.strikeThrough');
export const textStrikethrough = simpleStyledTextComplex('text.strikeThrough');

/** Build an AST text underline object. */
export const textUnderline = styledText('text.underline');
export const textUnderline = simpleStyledTextComplex('text.underline');

/** Footnote reference has empty `children`. */
export type FootnoteRef = FootnoteReference & { children: [] };
Expand Down
4 changes: 3 additions & 1 deletion packages/orga/src/reader.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Char, read as _read } from 'text-kit'
import { Char, TextKit, read as _read } from 'text-kit'
import { Point, Position } from 'unist'
import { isGreaterOrEqual } from './position';

Expand Down Expand Up @@ -80,6 +80,7 @@ export const read = (text: string) => {
eol,
jump,
match: (pattern: RegExp, position: Position = { start: now(), end: eol() }) => match(pattern, position),
shift,
}
return reader
}
Expand All @@ -100,4 +101,5 @@ export interface Reader {
captures: string[],
position: Position;
} | undefined;
shift: TextKit['shift'];
}
4 changes: 2 additions & 2 deletions packages/orga/src/tokenize/__tests__/block.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@ function () {}
testVerseBlock("inner block with markup", `#+BEGIN_EXAMPLE *text*
more text
#+END_EXAMPLE`, [
tokText("#+BEGIN_EXAMPLE "), tokTextBold("text"),
tokText("#+BEGIN_EXAMPLE "), ...tokTextBold("text"),
tokNewline(),
tokText("more text"),
tokNewline(),
tokText("#+END_EXAMPLE"),
]);

testVerseBlock("heading with markup", "* Heading *with markup*", [
tokText("* Heading "), tokTextBold("with markup"),
tokText("* Heading "), ...tokTextBold("with markup"),
]);

testVerseBlock("lists not tokenized", `- this is not lexed
Expand Down
4 changes: 2 additions & 2 deletions packages/orga/src/tokenize/__tests__/headline.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ describe("tokenize headline", () => {

testLexerMulti("knows headlines", [
testHeadline("** a headline", 2, [tokText("a headline")]),
testHeadline("** _headline_", 2, [tokTextUnderline("headline")]),
testHeadline("** _headline_", 2, tokTextUnderline("headline")),
testHeadline("** a headline", 2, [tokText("a headline")]),
testHeadline("***** a headline", 5, [tokText("a headline")]),
testHeadline("* a 😀line", 1, [tokText("a 😀line")]),
Expand All @@ -35,7 +35,7 @@ describe("tokenize headline", () => {
testLexerMulti("knows these are not headlines", [
["*not a headline", [tokText("*not a headline")]],
[" * not a headline", [tokText("* not a headline")]],
["*_* not a headline", [tokTextBold("_"), tokText(" not a headline")]],
["*_* not a headline", [...tokTextBold("_"), tokText(" not a headline")]],
["not a headline", [tokText("not a headline")]],
]);

Expand Down
Loading

0 comments on commit d39ae90

Please sign in to comment.