diff --git a/.changeset/gold-plants-tickle.md b/.changeset/gold-plants-tickle.md new file mode 100644 index 000000000..418bd38b4 --- /dev/null +++ b/.changeset/gold-plants-tickle.md @@ -0,0 +1,9 @@ +--- +'tex-to-myst': patch +--- + +Add `algorithm` and `algorithmic` handlers for latex parsing + +- Figures: Environment centering +- newtheorem in frontmatter is parsed +- safely ignore `itemsep`, `setcounter`, `cmidrule` for now diff --git a/.changeset/modern-pigs-confess.md b/.changeset/modern-pigs-confess.md new file mode 100644 index 000000000..01c73d41c --- /dev/null +++ b/.changeset/modern-pigs-confess.md @@ -0,0 +1,5 @@ +--- +'myst-spec-ext': patch +--- + +Add `Line` to myst-spec-ext diff --git a/.changeset/wise-spiders-beg.md b/.changeset/wise-spiders-beg.md new file mode 100644 index 000000000..559fff230 --- /dev/null +++ b/.changeset/wise-spiders-beg.md @@ -0,0 +1,5 @@ +--- +'myst-to-jats': patch +--- + +Add statements to JATS, count and number `statements` diff --git a/packages/myst-spec-ext/src/types.ts b/packages/myst-spec-ext/src/types.ts index 43081656b..b8a75d1bd 100644 --- a/packages/myst-spec-ext/src/types.ts +++ b/packages/myst-spec-ext/src/types.ts @@ -14,6 +14,7 @@ import type { Code as SpecCode, ListItem as SpecListItem, Container as SpecContainer, + InlineMath as SpecInlineMath, } from 'myst-spec'; type Visibility = 'show' | 'hide' | 'remove'; @@ -36,6 +37,22 @@ export type CaptionNumber = Parent & { enumerator: string; }; +/** + * AlgorithmLine is, e.g., a line in an algorithm and can be numbered as well as indented. + * Otherwise this works the same as a paragraph, ideally with tighter styling. + * The Line is used in Algorithms (e.g. when parsing from LaTeX) + */ +export type AlgorithmLine = Parent & { + type: 'algorithmLine'; + indent?: number; + enumerator?: string; +}; + +export type InlineMath = SpecInlineMath & { + label?: string; + identifier?: string; +}; + export type FootnoteDefinition = FND & { /** @deprecated this should be enumerator */ number?: number; @@ -184,6 +201,7 @@ export type Include = { caption?: (FlowContent | ListContent | PhrasingContent)[]; }; -export type Container = SpecContainer & { +export type Container = Omit & { + kind?: 'figure' | 'table' | 'quote' | 'code'; source?: Dependency; }; diff --git a/packages/myst-to-jats/src/index.ts b/packages/myst-to-jats/src/index.ts index 71f5923ed..9390f693f 100644 --- a/packages/myst-to-jats/src/index.ts +++ b/packages/myst-to-jats/src/index.ts @@ -1,5 +1,50 @@ -import type { Root, CrossReference, TableCell as SpecTableCell, Math, InlineMath } from 'myst-spec'; -import type { Cite, Code, FootnoteDefinition, FootnoteReference } from 'myst-spec-ext'; +import type { + Root, + CrossReference, + TableCell as SpecTableCell, + Math, + Text, + Paragraph, + Blockquote, + List, + ThematicBreak, + Role, + Directive, + Comment, + Strong, + Emphasis, + Underline, + InlineCode, + Subscript, + Superscript, + Abbreviation, + Link, + AdmonitionTitle, + Table, + Caption, + Break, +} from 'myst-spec'; +import type { + Block, + Cite, + Code, + DefinitionTerm, + DefinitionDescription, + DefinitionList, + FootnoteDefinition, + FootnoteReference, + Heading, + AlgorithmLine, + ListItem, + InlineMath, + Image, + Delete, + Smallcaps, + Admonition, + Container, + CaptionNumber, + CiteGroup, +} from 'myst-spec-ext'; import type { Plugin } from 'unified'; import { VFile } from 'vfile'; import { xml2js } from 'xml-js'; @@ -36,6 +81,7 @@ import type { IdInventory } from './transforms/references.js'; import type { Section } from './transforms/sections.js'; import { sectionAttrsFromBlock } from './transforms/sections.js'; import { inlineExpression } from './inlineExpression.js'; +import type { DefinitionItem } from './transforms/definitions.js'; type TableCell = SpecTableCell & { colspan?: number; rowspan?: number; width?: number }; @@ -44,7 +90,7 @@ function escapeForXML(text: string) { } function referenceKindToRefType(kind?: string): RefType { - switch (kind) { + switch (kind?.split(':')[0]) { case 'heading': return RefType.sec; case 'figure': @@ -53,6 +99,8 @@ function referenceKindToRefType(kind?: string): RefType { return RefType.dispFormula; case 'table': return RefType.table; + case 'proof': + return RefType.statement; default: return RefType.custom; } @@ -133,10 +181,12 @@ function addMmlAndRemoveAnnotation(el?: Element) { function mathToMml(node: Math | InlineMath) { const math = copyNode(node); - // TODO: add macros + // TODO: add macros, log errors renderEquation(new VFile(), math, { mathML: true }); - const katexJs = xml2js((math as any).html, { compact: false }) as Element; - const spanElement = katexJs.elements?.[0]; + const katexJs = (math as any).html + ? (xml2js((math as any).html, { compact: false }) as Element) + : undefined; + const spanElement = katexJs?.elements?.[0]; const mathElement = spanElement?.elements?.[0]; if (!mathElement) return; const inline = node.type === 'inlineMath'; @@ -165,7 +215,71 @@ function cleanLatex(value?: string): string | undefined { .trim(); } -const handlers: Record = { +// TODO: this should be based on some information of the proof, or myst config +function capitalize(kind?: string) { + if (!kind) return ''; + return kind.slice(0, 1).toUpperCase() + kind.slice(1); +} + +type Handlers = { + text: Handler; + paragraph: Handler; + section: Handler
; + heading: Handler; + block: Handler; + blockquote: Handler
; + definitionList: Handler; + definitionItem: Handler; + definitionTerm: Handler; + definitionDescription: Handler; + code: Handler; + list: Handler; + listItem: Handler; + thematicBreak: Handler; + inlineMath: Handler; + math: Handler; + mystRole: Handler; + mystDirective: Handler; + comment: Handler; + strong: Handler; + emphasis: Handler; + underline: Handler; + inlineCode: Handler; + subscript: Handler; + superscript: Handler; + delete: Handler; + smallcaps: Handler; + break: Handler; + abbreviation: Handler; + link: Handler; + admonition: Handler; + admonitionTitle: Handler; + attrib: Handler; + table: Handler; + tableHead: Handler; + tableBody: Handler; + tableFooter: Handler; + tableRow: Handler; + tableCell: Handler; + image: Handler; + container: Handler; + caption: Handler
; + captionNumber: Handler; + crossReference: Handler; + citeGroup: Handler; + cite: Handler; + footnoteReference: Handler; + footnoteDefinition: Handler; + si: Handler; + proof: Handler; + algorithmLine: Handler; + output: Handler; + embed: Handler; + supplementaryMaterial: Handler; + inlineExpression: Handler; +}; + +const handlers: Handlers = { text(node, state) { state.text(node.value); }, @@ -396,7 +510,7 @@ const handlers: Record = { state.renderInline(node, 'caption'); }, captionNumber(node, state) { - delete node.identifier; + delete (node as any).identifier; state.renderInline(node, 'label'); }, crossReference(node, state) { @@ -441,11 +555,42 @@ const handlers: Record = { }, si(node, state) { // 5 mm - state.openNode('named-content', { 'content-type': 'quantity' }); - if (node.number != null) state.text(`${node.number} `); + const hasNumber = node.number != null; + if (hasNumber) { + state.openNode('named-content', { 'content-type': 'quantity' }); + state.text(`${node.number} `); + } state.openNode('abbrev', { 'content-type': 'unit', alt: node.alt }); state.text(node.unit); state.closeNode(); + if (hasNumber) state.closeNode(); + }, + proof(node, state) { + state.openNode('statement', { 'specific-use': node.kind, id: node.identifier }); + const [title, ...rest] = node.children ?? []; + const useTitle = title && title.type === 'admonitionTitle'; + if (node.enumerated) { + state.openNode('label'); + state.text(`${capitalize(node.kind)} ${node.enumerator}`); + state.closeNode(); + } + if (useTitle) { + state.openNode('title'); + state.renderChildren(title); + state.closeNode(); + } + state.renderChildren(useTitle ? rest : node.children); + state.closeNode(); + }, + algorithmLine(node, state) { + state.openNode('p', { 'specific-use': 'line' }); + if (node.enumerator) { + state.openNode('x'); + state.text(`${node.enumerator}: `); + state.closeNode(); + } + state.text(Array(node.indent).fill(' ').join('')); + state.renderChildren(node); state.closeNode(); }, output(node, state) { @@ -652,11 +797,13 @@ class JatsSerializer implements IJatsSerializer { return node; } - renderChildren(node: GenericNode) { - node.children?.forEach((child) => { + renderChildren(node: GenericNode | GenericNode[]) { + const parent = Array.isArray(node) ? { children: node } : node; + const children = Array.isArray(node) ? node : node.children; + children?.forEach((child) => { const handler = this.handlers[child.type]; if (handler) { - handler(child, this, node); + handler(child, this, parent); } else { fileError(this.file, `Unhandled JATS conversion for node of "${child.type}"`, { node: child, diff --git a/packages/myst-to-jats/src/transforms/definitions.ts b/packages/myst-to-jats/src/transforms/definitions.ts index 142802308..9626cbf5b 100644 --- a/packages/myst-to-jats/src/transforms/definitions.ts +++ b/packages/myst-to-jats/src/transforms/definitions.ts @@ -4,7 +4,7 @@ import type { DefinitionList } from 'myst-spec-ext'; import { selectAll } from 'unist-util-select'; import type { GenericParent } from 'myst-common'; -type DefinitionItem = Parent & { type: 'definitionItem' }; +export type DefinitionItem = Parent & { type: 'definitionItem' }; export function definitionTransform(mdast: GenericParent) { const defList = selectAll('definitionList', mdast) as DefinitionList[]; diff --git a/packages/myst-to-jats/src/transforms/references.ts b/packages/myst-to-jats/src/transforms/references.ts index ec1a5828c..8c0f48e86 100644 --- a/packages/myst-to-jats/src/transforms/references.ts +++ b/packages/myst-to-jats/src/transforms/references.ts @@ -14,6 +14,7 @@ export type IdInventory = { quote?: CountAndLookup; cite?: CountAndLookup; footnote?: CountAndLookup; + proof?: CountAndLookup; }; const CONTAINER_KINDS: (keyof IdInventory)[] = ['figure', 'table', 'code', 'quote']; @@ -63,6 +64,10 @@ export function referenceTargetTransform( footnotes.forEach((fn) => { updateInventory(fn, 'footnote', 'fn', inventory); }); + const proofs = selectAll('proof', mdast) as GenericNode[]; + proofs.forEach((fn) => { + updateInventory(fn, 'proof', 'stm', inventory); + }); const containers = selectAll('container', mdast) as GenericNode[]; containers.forEach((container) => { if (!container.kind || !CONTAINER_KINDS.includes(container.kind as any)) { @@ -97,6 +102,7 @@ export function referenceResolutionTransform(mdast: GenericParent, inventory: Id ...inventory.table?.lookup, ...inventory.code?.lookup, ...inventory.quote?.lookup, + ...inventory.proof?.lookup, }; xrefs.forEach((xref) => { if (xref.identifier && lookup[xref.identifier]) { diff --git a/packages/myst-to-jats/src/types.ts b/packages/myst-to-jats/src/types.ts index 009fc77d1..28da44a2c 100644 --- a/packages/myst-to-jats/src/types.ts +++ b/packages/myst-to-jats/src/types.ts @@ -16,7 +16,7 @@ export type Element = { elements?: Element[]; }; -export type Handler = (node: GenericNode, state: IJatsSerializer, parent: any) => void; +export type Handler = (node: T, state: IJatsSerializer, parent: any) => void; export type MathPlugins = Required['math']; diff --git a/packages/myst-to-jats/tests/basic.spec.ts b/packages/myst-to-jats/tests/basic.spec.ts index ddabe2fe6..10b857685 100644 --- a/packages/myst-to-jats/tests/basic.spec.ts +++ b/packages/myst-to-jats/tests/basic.spec.ts @@ -73,7 +73,7 @@ beforeEach(() => { }); describe('Basic JATS body', () => { - const cases = [...loadCases('basic.yml'), ...loadCases('siunit.yml')]; + const cases = [...loadCases('basic.yml'), ...loadCases('siunit.yml'), ...loadCases('proof.yml')]; test.each(cases.map((c): [string, TestCase] => [c.title, c]))('%s', async (_, { tree, jats }) => { const pipe = unified().use( mystToJats, diff --git a/packages/myst-to-jats/tests/proof.yml b/packages/myst-to-jats/tests/proof.yml new file mode 100644 index 000000000..f230346fa --- /dev/null +++ b/packages/myst-to-jats/tests/proof.yml @@ -0,0 +1,27 @@ +cases: + - title: Proof + tree: + type: root + children: + - type: proof + kind: algorithm + label: alg:cap + identifier: alg:cap + enumerated: true + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: An algorithm with caption + - type: algorithmLine + indent: 1 + enumerator: '1' + children: + - type: strong + children: + - type: text + value: 'Require: ' + - type: text + value: 'alg' + jats: 'An algorithm with caption

1: Require: alg

' diff --git a/packages/tex-to-myst/src/algorithms.ts b/packages/tex-to-myst/src/algorithms.ts new file mode 100644 index 000000000..861fd8b46 --- /dev/null +++ b/packages/tex-to-myst/src/algorithms.ts @@ -0,0 +1,172 @@ +import { u } from 'unist-builder'; +import type { GenericNode } from 'myst-common'; +import { normalizeLabel } from 'myst-common'; +import { type Paragraph } from 'myst-spec'; +import { type AlgorithmLine } from 'myst-spec-ext'; +import type { Handler, ITexParser } from './types.js'; +import { getArguments } from './utils.js'; +import { select, selectAll } from 'unist-util-select'; + +function addNestingStatement( + node: GenericNode, + state: ITexParser, + { before, after }: { before: string; after?: string }, +) { + state.closeParagraph(); + state.openParagraph({ indent: state.data.algorithm_indent }); + state.pushNode(u('strong', [u('text', before)])); + const args = getArguments(node, 'group'); + const children = args[args.length - 1]; + if (children) { + state.text(' '); + state.renderChildren(children); + } + if (after) { + state.text(' '); + state.pushNode(u('strong', [u('text', after)])); + } + state.closeParagraph(); + state.data.algorithm_indent ??= 0; + state.data.algorithm_indent += 1; +} + +function finishNestingStatement(node: GenericNode, state: ITexParser, { text }: { text: string }) { + state.closeParagraph(); + state.data.algorithm_indent ??= 0; + state.data.algorithm_indent -= 1; + state.openParagraph({ indent: state.data.algorithm_indent }); + state.pushNode(u('strong', [u('text', text)])); + const args = getArguments(node, 'group'); + const children = args[args.length - 1]; + if (children) { + state.text(' '); + state.renderChildren(children); + } + state.closeParagraph(); +} + +function numberParagraphsAsLines(node: GenericNode) { + const paragraphs = selectAll('paragraph', node) as Paragraph[]; + paragraphs.forEach((p, i) => { + const l = p as unknown as AlgorithmLine; + l.type = 'algorithmLine'; + l.enumerator = String(i + 1); + }); +} + +export function createTheoremHandler(name: string): Handler { + return function (node, state) { + state.closeParagraph(); + state.openNode('proof', { kind: name, enumerated: true }); + state.renderChildren(node); + state.closeParagraph(); + state.closeNode(); + }; +} + +export const ALGORITHM_HANDLERS: Record = { + env_algorithm(node, state) { + state.closeParagraph(); + state.openNode('proof', { kind: 'algorithm' }); + state.renderChildren(node); + state.closeParagraph(); + const proof = state.top(); + const caption = select('caption', proof) as GenericNode; + if (caption) { + caption.type = 'admonitionTitle'; + caption.children = caption.children?.[0].children; + } + if (caption?.label) { + // Move the caption label up to the proof + proof.label = caption.label; + delete caption.label; + delete caption.identifier; + } + if (proof.label) { + const { label, identifier } = normalizeLabel(proof.label) ?? {}; + proof.label = label; + proof.identifier = identifier; + proof.enumerated = true; + } + numberParagraphsAsLines(proof); + state.closeNode(); + }, + env_algorithmic(node, state) { + state.closeParagraph(); + const forceProof = state.top().type !== 'proof'; + if (forceProof) state.openNode('proof', { kind: 'algorithm' }); + state.data.algorithm_indent = 0; + state.renderChildren(node); + state.closeParagraph(); + if (forceProof) { + numberParagraphsAsLines(state.top()); + state.closeNode(); + } + }, + macro_Loop(node, state) { + addNestingStatement(node, state, { before: 'loop' }); + }, + macro_For(node, state) { + addNestingStatement(node, state, { before: 'for', after: 'do' }); + }, + macro_ForAll(node, state) { + addNestingStatement(node, state, { before: 'for all', after: 'do' }); + }, + macro_State(node, state) { + state.closeParagraph(); + state.openParagraph({ indent: state.data.algorithm_indent }); + }, + macro_Ensure(node, state) { + state.closeParagraph(); + state.openParagraph({ indent: state.data.algorithm_indent }); + state.pushNode(u('strong', [u('text', 'Ensure: ')])); + }, + macro_Require(node, state) { + state.closeParagraph(); + state.openParagraph({ indent: state.data.algorithm_indent }); + state.pushNode(u('strong', [u('text', 'Require: ')])); + }, + macro_Repeat(node, state) { + addNestingStatement(node, state, { before: 'repeat ' }); + }, + macro_Until(node, state) { + finishNestingStatement(node, state, { text: 'until' }); + }, + macro_While(node, state) { + addNestingStatement(node, state, { before: 'while', after: 'do' }); + }, + macro_EndWhile(node, state) { + finishNestingStatement(node, state, { text: 'end while' }); + }, + macro_EndFor(node, state) { + finishNestingStatement(node, state, { text: 'end for' }); + }, + macro_EndLoop(node, state) { + finishNestingStatement(node, state, { text: 'end loop' }); + }, + macro_If(node, state) { + addNestingStatement(node, state, { before: 'if', after: 'then' }); + }, + macro_ElsIf(node, state) { + state.data.algorithm_indent ??= 0; + state.data.algorithm_indent -= 1; + addNestingStatement(node, state, { before: 'else if', after: 'then' }); + }, + macro_Else(node, state) { + finishNestingStatement(node, state, { text: 'else' }); + state.data.algorithm_indent ??= 0; + state.data.algorithm_indent += 1; + }, + macro_EndIf(node, state) { + finishNestingStatement(node, state, { text: 'end if' }); + }, + macro_Comment(node, state) { + const args = getArguments(node, 'group'); + const children = args[args.length - 1]; + if (!children) return; + state.openNode('span', { style: { float: 'right' } }); + state.renderChildren(children); + state.data.ignoreNextWhitespace = true; + state.closeNode(); + }, +}; diff --git a/packages/tex-to-myst/src/figures.ts b/packages/tex-to-myst/src/figures.ts index 80e7a982a..ef9e82104 100644 --- a/packages/tex-to-myst/src/figures.ts +++ b/packages/tex-to-myst/src/figures.ts @@ -14,6 +14,16 @@ function renderCaption(node: GenericNode, state: ITexParser) { state.closeNode(); } +function centering(node: GenericNode, state: ITexParser) { + state.closeParagraph(); + const container = state.top(); + if (container.type === 'container') { + container.align = 'center'; + } else { + state.warn('Unknown use of centering, currently this only works for containers', node); + } +} + const FIGURE_HANDLERS: Record = { env_figure(node, state) { state.closeParagraph(); @@ -25,15 +35,8 @@ const FIGURE_HANDLERS: Record = { env_subfigure(node, state) { state.renderChildren(node); }, - macro_centering(node, state) { - state.closeParagraph(); - const container = state.top(); - if (container.type === 'container') { - container.align = 'center'; - } else { - state.warn('Unknown use of centering, currently this only works for containers', node); - } - }, + env_centering: centering, + macro_centering: centering, macro_includegraphics(node, state) { state.closeParagraph(); const url = texToText(getArguments(node, 'group')); diff --git a/packages/tex-to-myst/src/frontmatter.ts b/packages/tex-to-myst/src/frontmatter.ts index c443cb083..dbd1133d1 100644 --- a/packages/tex-to-myst/src/frontmatter.ts +++ b/packages/tex-to-myst/src/frontmatter.ts @@ -5,6 +5,7 @@ import { selectAll } from 'unist-util-select'; import { remove } from 'unist-util-remove'; import type { Handler, ITexParser } from './types.js'; import { getArguments, getPositionExtents, originalValue, texToText } from './utils.js'; +import { createTheoremHandler } from './algorithms.js'; function getContentFromRenderedSpan(node: GenericNode | undefined): string | GenericNode { if (!node) return ''; @@ -281,6 +282,17 @@ const FRONTMATTER_HANDLERS: Record = { .map((k) => k.trim()) .filter((k) => !!k); }, + macro_newtheorem(node, state) { + // https://tex.stackexchange.com/questions/155710/understanding-the-arguments-in-newtheorem-e-g-newtheoremtheoremtheoremsec/155714#155714 + const [, nameNode, x, labelNode, y] = node.args ?? []; + const name = texToText(nameNode); + const label = texToText(labelNode); + const countWith = texToText(x) || undefined; + const countAfter = texToText(y) || undefined; + state.data.theorems[name] = { label, countWith, countAfter }; + // We create a handler now for future nodes + state.data.dynamicHandlers[`env_${name}`] = createTheoremHandler(name); + }, }; FRONTMATTER_HANDLERS.macro_Author = FRONTMATTER_HANDLERS.macro_author; diff --git a/packages/tex-to-myst/src/lists.ts b/packages/tex-to-myst/src/lists.ts index a42029eab..21d21ab81 100644 --- a/packages/tex-to-myst/src/lists.ts +++ b/packages/tex-to-myst/src/lists.ts @@ -67,4 +67,5 @@ export const LIST_HANDLERS: Record = { const content = node.args[node.args.length - 1]; state.renderBlock(content, 'listItem'); }, + macro_itemsep() {}, }; diff --git a/packages/tex-to-myst/src/misc.ts b/packages/tex-to-myst/src/misc.ts index 6749ea090..3ff80de9d 100644 --- a/packages/tex-to-myst/src/misc.ts +++ b/packages/tex-to-myst/src/misc.ts @@ -79,4 +79,6 @@ export const MISC_HANDLERS: Record = { // line numbers macro_linenumbers: pass, macro_nolinenumbers: pass, + // counters + macro_setcounter: pass, }; diff --git a/packages/tex-to-myst/src/parser.ts b/packages/tex-to-myst/src/parser.ts index a01735f84..6cd29c49c 100644 --- a/packages/tex-to-myst/src/parser.ts +++ b/packages/tex-to-myst/src/parser.ts @@ -25,6 +25,7 @@ import { TABLE_HANDLERS } from './tables.js'; import { FOOTNOTE_HANDLERS } from './footnotes.js'; import { SIUNITX_HANDLERS } from './siunitx.js'; import { CHEM_HANDLERS } from './chem.js'; +import { ALGORITHM_HANDLERS } from './algorithms.js'; const DEFAULT_HANDLERS: Record = { ...BASIC_TEXT_HANDLERS, @@ -45,6 +46,7 @@ const DEFAULT_HANDLERS: Record = { ...FOOTNOTE_HANDLERS, ...SIUNITX_HANDLERS, ...CHEM_HANDLERS, + ...ALGORITHM_HANDLERS, }; // This currently is needed as we don't support affiliations in the frontmatter. @@ -84,6 +86,8 @@ export class TexParser implements ITexParser { colors: {}, macros: {}, frontmatter: {}, + theorems: {}, + dynamicHandlers: {}, }; this.stack = [{ type: 'root', children: [] }]; this.handlers = opts?.handlers ?? DEFAULT_HANDLERS; @@ -94,11 +98,11 @@ export class TexParser implements ITexParser { do { stack = this.closeNode(); } while (this.stack.length); - (selectAll('[label]', stack) as GenericNode[]).forEach((xref) => { - const reference = normalizeLabel(xref.label); + (selectAll('[label]', stack) as GenericNode[]).forEach((node) => { + const reference = normalizeLabel(node.label); if (!reference) return; - xref.identifier = reference.identifier; - xref.label = reference.label; + node.identifier = reference.identifier; + node.label = reference.label; }); cleanFrontmatter(this.data.frontmatter); this.ast = stack; @@ -161,7 +165,7 @@ export class TexParser implements ITexParser { ? `env_${child.env}` : child.type; this.currentPosition = child.position ?? this.currentPosition; - const handler = this.handlers[kind]; + const handler = this.handlers[kind] ?? this.data.dynamicHandlers[kind]; if (handler) { handler(child, this, node); } else { @@ -216,10 +220,10 @@ export class TexParser implements ITexParser { this.stack.push(node); } - openParagraph() { + openParagraph(attributes?: Record) { const inPhrasing = phrasingTypes.has(this.top()?.type); if (inPhrasing) return; - this.openNode('paragraph'); + this.openNode('paragraph', attributes); } closeParagraph() { diff --git a/packages/tex-to-myst/src/tables.ts b/packages/tex-to-myst/src/tables.ts index fd13a6772..46d747499 100644 --- a/packages/tex-to-myst/src/tables.ts +++ b/packages/tex-to-myst/src/tables.ts @@ -11,7 +11,7 @@ function createTable(node: GenericNode, state: ITexParser) { cells: [] as GenericNode[][], cell: [] as GenericNode[], }; - const IGNORE = new Set(['hline', 'rule', 'midrule', 'toprule', 'bottomrule']); + const IGNORE = new Set(['hline', 'rule', 'midrule', 'cmidrule', 'toprule', 'bottomrule']); node.content.forEach((n: GenericNode) => { if (n.type === 'macro' && IGNORE.has(n.content)) return; if (n.type === 'macro' && n.content === '\\') { diff --git a/packages/tex-to-myst/src/tex.ts b/packages/tex-to-myst/src/tex.ts index 914042abe..4d7849139 100644 --- a/packages/tex-to-myst/src/tex.ts +++ b/packages/tex-to-myst/src/tex.ts @@ -79,9 +79,20 @@ const macros: Record = { subfigure: 2, tabularx: 2, supertabular: 1, + cmidrule: 2, adjustbox: 1, resizebox: 2, captionof: 2, + // Algorithms and proofs + algorithm: 1, + algorithmic: 1, + For: 1, + ForAll: 1, + While: 1, + If: 1, + ElsIf: 1, + Comment: 1, + Until: 1, // SI Units: https://texdoc.org/serve/siunitx/0 SI: 2, si: 1, diff --git a/packages/tex-to-myst/src/types.ts b/packages/tex-to-myst/src/types.ts index eb1d4d74f..f6e1de4ed 100644 --- a/packages/tex-to-myst/src/types.ts +++ b/packages/tex-to-myst/src/types.ts @@ -13,11 +13,14 @@ export type StateData = { colors: Record; bibliography: string[]; macros: Record; + dynamicHandlers: Record; + theorems: Record; listType?: string; openGroups: string[]; ignoreNextWhitespace?: boolean; maketitle?: boolean; appendix?: boolean; + algorithm_indent?: number; frontmatter: PageFrontmatter; /** This is called on `\and` in latex, e.g. in the author block */ andCallback?: () => void; @@ -43,7 +46,7 @@ export interface ITexParser = StateData> { addLeaf>(name: string, attributes?: Omit): void; openNode: (name: string, attributes?: Record) => void; closeNode: () => GenericNode; - openParagraph: () => void; + openParagraph: (attributes?: Record) => void; closeParagraph: () => void; openBlock: (attributes?: Record) => void; closeBlock: () => void; diff --git a/packages/tex-to-myst/tests/algorithm.yml b/packages/tex-to-myst/tests/algorithm.yml new file mode 100644 index 000000000..b52160cf8 --- /dev/null +++ b/packages/tex-to-myst/tests/algorithm.yml @@ -0,0 +1,476 @@ +title: Algorithms +cases: + - title: algorithmic + tex: | + \begin{algorithm}[ht] + \label{algo1} + \caption{$PCP$ algorithm} + \begin{algorithmic}[1] + \For{($v=1$ to $Lmt$)} + \State Fix $x=n_{su}$ + \While{($x\geq 1$)} + \State Fix $k=Rad(1,x)$ + \State Available supercomputer is $Su_z$ + \State Call $SLg(k,Su_z)$ + \State Fix $x--$ + \EndWhile + \State Determine $Grt_{v}$ + \EndFor + \State Determine $Grt_{ }=\min \limits_{1\leq v\leq Lmt} Grt_{v}$ + \end{algorithmic} + \end{algorithm} + tree: + type: root + children: + - type: proof + kind: algorithm + label: algo1 + identifier: algo1 + enumerated: true + children: + - type: admonitionTitle + children: + - type: inlineMath + value: PCP + - type: text + value: ' algorithm' + - type: algorithmLine + indent: 0 + enumerator: '1' + children: + - type: strong + children: + - type: text + value: for + - type: text + value: ' (' + - type: inlineMath + value: v=1 + - type: text + value: ' to ' + - type: inlineMath + value: Lmt + - type: text + value: ') ' + - type: strong + children: + - type: text + value: do + - type: algorithmLine + indent: 1 + enumerator: '2' + children: + - type: text + value: 'Fix ' + - type: inlineMath + value: x=n_{su} + - type: text + value: '' + - type: algorithmLine + indent: 1 + enumerator: '3' + children: + - type: strong + children: + - type: text + value: while + - type: text + value: ' (' + - type: inlineMath + value: x\geq 1 + - type: text + value: ') ' + - type: strong + children: + - type: text + value: do + - type: algorithmLine + indent: 2 + enumerator: '4' + children: + - type: text + value: 'Fix ' + - type: inlineMath + value: k=Rad(1,x) + - type: text + value: '' + - type: algorithmLine + indent: 2 + enumerator: '5' + children: + - type: text + value: 'Available supercomputer is ' + - type: inlineMath + value: Su_z + - type: text + value: '' + - type: algorithmLine + indent: 2 + enumerator: '6' + children: + - type: text + value: 'Call ' + - type: inlineMath + value: SLg(k,Su_z) + - type: text + value: '' + - type: algorithmLine + indent: 2 + enumerator: '7' + children: + - type: text + value: 'Fix ' + - type: inlineMath + value: x-- + - type: text + value: '' + - type: algorithmLine + indent: 1 + enumerator: '8' + children: + - type: strong + children: + - type: text + value: end while + - type: algorithmLine + indent: 1 + enumerator: '9' + children: + - type: text + value: 'Determine ' + - type: inlineMath + value: Grt_{v} + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '10' + children: + - type: strong + children: + - type: text + value: end for + - type: algorithmLine + indent: 0 + enumerator: '11' + children: + - type: text + value: 'Determine ' + - type: inlineMath + value: Grt_{ }=\min \limits_{1\leq v\leq Lmt} Grt_{v} + - title: algorithmic - if - nested + tex: | + \begin{algorithmic} + \State $i \gets 10$ + \If{$i\geq 5$} + \State $i \gets i-1$ + \Else + \If{$i\leq 3$} + \State $i \gets i+2$ + \EndIf + \EndIf + \end{algorithmic} + tree: + type: root + children: + - type: proof + kind: algorithm + children: + - type: algorithmLine + indent: 0 + enumerator: '1' + children: + - type: text + value: '' + - type: inlineMath + value: i \gets 10 + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '2' + children: + - type: strong + children: + - type: text + value: if + - type: text + value: ' ' + - type: inlineMath + value: i\geq 5 + - type: text + value: ' ' + - type: strong + children: + - type: text + value: then + - type: algorithmLine + indent: 1 + enumerator: '3' + children: + - type: text + value: '' + - type: inlineMath + value: i \gets i-1 + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '4' + children: + - type: strong + children: + - type: text + value: else + - type: algorithmLine + indent: 1 + enumerator: '5' + children: + - type: strong + children: + - type: text + value: if + - type: text + value: ' ' + - type: inlineMath + value: i\leq 3 + - type: text + value: ' ' + - type: strong + children: + - type: text + value: then + - type: algorithmLine + indent: 2 + enumerator: '6' + children: + - type: text + value: '' + - type: inlineMath + value: i \gets i+2 + - type: text + value: '' + - type: algorithmLine + indent: 1 + enumerator: '7' + children: + - type: strong + children: + - type: text + value: end if + - type: algorithmLine + indent: 0 + enumerator: '8' + children: + - type: strong + children: + - type: text + value: end if + - title: algorithmic - if-else + tex: | + \begin{algorithm} + \caption{An algorithm with caption}\label{alg:cap} + \begin{algorithmic} + \Require $n \geq 0$ + \Ensure $y = x^n$ + \State $y \gets 1$ + \State $X \gets x$ + \State $N \gets n$ + \While{$N \neq 0$} + \If{$N$ is even} + \State $X \gets X \times X$ + \State $N \gets \frac{N}{2}$ \Comment{This is a comment} + \ElsIf{$N$ is odd} + \State $y \gets y \times X$ + \State $N \gets N - 1$ + \EndIf + \EndWhile + \end{algorithmic} + \end{algorithm} + tree: + type: root + children: + - type: proof + kind: algorithm + label: alg:cap + identifier: alg:cap + enumerated: true + children: + - type: admonitionTitle + children: + - type: text + value: An algorithm with caption + - type: algorithmLine + indent: 0 + enumerator: '1' + children: + - type: strong + children: + - type: text + value: 'Require: ' + - type: text + value: ' ' + - type: inlineMath + value: n \geq 0 + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '2' + children: + - type: strong + children: + - type: text + value: 'Ensure: ' + - type: text + value: ' ' + - type: inlineMath + value: y = x^n + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '3' + children: + - type: text + value: '' + - type: inlineMath + value: y \gets 1 + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '4' + children: + - type: text + value: '' + - type: inlineMath + value: X \gets x + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '5' + children: + - type: text + value: '' + - type: inlineMath + value: N \gets n + - type: text + value: '' + - type: algorithmLine + indent: 0 + enumerator: '6' + children: + - type: strong + children: + - type: text + value: while + - type: text + value: ' ' + - type: inlineMath + value: N \neq 0 + - type: text + value: ' ' + - type: strong + children: + - type: text + value: do + - type: algorithmLine + indent: 1 + enumerator: '7' + children: + - type: strong + children: + - type: text + value: if + - type: text + value: ' ' + - type: inlineMath + value: 'N' + - type: text + value: ' is even ' + - type: strong + children: + - type: text + value: then + - type: algorithmLine + indent: 2 + enumerator: '8' + children: + - type: text + value: '' + - type: inlineMath + value: X \gets X \times X + - type: text + value: '' + - type: algorithmLine + indent: 2 + enumerator: '9' + children: + - type: text + value: '' + - type: inlineMath + value: N \gets \frac{N}{2} + - type: text + value: ' ' + - type: span + style: + float: right + children: + - type: text + value: 'This is a comment' + - type: algorithmLine + indent: 1 + enumerator: '10' + children: + - type: strong + children: + - type: text + value: else if + - type: text + value: ' ' + - type: inlineMath + value: 'N' + - type: text + value: ' is odd ' + - type: strong + children: + - type: text + value: then + - type: algorithmLine + indent: 2 + enumerator: '11' + children: + - type: text + value: '' + - type: inlineMath + value: y \gets y \times X + - type: text + value: '' + - type: algorithmLine + indent: 2 + enumerator: '12' + children: + - type: text + value: '' + - type: inlineMath + value: N \gets N - 1 + - type: text + value: '' + - type: algorithmLine + indent: 1 + enumerator: '13' + children: + - type: strong + children: + - type: text + value: end if + - type: algorithmLine + indent: 0 + enumerator: '14' + children: + - type: strong + children: + - type: text + value: end while diff --git a/packages/tex-to-myst/tests/cases.spec.ts b/packages/tex-to-myst/tests/cases.spec.ts index f882b0ae7..a8583b11c 100644 --- a/packages/tex-to-myst/tests/cases.spec.ts +++ b/packages/tex-to-myst/tests/cases.spec.ts @@ -45,6 +45,7 @@ const files = [ 'footnotes.yml', 'siunitx.yml', 'verbatim.yml', + 'algorithm.yml', ]; const only = ''; // Can set this to a test title @@ -58,9 +59,9 @@ const casesList = files }); casesList.forEach(({ title, cases }) => { + const casesToUse = cases.filter((c) => !only || c.title === only); + if (casesToUse.length === 0) return; describe(title, () => { - const casesToUse = cases.filter((c) => !only || c.title === only); - if (casesToUse.length === 0) return; test.each(casesToUse.map((c): [string, TestCase] => [c.title, c]))( '%s', (_, { tex, tree, text, warnings, data }) => { @@ -78,7 +79,7 @@ casesList.forEach(({ title, cases }) => { if (tree) { if (only) console.log(yaml.dump(state.ast)); expect(state.ast).toEqual(tree); - } else if (text != null) expect(toText(state.ast)).toEqual(text); + } else if (text != null) expect(toText(state.ast).trim()).toEqual(text); else throw new Error('Must have at least "tree" or "text" defined.'); if (data?.colors) { expect(state.data.colors).toEqual(data.colors); @@ -89,6 +90,9 @@ casesList.forEach(({ title, cases }) => { if (data?.macros) { expect(state.data.macros).toEqual(data.macros); } + if (data?.theorems) { + expect(state.data.theorems).toEqual(data.theorems); + } if (data?.frontmatter) { stripPositions(state.data.frontmatter.title); stripPositions(state.data.frontmatter.short_title); diff --git a/packages/tex-to-myst/tests/commands.yml b/packages/tex-to-myst/tests/commands.yml index 9571cab49..94d3a11fc 100644 --- a/packages/tex-to-myst/tests/commands.yml +++ b/packages/tex-to-myst/tests/commands.yml @@ -13,3 +13,20 @@ cases: '\ii': '{i\mkern1mu}' '\f': '\mathcal{F}_{#1}\left[#2\right]' '\FFT': '\mathcal{F}_{\mathbf{r}\to\mathbf{k}}' + - title: newtheorem + tex: |- + \newtheorem{proposition}{Proposition} + \newtheorem{example}{Example} + \newtheorem{proof}{Proof} + \newtheorem{theorem}{Theorem}[section] + \newtheorem{corollary}{Corollary}[theorem] + \newtheorem{lemma}[theorem]{Lemma} + text: '' + data: + theorems: + proposition: { label: Proposition } + example: { label: Example } + proof: { label: Proof } + theorem: { label: Theorem, countAfter: section } + corollary: { label: Corollary, countAfter: theorem } + lemma: { label: Lemma, countWith: theorem }