diff --git a/.changeset/wild-trains-pay.md b/.changeset/wild-trains-pay.md new file mode 100644 index 000000000..8f75961b4 --- /dev/null +++ b/.changeset/wild-trains-pay.md @@ -0,0 +1,7 @@ +--- +'myst-to-jats': patch +--- + +Improve MathML tagging, removing unnecessary mml:semantics and mml:mrow if there is only a single element. + +Clean up latex in the CDATA output. diff --git a/packages/myst-to-jats/src/index.ts b/packages/myst-to-jats/src/index.ts index b93e9e66c..b8f614e5d 100644 --- a/packages/myst-to-jats/src/index.ts +++ b/packages/myst-to-jats/src/index.ts @@ -121,6 +121,15 @@ function alternativesFromMinifiedOutput(output: MinifiedOutput, state: IJatsSeri state.closeNode(); } +function addMmlAndRemoveAnnotation(el?: Element) { + if (el?.name) el.name = `mml:${el.name}`; + if (!el?.elements) return; + el.elements = el.elements.filter((child: Element) => child.name !== 'annotation'); + el.elements.forEach((child: Element) => { + addMmlAndRemoveAnnotation(child); + }); +} + function mathToMml(math?: string, inline?: boolean) { const katexXml = katex.renderToString(math, { output: 'mathml', throwOnError: false }); const katexJs = xml2js(katexXml, { compact: false }) as Element; @@ -129,18 +138,29 @@ function mathToMml(math?: string, inline?: boolean) { if (!mathElement) return; if (inline) mathElement.attributes = { ...mathElement.attributes, display: 'inline' }; delete mathElement.attributes?.xmlns; - function addMmlAndRemoveAnnotation(el?: Element) { - if (el?.name) el.name = `mml:${el.name}`; - if (!el?.elements) return; - el.elements = el.elements.filter((child: Element) => child.name !== 'annotation'); - el.elements.forEach((child: Element) => { - addMmlAndRemoveAnnotation(child); - }); - } addMmlAndRemoveAnnotation(mathElement); + // Remove the wrapping `<mml:semantics>` if it is the only element + if (mathElement?.elements?.length === 1 && mathElement.elements[0].name === 'mml:semantics') { + mathElement.elements = mathElement.elements[0].elements; + } + if 
(mathElement?.elements?.length === 1 && mathElement.elements[0].name === 'mml:mrow') { + mathElement.elements = mathElement.elements[0].elements; + } return mathElement; } +/** + * Remove comments and consolidate to one line + */ +function cleanLatex(value?: string): string | undefined { + if (!value) return; + return value + .split('\n') + .map((s) => s.replace(/%(.*)/, '').trim()) + .join(' ') + .trim(); +} + const handlers: Record = { text(node, state) { state.text(node.value); @@ -203,11 +223,15 @@ const handlers: Record = { ); }, inlineMath(node, state) { - state.openNode('inline-formula'); + const inlineFormulaAttrs: Attributes = {}; + if (node.identifier) { + inlineFormulaAttrs.id = node.identifier; + } + state.openNode('inline-formula', inlineFormulaAttrs); state.openNode('alternatives'); state.pushNode(mathToMml(node.value, true)); state.openNode('tex-math'); - state.addLeaf('cdata', { cdata: node.value }); + state.addLeaf('cdata', { cdata: cleanLatex(node.value) }); state.closeNode(); state.closeNode(); state.closeNode(); @@ -222,7 +246,7 @@ const handlers: Record = { state.openNode('alternatives'); state.pushNode(mathToMml(node.value)); state.openNode('tex-math'); - state.addLeaf('cdata', { cdata: node.value }); + state.addLeaf('cdata', { cdata: cleanLatex(node.value) }); state.closeNode(); state.closeNode(); state.closeNode(); diff --git a/packages/myst-to-jats/tests/basic.yml b/packages/myst-to-jats/tests/basic.yml index dc117fc12..f86b972a6 100644 --- a/packages/myst-to-jats/tests/basic.yml +++ b/packages/myst-to-jats/tests/basic.yml @@ -189,7 +189,7 @@ cases: value: Ax=b - type: text value: . - jats:

This math is a role, e=mc2, while this math is wrapped in dollar signs, Ax=b.

+ jats:

This math is a role, e=mc2, while this math is wrapped in dollar signs, Ax=b.

- title: Display Math (label) tree: type: root @@ -198,14 +198,14 @@ cases: identifier: my-equation enumerator: 1 value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 - title: Display Math (no label) tree: type: root children: - type: math value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 - title: Display Math (not numbered) tree: type: root @@ -215,7 +215,21 @@ cases: enumerated: false enumerator: 1 value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 + - title: Display Math strips latex comments and is on a single line + tree: + type: root + children: + - type: math + identifier: my-equation + enumerated: false + enumerator: 1 + value: | + A + x + = % some saucy comment + b + jats: Ax=b - title: Lists tree: type: root