From 250cef19e36691cfcc8639f420b01707f91afd60 Mon Sep 17 00:00:00 2001 From: Rowan Cockett Date: Fri, 13 Oct 2023 09:03:41 -0600 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=A7=AE=20Clean=20up=20MathML=20taggin?= =?UTF-8?q?g=20in=20JATS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .changeset/wild-trains-pay.md | 7 +++++ packages/myst-to-jats/src/index.ts | 43 ++++++++++++++++++++------- packages/myst-to-jats/tests/basic.yml | 22 +++++++++++--- 3 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 .changeset/wild-trains-pay.md diff --git a/.changeset/wild-trains-pay.md b/.changeset/wild-trains-pay.md new file mode 100644 index 000000000..8f75961b4 --- /dev/null +++ b/.changeset/wild-trains-pay.md @@ -0,0 +1,7 @@ +--- +'myst-to-jats': patch +--- + +Improve MathML tagging, removing unnecessary mml:semantics and mml:mrow if there is only a single element. + +Clean up latex in the CDATA output. diff --git a/packages/myst-to-jats/src/index.ts b/packages/myst-to-jats/src/index.ts index b93e9e66c..c279f2d0e 100644 --- a/packages/myst-to-jats/src/index.ts +++ b/packages/myst-to-jats/src/index.ts @@ -121,6 +121,15 @@ function alternativesFromMinifiedOutput(output: MinifiedOutput, state: IJatsSeri state.closeNode(); } +function addMmlAndRemoveAnnotation(el?: Element) { + if (el?.name) el.name = `mml:${el.name}`; + if (!el?.elements) return; + el.elements = el.elements.filter((child: Element) => child.name !== 'annotation'); + el.elements.forEach((child: Element) => { + addMmlAndRemoveAnnotation(child); + }); +} + function mathToMml(math?: string, inline?: boolean) { const katexXml = katex.renderToString(math, { output: 'mathml', throwOnError: false }); const katexJs = xml2js(katexXml, { compact: false }) as Element; @@ -129,18 +138,26 @@ function mathToMml(math?: string, inline?: boolean) { if (!mathElement) return; if (inline) mathElement.attributes = { ...mathElement.attributes, display: 'inline' }; delete 
mathElement.attributes?.xmlns; - function addMmlAndRemoveAnnotation(el?: Element) { - if (el?.name) el.name = `mml:${el.name}`; - if (!el?.elements) return; - el.elements = el.elements.filter((child: Element) => child.name !== 'annotation'); - el.elements.forEach((child: Element) => { - addMmlAndRemoveAnnotation(child); - }); - } addMmlAndRemoveAnnotation(mathElement); + // Remove the wrapping `<mml:semantics>` if it is the only element + if (mathElement?.elements?.length === 1 && mathElement.elements[0].name === 'mml:semantics') { + mathElement.elements = mathElement.elements[0].elements; + } + if (mathElement?.elements?.length === 1 && mathElement.elements[0].name === 'mml:mrow') { + mathElement.elements = mathElement.elements[0].elements; + } return mathElement; } +function cleanLatex(value?: string): string | undefined { + if (!value) return; + return value + .split('\n') + .map((s) => s.replace(/%(.*)/, '').trim()) + .join(' ') + .trim(); +} + const handlers: Record = { text(node, state) { state.text(node.value); @@ -203,11 +220,15 @@ const handlers: Record = { ); }, inlineMath(node, state) { - state.openNode('inline-formula'); + const inlineFormulaAttrs: Attributes = {}; + if (node.identifier) { + inlineFormulaAttrs.id = node.identifier; + } + state.openNode('inline-formula', inlineFormulaAttrs); state.openNode('alternatives'); state.pushNode(mathToMml(node.value, true)); state.openNode('tex-math'); - state.addLeaf('cdata', { cdata: node.value }); + state.addLeaf('cdata', { cdata: cleanLatex(node.value) }); state.closeNode(); state.closeNode(); state.closeNode(); @@ -222,7 +243,7 @@ const handlers: Record = { state.openNode('alternatives'); state.pushNode(mathToMml(node.value)); state.openNode('tex-math'); - state.addLeaf('cdata', { cdata: node.value }); + state.addLeaf('cdata', { cdata: cleanLatex(node.value) }); state.closeNode(); state.closeNode(); state.closeNode(); diff --git a/packages/myst-to-jats/tests/basic.yml b/packages/myst-to-jats/tests/basic.yml index 
dc117fc12..f86b972a6 100644 --- a/packages/myst-to-jats/tests/basic.yml +++ b/packages/myst-to-jats/tests/basic.yml @@ -189,7 +189,7 @@ cases: value: Ax=b - type: text value: . - jats:

This math is a role, e=mc2, while this math is wrapped in dollar signs, Ax=b.

+ jats:

This math is a role, e=mc2, while this math is wrapped in dollar signs, Ax=b.

- title: Display Math (label) tree: type: root @@ -198,14 +198,14 @@ cases: identifier: my-equation enumerator: 1 value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 - title: Display Math (no label) tree: type: root children: - type: math value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 - title: Display Math (not numbered) tree: type: root @@ -215,7 +215,21 @@ cases: enumerated: false enumerator: 1 value: w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} - jats: wt+1=(1+rt+1)s(wt)+yt+1 + jats: wt+1=(1+rt+1)s(wt)+yt+1 + - title: Display Math strips latex comments and is on a single line + tree: + type: root + children: + - type: math + identifier: my-equation + enumerated: false + enumerator: 1 + value: | + A + x + = % some saucy comment + b + jats: Ax=b - title: Lists tree: type: root From 22da0891d3b5a67ffc3f9f4dfcae80b46481e614 Mon Sep 17 00:00:00 2001 From: Rowan Cockett Date: Fri, 13 Oct 2023 10:20:37 -0600 Subject: [PATCH 2/2] Add comment --- packages/myst-to-jats/src/index.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/myst-to-jats/src/index.ts b/packages/myst-to-jats/src/index.ts index c279f2d0e..b8f614e5d 100644 --- a/packages/myst-to-jats/src/index.ts +++ b/packages/myst-to-jats/src/index.ts @@ -149,6 +149,9 @@ function mathToMml(math?: string, inline?: boolean) { return mathElement; } +/** + * Remove comments and consolidate to one line + */ function cleanLatex(value?: string): string | undefined { if (!value) return; return value