From add502e1a9faba0651210bc1881bb1ccd233cc95 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Fri, 29 Nov 2024 12:16:11 -0600 Subject: [PATCH] feat(common,developer): tests and warning on denormalized content - common: shift the 'strs' processing slightly, because otherwise we normalize to NFD before even tracking the strings - if any string is neither NFC nor NFD, give a warning - add tests for the same Fixes: #7394 --- common/web/types/src/kmx/kmx-plus/kmx-plus.ts | 15 ++++++----- .../kmc-ldml/src/compiler/empty-compiler.ts | 7 +++++ .../src/compiler/ldml-compiler-messages.ts | 6 ++++- .../fixtures/sections/strs/warn-denorm-1.xml | 18 +++++++++++++ .../fixtures/sections/strs/warn-denorm-2.xml | 22 ++++++++++++++++ .../fixtures/sections/strs/warn-denorm-3.xml | 26 +++++++++++++++++++ .../fixtures/sections/strs/warn-denorm-4.xml | 26 +++++++++++++++++++ developer/src/kmc-ldml/test/strs.tests.ts | 20 ++++++++++++++ 8 files changed, 132 insertions(+), 8 deletions(-) create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-1.xml create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-2.xml create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-3.xml create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-4.xml diff --git a/common/web/types/src/kmx/kmx-plus/kmx-plus.ts b/common/web/types/src/kmx/kmx-plus/kmx-plus.ts index 0cb49e527c7..f78e2d6d2eb 100644 --- a/common/web/types/src/kmx/kmx-plus/kmx-plus.ts +++ b/common/web/types/src/kmx/kmx-plus/kmx-plus.ts @@ -174,12 +174,7 @@ export class Strs extends Section { */ allocString(s?: string, opts?: StrsOptions, sections?: DependencySections): StrsItem { // Run the string processing pipeline - s = Strs.processString(s, opts, sections); - - // add to the set, for testing - if (s) { - this.allProcessedStrings.add(s); - } + s = this.processString(s, opts, sections); // if it's a single char, don't push it into the strs table if (opts?.singleOk && isOneChar(s)) { @@ -197,7 +192,7 @@ export class Strs extends Section { } /** process everything according to opts */ - static processString(s: string, opts: StrsOptions, sections: DependencySections) { + processString(s: string, opts: StrsOptions, sections: DependencySections) { s = s ?? ''; // type check everything else if (typeof s !== 'string') { @@ -215,6 +210,12 @@ export class Strs extends Section { if (opts?.unescape) { s = unescapeString(s); } + + if (s) { + // add all processed strings here, so that we catch denormalized strings in the input + this.allProcessedStrings.add(s); + } + // nfd if (opts?.nfd) { if (!sections?.meta?.normalizationDisabled) { diff --git a/developer/src/kmc-ldml/src/compiler/empty-compiler.ts b/developer/src/kmc-ldml/src/compiler/empty-compiler.ts index aaba0950260..12c39d9d620 100644 --- a/developer/src/kmc-ldml/src/compiler/empty-compiler.ts +++ b/developer/src/kmc-ldml/src/compiler/empty-compiler.ts @@ -34,9 +34,12 @@ export class StrsCompiler extends EmptyCompiler { const strs = section; if (strs) { + let hadDenormalized = false; const badStringAnalyzer = new util.BadStringAnalyzer(); const CONTAINS_MARKER_REGEX = new RegExp(LdmlKeyboardTypes.MarkerParser.ANY_MARKER_MATCH); for (let s of strs.allProcessedStrings.values()) { + // stop at the first denormalized string + hadDenormalized = hadDenormalized || util.isDenormalized(s); // replace all \\uXXXX with the actual code point. // this lets us analyze whether there are PUA, unassigned, etc. // the results might not be valid regex of course. @@ -72,6 +75,10 @@ export class StrsCompiler extends EmptyCompiler { return false; } } + if (hadDenormalized) { + // warn once + this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm()); + } } return true; } diff --git a/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts b/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts index 1445b40036a..f947b7ca189 100644 --- a/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts +++ b/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts @@ -199,7 +199,11 @@ export class LdmlCompilerMessages { `Invalid marker identifier "\m{${def(o.id)}}". Identifiers must be between 1 and 32 characters, and can use A-Z, a-z, 0-9, and _.`, ); - // Available: 0x02B-0x2F + static WARN_StringDenorm = SevWarn | 0x002B; + static Warn_StringDenorm = () => + m(this.WARN_StringDenorm, `File contains text that is neither NFC nor NFD.`); + + // Available: 0x02C-0x2F static ERROR_InvalidQuadEscape = SevError | 0x0030; static Error_InvalidQuadEscape = (o: { cp: number }) => diff --git a/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-1.xml b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-1.xml new file mode 100644 index 00000000000..49cd61d358a --- /dev/null +++ b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-1.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-2.xml b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-2.xml new file mode 100644 index 00000000000..ec001db7684 --- /dev/null +++ b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-2.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-3.xml b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-3.xml new file mode 100644 index 00000000000..a40c119099c --- /dev/null +++ b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-3.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-4.xml b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-4.xml new file mode 100644 index 00000000000..cf443c61fe5 --- /dev/null +++ b/developer/src/kmc-ldml/test/fixtures/sections/strs/warn-denorm-4.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/developer/src/kmc-ldml/test/strs.tests.ts b/developer/src/kmc-ldml/test/strs.tests.ts index a5f7e89e35c..14250d993ae 100644 --- a/developer/src/kmc-ldml/test/strs.tests.ts +++ b/developer/src/kmc-ldml/test/strs.tests.ts @@ -71,4 +71,24 @@ describe('strs', function () { ]); assert.isNull(kmx); // should fail post-validate }); + describe('should warn on denorm strings', async function () { + for (const num of [1, 2, 3, 4]) { + const path = `sections/strs/warn-denorm-${num}.xml`; + it(path, async function () { + const inputFilename = makePathToFixture(path); + // Compile the keyboard + const kmx = await compileKeyboard(inputFilename, { ...compilerTestOptions, saveDebug: true, shouldAddCompilerVersion: false }, + [ + // validation messages + LdmlCompilerMessages.Warn_StringDenorm(), + ], + false, // validation should pass + [ + // same messages + LdmlCompilerMessages.Warn_StringDenorm(), + ]); + assert.isNotNull(kmx); // not failing + }); + } + }); });