From 6f2a6b2ebfd4c1664a3372b362b1828e61029bda Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Mon, 2 Dec 2024 11:35:06 -0600 Subject: [PATCH] feat(common,developer): tests and warning on denormalized content per review comments - report a warning on each separate instance of a denormalized string (once per string). Fixes: #7394 --- common/web/types/src/util/util.ts | 10 +++++----- common/web/types/test/util/test-unescape.ts | 6 +++--- developer/src/kmc-ldml/src/compiler/empty-compiler.ts | 9 +++------ .../kmc-ldml/src/compiler/ldml-compiler-messages.ts | 4 ++-- developer/src/kmc-ldml/test/strs.tests.ts | 5 +++-- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/common/web/types/src/util/util.ts b/common/web/types/src/util/util.ts index 7395c27ba0f..70e1077cc5a 100644 --- a/common/web/types/src/util/util.ts +++ b/common/web/types/src/util/util.ts @@ -305,13 +305,13 @@ export function isPUA(ch: number) { (ch >= Uni_PUA_16_START && ch <= Uni_PUA_16_END)); } -/** @returns true if s is NEITHER NFC nor NFD */ -export function isDenormalized(s: string) : boolean { - if(!s) return false; // empty or null +/** @returns false if s is NEITHER NFC nor NFD. (Returns true for falsy) */ +export function isNormalized(s: string) : boolean { + if(!s) return true; // empty or null const nfc = s.normalize("NFC"); const nfd = s.normalize("NFD"); - if (s !== nfc && s !== nfd) return true; - return false; + if (s !== nfc && s !== nfd) return false; + return true; } class BadStringMap extends Map> { diff --git a/common/web/types/test/util/test-unescape.ts b/common/web/types/test/util/test-unescape.ts index 853fb6a09b0..3eaafd7150f 100644 --- a/common/web/types/test/util/test-unescape.ts +++ b/common/web/types/test/util/test-unescape.ts @@ -1,6 +1,6 @@ import 'mocha'; import {assert} from 'chai'; -import {unescapeString, UnescapeError, isOneChar, toOneChar, unescapeOneQuadString, BadStringAnalyzer, isValidUnicode, describeCodepoint, isPUA, BadStringType, unescapeStringToRegex, unescapeQuadString, NFDAnalyzer, isDenormalized} from '../../src/util/util.js'; +import {unescapeString, UnescapeError, isOneChar, toOneChar, unescapeOneQuadString, BadStringAnalyzer, isValidUnicode, describeCodepoint, isPUA, BadStringType, unescapeStringToRegex, unescapeQuadString, NFDAnalyzer, isNormalized} from '../../src/util/util.js'; describe('test UTF32 functions()', function() { it('should properly categorize strings', () => { @@ -197,11 +197,11 @@ describe('test bad char functions', () => { 'fas\u0323\u0307cinating', // NFD 'd\u0323\u0307', // NFD '\u1e0d\u0307', // NFC - ].map(s => assert.isFalse(isDenormalized(s), `for string ${s}`)); + ].map(s => assert.isTrue(isNormalized(s), `for string ${s}`)); [ 'd\u0307\u0323', // NFD but reversed marks 'fas\u0307\u0323cinating', // not-NFD - ].map(s => assert.isTrue(isDenormalized(s), `for string ${s}`)); + ].map(s => assert.isFalse(isNormalized(s), `for string ${s}`)); }); }); }); diff --git a/developer/src/kmc-ldml/src/compiler/empty-compiler.ts b/developer/src/kmc-ldml/src/compiler/empty-compiler.ts index 12c39d9d620..4b9aec76133 100644 --- a/developer/src/kmc-ldml/src/compiler/empty-compiler.ts +++ b/developer/src/kmc-ldml/src/compiler/empty-compiler.ts @@ -34,12 +34,13 @@ export class StrsCompiler extends EmptyCompiler { const strs = section; if (strs) { - let hadDenormalized = false; const badStringAnalyzer = new util.BadStringAnalyzer(); const CONTAINS_MARKER_REGEX = new RegExp(LdmlKeyboardTypes.MarkerParser.ANY_MARKER_MATCH); for (let s of strs.allProcessedStrings.values()) { // stop at the first denormalized string - hadDenormalized = hadDenormalized || util.isDenormalized(s); + if (!util.isNormalized(s)) { + this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm({s})); + } // replace all \\uXXXX with the actual code point. // this lets us analyze whether there are PUA, unassigned, etc. // the results might not be valid regex of course. @@ -75,10 +76,6 @@ export class StrsCompiler extends EmptyCompiler { return false; } } - if (hadDenormalized) { - // warn once - this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm()); - } } return true; } diff --git a/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts b/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts index f947b7ca189..5df0da4b73a 100644 --- a/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts +++ b/developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts @@ -200,8 +200,8 @@ export class LdmlCompilerMessages { ); static WARN_StringDenorm = SevWarn | 0x002B; - static Warn_StringDenorm = () => - m(this.WARN_StringDenorm, `File contains text that is neither NFC nor NFD.`); + static Warn_StringDenorm = (o: { s: string }) => + m(this.WARN_StringDenorm, `File contains string "${def(o.s)}" that is neither NFC nor NFD.`); // Available: 0x02C-0x2F diff --git a/developer/src/kmc-ldml/test/strs.tests.ts b/developer/src/kmc-ldml/test/strs.tests.ts index 14250d993ae..24e8ff4c1ed 100644 --- a/developer/src/kmc-ldml/test/strs.tests.ts +++ b/developer/src/kmc-ldml/test/strs.tests.ts @@ -76,16 +76,17 @@ describe('strs', function () { const path = `sections/strs/warn-denorm-${num}.xml`; it(path, async function () { const inputFilename = makePathToFixture(path); + const s = 's\u0307\u0323'; // ṩ // Compile the keyboard const kmx = await compileKeyboard(inputFilename, { ...compilerTestOptions, saveDebug: true, shouldAddCompilerVersion: false }, [ // validation messages - LdmlCompilerMessages.Warn_StringDenorm(), + LdmlCompilerMessages.Warn_StringDenorm({s}), ], false, // validation should pass [ // same messages - LdmlCompilerMessages.Warn_StringDenorm(), + LdmlCompilerMessages.Warn_StringDenorm({s}), ]); assert.isNotNull(kmx); // not failing });