Skip to content

Commit

Permalink
feat(common,developer): tests and warning on denormalized content per…
Browse files Browse the repository at this point in the history
… review comments

- report a warning on each separate instance of a denormalized string (once per string).

Fixes: #7394
  • Loading branch information
srl295 committed Dec 2, 2024
1 parent f4b7e0a commit 6f2a6b2
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 18 deletions.
10 changes: 5 additions & 5 deletions common/web/types/src/util/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,13 +305,13 @@ export function isPUA(ch: number) {
(ch >= Uni_PUA_16_START && ch <= Uni_PUA_16_END));
}

/** @returns true if s is NEITHER NFC nor NFD */
export function isDenormalized(s: string) : boolean {
if(!s) return false; // empty or null
/** @returns false if s is NEITHER NFC nor NFD. (Returns true for falsy) */
export function isNormalized(s: string) : boolean {
if(!s) return true; // empty or null
const nfc = s.normalize("NFC");
const nfd = s.normalize("NFD");
if (s !== nfc && s !== nfd) return true;
return false;
if (s !== nfc && s !== nfd) return false;
return true;
}

class BadStringMap extends Map<BadStringType, Set<number>> {
Expand Down
6 changes: 3 additions & 3 deletions common/web/types/test/util/test-unescape.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import 'mocha';
import {assert} from 'chai';
import {unescapeString, UnescapeError, isOneChar, toOneChar, unescapeOneQuadString, BadStringAnalyzer, isValidUnicode, describeCodepoint, isPUA, BadStringType, unescapeStringToRegex, unescapeQuadString, NFDAnalyzer, isDenormalized} from '../../src/util/util.js';
import {unescapeString, UnescapeError, isOneChar, toOneChar, unescapeOneQuadString, BadStringAnalyzer, isValidUnicode, describeCodepoint, isPUA, BadStringType, unescapeStringToRegex, unescapeQuadString, NFDAnalyzer, isNormalized} from '../../src/util/util.js';

describe('test UTF32 functions()', function() {
it('should properly categorize strings', () => {
Expand Down Expand Up @@ -197,11 +197,11 @@ describe('test bad char functions', () => {
'fas\u0323\u0307cinating', // NFD
'd\u0323\u0307', // NFD
'\u1e0d\u0307', // NFC
].map(s => assert.isFalse(isDenormalized(s), `for string ${s}`));
].map(s => assert.isTrue(isNormalized(s), `for string ${s}`));
[
'd\u0307\u0323', // NFD but reversed marks
'fas\u0307\u0323cinating', // not-NFD
].map(s => assert.isTrue(isDenormalized(s), `for string ${s}`));
].map(s => assert.isFalse(isNormalized(s), `for string ${s}`));
});
});
});
Expand Down
9 changes: 3 additions & 6 deletions developer/src/kmc-ldml/src/compiler/empty-compiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ export class StrsCompiler extends EmptyCompiler {
const strs = <KMXPlus.Strs>section;

if (strs) {
let hadDenormalized = false;
const badStringAnalyzer = new util.BadStringAnalyzer();
const CONTAINS_MARKER_REGEX = new RegExp(LdmlKeyboardTypes.MarkerParser.ANY_MARKER_MATCH);
for (let s of strs.allProcessedStrings.values()) {
// stop at the first denormalized string
hadDenormalized = hadDenormalized || util.isDenormalized(s);
if (!util.isNormalized(s)) {
this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm({s}));
}
// replace all \\uXXXX with the actual code point.
// this lets us analyze whether there are PUA, unassigned, etc.
// the results might not be valid regex of course.
Expand Down Expand Up @@ -75,10 +76,6 @@ export class StrsCompiler extends EmptyCompiler {
return false;
}
}
if (hadDenormalized) {
// warn once
this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm());
}
}
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions developer/src/kmc-ldml/src/compiler/ldml-compiler-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ export class LdmlCompilerMessages {
);

static WARN_StringDenorm = SevWarn | 0x002B;
static Warn_StringDenorm = () =>
m(this.WARN_StringDenorm, `File contains text that is neither NFC nor NFD.`);
static Warn_StringDenorm = (o: { s: string }) =>
m(this.WARN_StringDenorm, `File contains string "${def(o.s)}" that is neither NFC nor NFD.`);

// Available: 0x02C-0x2F

Expand Down
5 changes: 3 additions & 2 deletions developer/src/kmc-ldml/test/strs.tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,17 @@ describe('strs', function () {
const path = `sections/strs/warn-denorm-${num}.xml`;
it(path, async function () {
const inputFilename = makePathToFixture(path);
const s = 's\u0307\u0323'; // ṩ
// Compile the keyboard
const kmx = await compileKeyboard(inputFilename, { ...compilerTestOptions, saveDebug: true, shouldAddCompilerVersion: false },
[
// validation messages
LdmlCompilerMessages.Warn_StringDenorm(),
LdmlCompilerMessages.Warn_StringDenorm({s}),
],
false, // validation should pass
[
// same messages
LdmlCompilerMessages.Warn_StringDenorm(),
LdmlCompilerMessages.Warn_StringDenorm({s}),
]);
assert.isNotNull(kmx); // not failing
});
Expand Down

0 comments on commit 6f2a6b2

Please sign in to comment.