Skip to content

Commit

Permalink
feat(common,developer): tests and warning on denormalized content
Browse files Browse the repository at this point in the history
- common: shift the 'strs' processing slightly, because otherwise we normalize to NFD before even tracking the strings
- if any string is neither NFC nor NFD, give a warning
- add tests for the same

Fixes: #7394
  • Loading branch information
srl295 committed Nov 29, 2024
1 parent 8e19508 commit add502e
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 8 deletions.
15 changes: 8 additions & 7 deletions common/web/types/src/kmx/kmx-plus/kmx-plus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,7 @@ export class Strs extends Section {
*/
allocString(s?: string, opts?: StrsOptions, sections?: DependencySections): StrsItem {
// Run the string processing pipeline
s = Strs.processString(s, opts, sections);

// add to the set, for testing
if (s) {
this.allProcessedStrings.add(s);
}
s = this.processString(s, opts, sections);

// if it's a single char, don't push it into the strs table
if (opts?.singleOk && isOneChar(s)) {
Expand All @@ -197,7 +192,7 @@ export class Strs extends Section {
}

/** process everything according to opts */
static processString(s: string, opts: StrsOptions, sections: DependencySections) {
processString(s: string, opts: StrsOptions, sections: DependencySections) {
s = s ?? '';
// type check everything else
if (typeof s !== 'string') {
Expand All @@ -215,6 +210,12 @@ export class Strs extends Section {
if (opts?.unescape) {
s = unescapeString(s);
}

if (s) {
// add all processed strings here, so that we catch denormalized strings in the input
this.allProcessedStrings.add(s);
}

// nfd
if (opts?.nfd) {
if (!sections?.meta?.normalizationDisabled) {
Expand Down
7 changes: 7 additions & 0 deletions developer/src/kmc-ldml/src/compiler/empty-compiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ export class StrsCompiler extends EmptyCompiler {
const strs = <KMXPlus.Strs>section;

if (strs) {
let hadDenormalized = false;
const badStringAnalyzer = new util.BadStringAnalyzer();
const CONTAINS_MARKER_REGEX = new RegExp(LdmlKeyboardTypes.MarkerParser.ANY_MARKER_MATCH);
for (let s of strs.allProcessedStrings.values()) {
// stop at the first denormalized string
hadDenormalized = hadDenormalized || util.isDenormalized(s);
// replace all \\uXXXX with the actual code point.
// this lets us analyze whether there are PUA, unassigned, etc.
// the results might not be valid regex of course.
Expand Down Expand Up @@ -72,6 +75,10 @@ export class StrsCompiler extends EmptyCompiler {
return false;
}
}
if (hadDenormalized) {
// warn once
this.callbacks.reportMessage(LdmlCompilerMessages.Warn_StringDenorm());
}
}
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,11 @@ export class LdmlCompilerMessages {
`Invalid marker identifier "\m{${def(o.id)}}". Identifiers must be between 1 and 32 characters, and can use A-Z, a-z, 0-9, and _.`,
);

// Available: 0x02B-0x2F
static WARN_StringDenorm = SevWarn | 0x002B;
static Warn_StringDenorm = () =>
m(this.WARN_StringDenorm, `File contains text that is neither NFC nor NFD.`);

// Available: 0x02C-0x2F

static ERROR_InvalidQuadEscape = SevError | 0x0030;
static Error_InvalidQuadEscape = (o: { cp: number }) =>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>

<keyboard3 xmlns="https://schemas.unicode.org/cldr/45/keyboard3" locale="mt" conformsTo="45">
<version number="1.0.0" />

<info author="srl295" indicator="🙀" layout="qwerty" name="TestKbd" />

<keys>
<key id="ess" output="s\u{0307}\u{0323}" /> <!-- not NFD-->
</keys>

<layers formId="us" minDeviceWidth="123">
<layer id="zz">
<row keys="ess" />
</layer>
</layers>

</keyboard3>
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>

<keyboard3 xmlns="https://schemas.unicode.org/cldr/45/keyboard3" locale="mt" conformsTo="45">
<version number="1.0.0" />

<info author="srl295" indicator="🙀" layout="qwerty" name="TestKbd" />

<keys>
<key id="ess" output="${ess}" />
</keys>

<variables>
<string id="ess" value="s\u{0307}\u{0323}" /> <!-- Not NFD -->
</variables>

<layers formId="us" minDeviceWidth="123">
<layer id="zz">
<row keys="ess" />
</layer>
</layers>

</keyboard3>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>

<keyboard3 xmlns="https://schemas.unicode.org/cldr/45/keyboard3" locale="mt" conformsTo="45">
<version number="1.0.0" />

<info author="srl295" indicator="🙀" layout="qwerty" name="TestKbd" />

<displays>
<display keyId="ess" display="s\u{0307}\u{0323}" /> <!-- not NFD-->
</displays>

<keys>
<key id="ess" output="${ess}" />
</keys>

<variables>
<string id="ess" value="S" />
</variables>

<layers formId="us" minDeviceWidth="123">
<layer id="zz">
<row keys="ess" />
</layer>
</layers>

</keyboard3>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>

<keyboard3 xmlns="https://schemas.unicode.org/cldr/45/keyboard3" locale="mt" conformsTo="45">
<version number="1.0.0" />

<info author="srl295" indicator="🙀" layout="qwerty" name="TestKbd" />


<keys>
<key id="ess" output="S" />
</keys>


<layers formId="us" minDeviceWidth="123">
<layer id="zz">
<row keys="ess" />
</layer>
</layers>

<transforms type="simple">
<transformGroup>
<transform from="S" to="s\u{0307}\u{0323}" /> <!-- Not NFD-->
</transformGroup>
</transforms>

</keyboard3>
20 changes: 20 additions & 0 deletions developer/src/kmc-ldml/test/strs.tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,24 @@ describe('strs', function () {
]);
assert.isNull(kmx); // should fail post-validate
});
describe('should warn on denorm strings', async function () {
for (const num of [1, 2, 3, 4]) {
const path = `sections/strs/warn-denorm-${num}.xml`;
it(path, async function () {
const inputFilename = makePathToFixture(path);
// Compile the keyboard
const kmx = await compileKeyboard(inputFilename, { ...compilerTestOptions, saveDebug: true, shouldAddCompilerVersion: false },
[
// validation messages
LdmlCompilerMessages.Warn_StringDenorm(),
],
false, // validation should pass
[
// same messages
LdmlCompilerMessages.Warn_StringDenorm(),
]);
assert.isNotNull(kmx); // not failing
});
}
});
});

0 comments on commit add502e

Please sign in to comment.