Skip to content

Commit

Permalink
Merge pull request #12482 from keymanapp/feat/common/12208-unified-xm…
Browse files Browse the repository at this point in the history
…l-parse

feat(common): unified XML parser/writer
  • Loading branch information
srl295 authored Oct 3, 2024
2 parents 5881855 + 158c4a7 commit e483318
Show file tree
Hide file tree
Showing 27 changed files with 5,131 additions and 108 deletions.
2 changes: 1 addition & 1 deletion developer/src/common/web/utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ export { defaultCompilerOptions, CompilerBaseOptions, CompilerCallbacks, Compile

export { CommonTypesMessages } from './common-messages.js';

export * as xml2js from './deps/xml2js/xml2js.js';
export { KeymanXMLType, KeymanXMLWriter, KeymanXMLReader } from './xml-utils.js';
19 changes: 4 additions & 15 deletions developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { xml2js } from '../../index.js';
import { KeymanXMLReader } from '../../index.js';
import { KPJFile, KPJFileProject } from './kpj-file.js';
import { util } from '@keymanapp/common-types';
import { KeymanDeveloperProject, KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from './keyman-developer-project.js';
Expand All @@ -13,20 +13,9 @@ export class KPJFileReader {
public read(file: Uint8Array): KPJFile {
let data: KPJFile;

const parser = new xml2js.Parser({
explicitArray: false,
mergeAttrs: false,
includeWhiteChars: false,
normalize: false,
emptyTag: ''
});
data = new KeymanXMLReader('kpj')
.parse(file.toString());

parser.parseString(file, (e: unknown, r: unknown) => {
if(e) {
throw e;
}
data = r as KPJFile;
});
data = this.boxArrays(data);
if(data.KeymanDeveloperProject?.Files?.File?.length) {
for(const file of data.KeymanDeveloperProject?.Files?.File) {
Expand Down Expand Up @@ -126,4 +115,4 @@ export class KPJFileReader {
util.boxXmlArray(source.KeymanDeveloperProject.Files, 'File');
return source;
}
}
}
38 changes: 11 additions & 27 deletions developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { SchemaValidators as SV, KvkFile, util, Constants } from '@keymanapp/common-types';
import { xml2js } from '../../index.js'
import { KeymanXMLReader } from '../../index.js'
import KVKSourceFile from './kvks-file.js';
const SchemaValidators = SV.default;
import boxXmlArray = util.boxXmlArray;
Expand All @@ -20,31 +20,15 @@ export default class KVKSFileReader {
public read(file: Uint8Array): KVKSourceFile {
let source: KVKSourceFile;

const parser = new xml2js.Parser({
explicitArray: false,
mergeAttrs: false,
includeWhiteChars: true,
normalize: false,
emptyTag: {} as any
// Why "as any"? xml2js is broken:
// https://github.com/Leonidas-from-XIV/node-xml2js/issues/648 means
// that an old version of `emptyTag` is used which doesn't support
// functions, but DefinitelyTyped is requiring use of function or a
// string. See also notes at
// https://github.com/DefinitelyTyped/DefinitelyTyped/pull/59259#issuecomment-1254405470
// An alternative fix would be to pull xml2js directly from github
// rather than using the version tagged on npmjs.com.
});

parser.parseString(file, (e: unknown, r: unknown) => {
if(e) {
if(file.byteLength > 4 && file.subarray(0,3).every((v,i) => v == KVK_HEADER_IDENTIFIER_BYTES[i])) {
throw new Error('File appears to be a binary .kvk file', {cause: e});
}
throw e;
};
source = r as KVKSourceFile;
});
try {
source = new KeymanXMLReader('kvks')
.parse(file.toString()) as KVKSourceFile;
} catch(e) {
if(file.byteLength > 4 && file.subarray(0,3).every((v,i) => v == KVK_HEADER_IDENTIFIER_BYTES[i])) {
throw new Error('File appears to be a binary .kvk file', {cause: e});
}
throw e;
}
if(source) {
source = this.boxArrays(source);
this.cleanupFlags(source);
Expand Down Expand Up @@ -197,4 +181,4 @@ export default class KVKSFileReader {
}
return 0;
}
}
}
20 changes: 3 additions & 17 deletions developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { VisualKeyboard as VK, Constants } from '@keymanapp/common-types';
import KVKSourceFile, { KVKSEncoding, KVKSFlags, KVKSKey, KVKSLayer } from './kvks-file.js';
import { xml2js } from '../../index.js';
import { KeymanXMLWriter } from '../../index.js';

import USVirtualKeyCodes = Constants.USVirtualKeyCodes;
import VisualKeyboard = VK.VisualKeyboard;
Expand All @@ -11,18 +11,6 @@ import VisualKeyboardShiftState = VK.VisualKeyboardShiftState;

export default class KVKSFileWriter {
public write(vk: VisualKeyboard): string {

const builder = new xml2js.Builder({
allowSurrogateChars: true,
attrkey: '$',
charkey: '_',
xmldec: {
version: '1.0',
encoding: 'UTF-8',
standalone: true
}
})

const flags: KVKSFlags = {};
if(vk.header.flags & VisualKeyboardHeaderFlags.kvkhDisplayUnderlying) {
flags.displayunderlying = '';
Expand All @@ -37,8 +25,6 @@ export default class KVKSFileWriter {
flags.useunderlying = '';
}



const kvks: KVKSourceFile = {
visualkeyboard: {
header: {
Expand Down Expand Up @@ -105,7 +91,7 @@ export default class KVKSFileWriter {
l.key.push(k);
}

const result = builder.buildObject(kvks);
const result = new KeymanXMLWriter('kvks').write(kvks);
return result; //Uint8Array.from(result);
}

Expand All @@ -124,4 +110,4 @@ export default class KVKSFileWriter {
}
return '';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
* Reads a LDML XML keyboard file into JS object tree and resolves imports
*/
import { SchemaValidators, util } from '@keymanapp/common-types';
import { xml2js } from '../../index.js';
import { CommonTypesMessages } from '../../common-messages.js';
import { CompilerCallbacks } from '../../compiler-interfaces.js';
import { LDMLKeyboardXMLSourceFile, LKImport, ImportStatus } from './ldml-keyboard-xml.js';
import { constants } from '@keymanapp/ldml-keyboard-constants';
import { LDMLKeyboardTestDataXMLSourceFile, LKTTest, LKTTests } from './ldml-keyboard-testdata-xml.js';

import { KeymanXMLReader } from '@keymanapp/developer-utils';
import boxXmlArray = util.boxXmlArray;

interface NameAndProps {
Expand Down Expand Up @@ -262,26 +261,9 @@ export class LDMLKeyboardXMLSourceFileReader {
}

loadUnboxed(file: Uint8Array): LDMLKeyboardXMLSourceFile {
const source = (() => {
let a: LDMLKeyboardXMLSourceFile;
const parser = new xml2js.Parser({
explicitArray: false,
mergeAttrs: true,
includeWhiteChars: false,
emptyTag: {} as any
// Why "as any"? xml2js is broken:
// https://github.com/Leonidas-from-XIV/node-xml2js/issues/648 means
// that an old version of `emptyTag` is used which doesn't support
// functions, but DefinitelyTyped is requiring use of function or a
// string. See also notes at
// https://github.com/DefinitelyTyped/DefinitelyTyped/pull/59259#issuecomment-1254405470
// An alternative fix would be to pull xml2js directly from github
// rather than using the version tagged on npmjs.com.
});
const data = new TextDecoder().decode(file);
parser.parseString(data, (e: unknown, r: unknown) => { if(e) throw e; a = r as LDMLKeyboardXMLSourceFile }); // TODO-LDML: isn't 'e' the error?
return a;
})();
const data = new TextDecoder().decode(file);
const source = new KeymanXMLReader('keyboard3')
.parse(data) as LDMLKeyboardXMLSourceFile;
return source;
}

Expand Down Expand Up @@ -311,27 +293,8 @@ export class LDMLKeyboardXMLSourceFileReader {
}

loadTestDataUnboxed(file: Uint8Array): any {
const source = (() => {
let a: any;
const parser = new xml2js.Parser({
// explicitArray: false,
preserveChildrenOrder:true, // needed for test data
explicitChildren: true, // needed for test data
// mergeAttrs: true,
// includeWhiteChars: false,
// emptyTag: {} as any
// Why "as any"? xml2js is broken:
// https://github.com/Leonidas-from-XIV/node-xml2js/issues/648 means
// that an old version of `emptyTag` is used which doesn't support
// functions, but DefinitelyTyped is requiring use of function or a
// string. See also notes at
// https://github.com/DefinitelyTyped/DefinitelyTyped/pull/59259#issuecomment-1254405470
// An alternative fix would be to pull xml2js directly from github
// rather than using the version tagged on npmjs.com.
});
parser.parseString(file, (e: unknown, r: unknown) => { a = r as any }); // TODO-LDML: isn't 'e' the error?
return a; // Why 'any'? Because we need to box up the $'s into proper properties.
})();
const source = new KeymanXMLReader('keyboardTest3')
.parse(file.toString()) as any;
return source;
}

Expand Down
128 changes: 128 additions & 0 deletions developer/src/common/web/utils/src/xml-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Keyman is copyright (C) SIL Global. MIT License.
*
* Created by srl on 2024-09-27
*
* Abstraction for XML reading and writing
*/

import * as xml2js from "./deps/xml2js/xml2js.js";

/**
 * Identifies which kind of Keyman XML document a reader or writer handles.
 * The trailing comment on each member names the document's root element.
 */
export type KeymanXMLType =
  | 'keyboard3'      // LDML <keyboard3>
  | 'keyboardTest3'  // LDML <keyboardTest3>
  | 'kps'            // <Package>
  | 'kvks'           // <visualkeyboard>
  | 'kpj';           // <KeymanDeveloperProject>

/** Bag of options, maximally one for each KeymanXMLType */
type KeymanXMLOptionsBag = {
  [key in KeymanXMLType]?: any
};

/*
 * Note on `emptyTag: {} as any`, used by several entries below.
 *
 * Why "as any"? xml2js is broken:
 * https://github.com/Leonidas-from-XIV/node-xml2js/issues/648 means
 * that an old version of `emptyTag` is used which doesn't support
 * functions, but DefinitelyTyped is requiring use of function or a
 * string. See also notes at
 * https://github.com/DefinitelyTyped/DefinitelyTyped/pull/59259#issuecomment-1254405470
 * An alternative fix would be to pull xml2js directly from github
 * rather than using the version tagged on npmjs.com.
 */

/** map of options for the XML parser, keyed by document type */
const PARSER_OPTIONS: KeymanXMLOptionsBag = {
  keyboard3: {
    explicitArray: false,
    mergeAttrs: true,
    includeWhiteChars: false,
    emptyTag: {} as any, // see the emptyTag note above this table
  },
  keyboardTest3: {
    preserveChildrenOrder: true, // needed for test data
    explicitChildren: true, // needed for test data
  },
  kps: {
    explicitArray: false,
  },
  kpj: {
    explicitArray: false,
    mergeAttrs: false,
    includeWhiteChars: false,
    normalize: false,
    emptyTag: '',
  },
  kvks: {
    explicitArray: false,
    mergeAttrs: false,
    includeWhiteChars: true,
    normalize: false,
    emptyTag: {} as any, // see the emptyTag note above this table
  },
};

/**
 * map of options for the XML generator, keyed by document type;
 * only document types that have an entry here can be written
 */
const GENERATOR_OPTIONS: KeymanXMLOptionsBag = {
  kvks: {
    allowSurrogateChars: true,
    attrkey: '$',
    charkey: '_',
    xmldec: {
      version: '1.0',
      encoding: 'UTF-8',
      standalone: true,
    },
  },
};

/**
 * Wrapper for XML parsing support.
 *
 * Creates an xml2js parser configured from the `PARSER_OPTIONS` entry for the
 * requested document type and uses it to parse XML text.
 */
export class KeymanXMLReader {
  /**
   * @param type which kind of Keyman XML document this reader parses; selects
   *             the parser options
   */
  public constructor(public type: KeymanXMLType) {
  }

  /**
   * Parses XML text using the options registered for this reader's type.
   *
   * @param data the XML document as a string
   * @returns the value xml2js passes to the parse callback; it is untyped, so
   *          callers are expected to cast and/or validate it
   * @throws whatever error xml2js reports to the parse callback (rethrown
   *         from inside that callback), or the error from `parser()` if the
   *         type has no registered options
   */
  public parse(data: string): any {
    const parser = this.parser();
    let a: any;
    parser.parseString(data, (e: unknown, r: unknown) => { if (e) throw e; a = r; });
    return a;
  }

  /**
   * Creates a new xml2js parser for this reader's type.
   *
   * @returns a freshly constructed `xml2js.Parser`
   * @throws Error if no parser options are registered for the type
   */
  public parser() {
    const registered = PARSER_OPTIONS[this.type];
    if (!registered) {
      /* c8 ignore next 1 */
      throw Error(`Internal error: unhandled XML type ${this.type}`);
    }
    // TODO: xml2js likes to mutate the options here. Shallow clone the object
    // so the shared table entry is never handed to the parser directly.
    const options = Object.assign({}, registered);
    // A shallow clone still shares an object-valued emptyTag with the table,
    // so give each parser its own fresh object. Only object values are reset:
    // a string emptyTag cannot be mutated and must be passed through as-is
    // (a plain truthiness test would clobber any non-empty string with {}).
    if (options.emptyTag !== null && typeof options.emptyTag === 'object') {
      options.emptyTag = {};
    }
    return new xml2js.Parser(options);
  }
}

/**
 * Wrapper for XML generation support.
 *
 * Creates an xml2js builder configured from the `GENERATOR_OPTIONS` entry for
 * the requested document type and uses it to serialize an object to XML text.
 */
export class KeymanXMLWriter {
  /**
   * @param type which kind of Keyman XML document this writer produces;
   *             selects the generator options
   */
  public constructor(public type: KeymanXMLType) {
  }

  /**
   * Serializes an object using the options registered for this writer's type.
   *
   * @param data the object to serialize
   * @returns the string produced by the xml2js builder
   * @throws Error (from `builder()`) if the type has no registered options
   */
  public write(data: any): string {
    return this.builder().buildObject(data);
  }

  /**
   * Creates a new xml2js builder for this writer's type.
   *
   * @returns a freshly constructed `xml2js.Builder`
   * @throws Error if no generator options are registered for the type
   */
  public builder() {
    const registered = GENERATOR_OPTIONS[this.type];
    if (!registered) {
      /* c8 ignore next 1 */
      throw Error(`Internal error: unhandled XML type ${this.type}`);
    }
    // Hand xml2js a shallow copy in case the options are mutated.
    return new xml2js.Builder({ ...registered });
  }
}

16 changes: 16 additions & 0 deletions developer/src/common/web/utils/test/fixtures/xml/disp_maximal.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>

<keyboard3 xmlns="https://schemas.unicode.org/cldr/45/keyboard3" locale="mt" conformsTo="45">
<info name="disp-maximal"/>

<displays>
<display keyId="g" display="(g)"/>
<display output="f" display="(f)"/> <!-- Note: in opposite lexical order, as the compiler will sort -->
<display output="${eee}" display="(${eee})"/>
<displayOptions baseCharacter="x" />
</displays>

<variables>
<string id="eee" value="e" />
</variables>
</keyboard3>
Loading

0 comments on commit e483318

Please sign in to comment.