From 4ed88d8fa6295104bd00a01bf7c9c3be1740c85c Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Fri, 24 May 2024 15:41:10 +0700 Subject: [PATCH 01/88] refactor(developer): move developer-utils files into src/ and sub-folder In preparation for moving source file types into developer-utils, move the existing developer-utils files into src/ and src/utils subfolders. --- developer/src/common/web/utils/index.ts | 5 ----- developer/src/common/web/utils/package.json | 2 +- developer/src/common/web/utils/src/index.ts | 6 ++++++ .../src/common/web/utils/src/{ => utils}/KeymanSentry.ts | 0 .../src/common/web/utils/src/{ => utils}/keyman-urls.ts | 0 developer/src/common/web/utils/src/{ => utils}/markdown.ts | 0 developer/src/common/web/utils/src/{ => utils}/options.ts | 0 .../web/utils/src/{ => utils}/validate-mit-license.ts | 0 developer/src/common/web/utils/test/test-license.ts | 2 +- developer/src/common/web/utils/tsconfig.json | 1 - 10 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 developer/src/common/web/utils/index.ts create mode 100644 developer/src/common/web/utils/src/index.ts rename developer/src/common/web/utils/src/{ => utils}/KeymanSentry.ts (100%) rename developer/src/common/web/utils/src/{ => utils}/keyman-urls.ts (100%) rename developer/src/common/web/utils/src/{ => utils}/markdown.ts (100%) rename developer/src/common/web/utils/src/{ => utils}/options.ts (100%) rename developer/src/common/web/utils/src/{ => utils}/validate-mit-license.ts (100%) diff --git a/developer/src/common/web/utils/index.ts b/developer/src/common/web/utils/index.ts deleted file mode 100644 index 84651750639..00000000000 --- a/developer/src/common/web/utils/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export { validateMITLicense } from './src/validate-mit-license.js'; -export { KeymanSentry, SentryNodeOptions } from './src/KeymanSentry.js'; -export { getOption, loadOptions, clearOptions } from './src/options.js'; -export { escapeMarkdownChar } from './src/markdown.js'; 
-export { KeymanUrls } from './src/keyman-urls.js'; \ No newline at end of file diff --git a/developer/src/common/web/utils/package.json b/developer/src/common/web/utils/package.json index b93cca8efa3..ad3b796a215 100644 --- a/developer/src/common/web/utils/package.json +++ b/developer/src/common/web/utils/package.json @@ -3,7 +3,7 @@ "description": "Keyman Developer utilities", "type": "module", "exports": { - ".": "./build/index.js" + ".": "./build/src/index.js" }, "files": [ "/build/" diff --git a/developer/src/common/web/utils/src/index.ts b/developer/src/common/web/utils/src/index.ts new file mode 100644 index 00000000000..5da5e4c48c7 --- /dev/null +++ b/developer/src/common/web/utils/src/index.ts @@ -0,0 +1,6 @@ +export { validateMITLicense } from './utils/validate-mit-license.js'; +export { KeymanSentry, SentryNodeOptions } from './utils/KeymanSentry.js'; +export { getOption, loadOptions, clearOptions } from './utils/options.js'; +export { escapeMarkdownChar } from './utils/markdown.js'; +export { KeymanUrls } from './utils/keyman-urls.js'; + diff --git a/developer/src/common/web/utils/src/KeymanSentry.ts b/developer/src/common/web/utils/src/utils/KeymanSentry.ts similarity index 100% rename from developer/src/common/web/utils/src/KeymanSentry.ts rename to developer/src/common/web/utils/src/utils/KeymanSentry.ts diff --git a/developer/src/common/web/utils/src/keyman-urls.ts b/developer/src/common/web/utils/src/utils/keyman-urls.ts similarity index 100% rename from developer/src/common/web/utils/src/keyman-urls.ts rename to developer/src/common/web/utils/src/utils/keyman-urls.ts diff --git a/developer/src/common/web/utils/src/markdown.ts b/developer/src/common/web/utils/src/utils/markdown.ts similarity index 100% rename from developer/src/common/web/utils/src/markdown.ts rename to developer/src/common/web/utils/src/utils/markdown.ts diff --git a/developer/src/common/web/utils/src/options.ts b/developer/src/common/web/utils/src/utils/options.ts similarity 
index 100% rename from developer/src/common/web/utils/src/options.ts rename to developer/src/common/web/utils/src/utils/options.ts diff --git a/developer/src/common/web/utils/src/validate-mit-license.ts b/developer/src/common/web/utils/src/utils/validate-mit-license.ts similarity index 100% rename from developer/src/common/web/utils/src/validate-mit-license.ts rename to developer/src/common/web/utils/src/utils/validate-mit-license.ts diff --git a/developer/src/common/web/utils/test/test-license.ts b/developer/src/common/web/utils/test/test-license.ts index 653f89ba69c..501f4f4711e 100644 --- a/developer/src/common/web/utils/test/test-license.ts +++ b/developer/src/common/web/utils/test/test-license.ts @@ -2,7 +2,7 @@ import * as fs from 'fs'; import { assert } from 'chai'; import 'mocha'; import { makePathToFixture } from './helpers/index.js'; -import { validateMITLicense } from '../src/validate-mit-license.js'; +import { validateMITLicense } from '../src/utils/validate-mit-license.js'; function verifyLicenseFile(filename: string) { return validateMITLicense(fs.readFileSync(makePathToFixture('license', filename), 'utf-8')); diff --git a/developer/src/common/web/utils/tsconfig.json b/developer/src/common/web/utils/tsconfig.json index c5980c82f9c..fa41799c2e6 100644 --- a/developer/src/common/web/utils/tsconfig.json +++ b/developer/src/common/web/utils/tsconfig.json @@ -7,7 +7,6 @@ "baseUrl": ".", }, "include": [ - "index.ts", "src/**/*.ts", ], } \ No newline at end of file From 9cf09a5140cc828e9bc79764a47f584e6d59787b Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Fri, 24 May 2024 15:42:42 +0700 Subject: [PATCH 02/88] refactor(common): move kpj-related files into developer-utils Relates to #9665. 
--- common/web/types/src/main.ts | 4 ---- developer/src/common/web/utils/src/index.ts | 3 +++ .../src/types}/kpj/keyman-developer-project.ts | 3 +-- .../web/utils/src/types}/kpj/kpj-file-reader.ts | 13 ++++++------- .../src/common/web/utils/src/types}/kpj/kpj-file.ts | 0 .../web/utils}/test/kpj/test-kpj-file-reader.ts | 6 +++--- .../kmc/src/commands/buildClasses/BuildProject.ts | 3 ++- developer/src/kmc/src/util/projectLoader.ts | 3 ++- 8 files changed, 17 insertions(+), 18 deletions(-) rename {common/web/types/src => developer/src/common/web/utils/src/types}/kpj/keyman-developer-project.ts (98%) rename {common/web/types/src => developer/src/common/web/utils/src/types}/kpj/kpj-file-reader.ts (92%) rename {common/web/types/src => developer/src/common/web/utils/src/types}/kpj/kpj-file.ts (100%) rename {common/web/types => developer/src/common/web/utils}/test/kpj/test-kpj-file-reader.ts (97%) diff --git a/common/web/types/src/main.ts b/common/web/types/src/main.ts index 3d18812876c..f2f91cba181 100644 --- a/common/web/types/src/main.ts +++ b/common/web/types/src/main.ts @@ -43,10 +43,6 @@ export * as TouchLayout from './keyman-touch-layout/keyman-touch-layout-file.js' export { TouchLayoutFileReader } from './keyman-touch-layout/keyman-touch-layout-file-reader.js'; export { TouchLayoutFileWriter, TouchLayoutFileWriterOptions } from './keyman-touch-layout/keyman-touch-layout-file-writer.js'; -export * as KPJ from './kpj/kpj-file.js'; -export { KPJFileReader } from './kpj/kpj-file-reader.js'; -export { KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType, } from './kpj/keyman-developer-project.js'; - export * as KpsFile from './package/kps-file.js'; export * as KmpJsonFile from './package/kmp-json-file.js'; diff --git a/developer/src/common/web/utils/src/index.ts b/developer/src/common/web/utils/src/index.ts index 5da5e4c48c7..bfbb6d97c52 100644 --- a/developer/src/common/web/utils/src/index.ts +++ b/developer/src/common/web/utils/src/index.ts 
@@ -4,3 +4,6 @@ export { getOption, loadOptions, clearOptions } from './utils/options.js'; export { escapeMarkdownChar } from './utils/markdown.js'; export { KeymanUrls } from './utils/keyman-urls.js'; +export * as KPJ from './types/kpj/kpj-file.js'; +export { KPJFileReader } from './types/kpj/kpj-file-reader.js'; +export { KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType, } from './types/kpj/keyman-developer-project.js'; diff --git a/common/web/types/src/kpj/keyman-developer-project.ts b/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts similarity index 98% rename from common/web/types/src/kpj/keyman-developer-project.ts rename to developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts index 76f3ab709a2..7ddcb43c183 100644 --- a/common/web/types/src/kpj/keyman-developer-project.ts +++ b/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts @@ -2,8 +2,7 @@ // Version 1.0 and 2.0 of Keyman Developer Project .kpj file // -import { KeymanFileTypes } from '../main.js'; -import { CompilerCallbacks } from '../util/compiler-interfaces.js'; +import { CompilerCallbacks, KeymanFileTypes } from '@keymanapp/common-types'; export class KeymanDeveloperProject { options: KeymanDeveloperProjectOptions; diff --git a/common/web/types/src/kpj/kpj-file-reader.ts b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts similarity index 92% rename from common/web/types/src/kpj/kpj-file-reader.ts rename to developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts index eb2a168a48c..a0ef422b01a 100644 --- a/common/web/types/src/kpj/kpj-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts @@ -1,9 +1,8 @@ import * as xml2js from '../deps/xml2js/xml2js.js'; import { KPJFile, KPJFileProject } from './kpj-file.js'; -import { boxXmlArray } from '../util/util.js'; +import { util } from '@keymanapp/common-types'; import { KeymanDeveloperProject, 
KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from './keyman-developer-project.js'; -import { CompilerCallbacks } from '../util/compiler-interfaces.js'; -import SchemaValidators from '../schema-validators.js'; +import { CompilerCallbacks, SchemaValidators } from '@keymanapp/common-types'; export class KPJFileReader { constructor(private callbacks: CompilerCallbacks) { @@ -40,11 +39,11 @@ export class KPJFileReader { } public validate(source: KPJFile): void { - if(!SchemaValidators.kpj(source)) { - if(!SchemaValidators.kpj90(source)) { + if(!SchemaValidators.default.kpj(source)) { + if(!SchemaValidators.default.kpj90(source)) { // If the legacy schema also does not validate, then we will only report // the errors against the modern schema - throw new Error(JSON.stringify((SchemaValidators.kpj).errors)); + throw new Error(JSON.stringify((SchemaValidators.default.kpj).errors)); } } } @@ -123,7 +122,7 @@ export class KPJFileReader { if(!source.KeymanDeveloperProject.Files || typeof source.KeymanDeveloperProject.Files == 'string') { source.KeymanDeveloperProject.Files = {File:[]}; } - boxXmlArray(source.KeymanDeveloperProject.Files, 'File'); + util.boxXmlArray(source.KeymanDeveloperProject.Files, 'File'); return source; } } \ No newline at end of file diff --git a/common/web/types/src/kpj/kpj-file.ts b/developer/src/common/web/utils/src/types/kpj/kpj-file.ts similarity index 100% rename from common/web/types/src/kpj/kpj-file.ts rename to developer/src/common/web/utils/src/types/kpj/kpj-file.ts diff --git a/common/web/types/test/kpj/test-kpj-file-reader.ts b/developer/src/common/web/utils/test/kpj/test-kpj-file-reader.ts similarity index 97% rename from common/web/types/test/kpj/test-kpj-file-reader.ts rename to developer/src/common/web/utils/test/kpj/test-kpj-file-reader.ts index cbebdd79207..4b154af0606 100644 --- a/common/web/types/test/kpj/test-kpj-file-reader.ts +++ b/developer/src/common/web/utils/test/kpj/test-kpj-file-reader.ts @@ -2,9 +2,9 @@ import 
* as fs from 'fs'; import 'mocha'; import {assert} from 'chai'; import { makePathToFixture } from '../helpers/index.js'; -import { KPJFileReader } from "../../src/kpj/kpj-file-reader.js"; -import { KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from '../../src/kpj/keyman-developer-project.js'; -import { TestCompilerCallbacks } from '../helpers/TestCompilerCallbacks.js'; +import { KPJFileReader } from "../../src/types/kpj/kpj-file-reader.js"; +import { KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from '../../src/types/kpj/keyman-developer-project.js'; +import { TestCompilerCallbacks } from '@keymanapp/developer-test-helpers'; const callbacks = new TestCompilerCallbacks(); diff --git a/developer/src/kmc/src/commands/buildClasses/BuildProject.ts b/developer/src/kmc/src/commands/buildClasses/BuildProject.ts index 4a5f9148bc6..ccdb85b6a2e 100644 --- a/developer/src/kmc/src/commands/buildClasses/BuildProject.ts +++ b/developer/src/kmc/src/commands/buildClasses/BuildProject.ts @@ -1,6 +1,7 @@ import * as path from 'path'; import * as fs from 'fs'; -import { CompilerCallbacks, CompilerFileCallbacks, KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType, KeymanFileTypes } from '@keymanapp/common-types'; +import { CompilerCallbacks, CompilerFileCallbacks, KeymanFileTypes } from '@keymanapp/common-types'; +import { KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType } from '@keymanapp/developer-utils'; import { BuildActivity } from './BuildActivity.js'; import { buildActivities, buildKeyboardInfoActivity, buildModelInfoActivity } from './buildActivities.js'; import { InfrastructureMessages } from '../../messages/infrastructureMessages.js'; diff --git a/developer/src/kmc/src/util/projectLoader.ts b/developer/src/kmc/src/util/projectLoader.ts index b26399ee395..1900008f306 100644 --- a/developer/src/kmc/src/util/projectLoader.ts +++ b/developer/src/kmc/src/util/projectLoader.ts @@ -1,7 +1,8 @@ import 
* as path from 'path'; import * as fs from 'fs'; -import { CompilerCallbacks, KeymanDeveloperProject, KeymanFileTypes, KPJFileReader } from "@keymanapp/common-types"; +import { CompilerCallbacks, KeymanFileTypes } from "@keymanapp/common-types"; +import { KeymanDeveloperProject, KPJFileReader } from "@keymanapp/developer-utils"; import { InfrastructureMessages } from "../messages/infrastructureMessages.js"; export const isProject = (filename: string): boolean => From 74e2a8ccead3e9b27b5b684b777a03d63d57a856 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Fri, 24 May 2024 15:46:47 +0700 Subject: [PATCH 03/88] refactor(common): move kpj test fixtures and cleanup post-move Moves the kpj test fixtures into developer-utils, moves from let to const in code where required by eslint, and reduces the coverage threshold in order to get tests to pass for developer-utils. --- developer/src/common/web/utils/build.sh | 7 +++++-- .../utils/src/types/kpj/keyman-developer-project.ts | 12 ++++++------ .../web/utils/src/types/kpj/kpj-file-reader.ts | 12 ++++++------ .../web/utils}/test/fixtures/kpj/khmer_angkor.kpj | 0 .../project-missing-file/project_missing_file.kpj | 0 .../project-missing-file/project_missing_files.kpj | 0 6 files changed, 17 insertions(+), 14 deletions(-) rename {common/web/types => developer/src/common/web/utils}/test/fixtures/kpj/khmer_angkor.kpj (100%) rename {common/web/types => developer/src/common/web/utils}/test/fixtures/kpj/project-missing-file/project_missing_file.kpj (100%) rename {common/web/types => developer/src/common/web/utils}/test/fixtures/kpj/project-missing-file/project_missing_files.kpj (100%) diff --git a/developer/src/common/web/utils/build.sh b/developer/src/common/web/utils/build.sh index e58d585c5d3..cd50df5a166 100755 --- a/developer/src/common/web/utils/build.sh +++ b/developer/src/common/web/utils/build.sh @@ -21,7 +21,7 @@ builder_describe "Build Keyman Developer web utility module" \ builder_describe_outputs \ configure 
/node_modules \ - build /developer/src/common/web/utils/build/index.js + build /developer/src/common/web/utils/build/src/index.js builder_parse "$@" @@ -34,7 +34,10 @@ builder_run_action build tsc --build if builder_start_action test; then eslint . tsc --build test - c8 --reporter=lcov --reporter=text --exclude-after-remap mocha + readonly C8_THRESHOLD=60 + c8 --reporter=lcov --reporter=text --exclude-after-remap --lines $C8_THRESHOLD --statements $C8_THRESHOLD --branches $C8_THRESHOLD --functions $C8_THRESHOLD mocha + builder_echo warning "Coverage thresholds are currently $C8_THRESHOLD%, which is lower than ideal." + builder_echo warning "Please increase threshold in build.sh as test coverage improves." builder_finish_action success test fi diff --git a/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts b/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts index 7ddcb43c183..a215bfbaae2 100644 --- a/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts +++ b/developer/src/common/web/utils/src/types/kpj/keyman-developer-project.ts @@ -28,13 +28,13 @@ export class KeymanDeveloperProject { if(this.options.version != '2.0') { throw new Error('populateFiles can only be called on a v2.0 project'); } - let sourcePath = this.resolveProjectPath(this.options.sourcePath); + const sourcePath = this.resolveProjectPath(this.options.sourcePath); if(!this.callbacks.fs.existsSync(sourcePath)) { return false; } - let files = this.callbacks.fs.readdirSync(sourcePath); - for(let filename of files) { - let fullPath = this.callbacks.path.join(sourcePath, filename); + const files = this.callbacks.fs.readdirSync(sourcePath); + for(const filename of files) { + const fullPath = this.callbacks.path.join(sourcePath, filename); if(KeymanFileTypes.filenameIs(filename, KeymanFileTypes.Source.LdmlKeyboard)) { try { const data = this.callbacks.loadFile(fullPath); @@ -50,7 +50,7 @@ export class KeymanDeveloperProject { } } 
if(KeymanFileTypes.sourceTypeFromFilename(filename) !== null) { - let file = new KeymanDeveloperProjectFile20(fullPath, this.callbacks); + const file = new KeymanDeveloperProjectFile20(fullPath, this.callbacks); this.files.push(file); } } @@ -108,7 +108,7 @@ export class KeymanDeveloperProject { p = this.resolveProjectPath(p); - let f = file.filename.replace(new RegExp(`\\${sourceExt}$`, 'i'), targetExt); + const f = file.filename.replace(new RegExp(`\\${sourceExt}$`, 'i'), targetExt); return this.callbacks.path.normalize(this.callbacks.path.join(p, f)); } diff --git a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts index a0ef422b01a..9c33aaedd7d 100644 --- a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts @@ -28,7 +28,7 @@ export class KPJFileReader { }); data = this.boxArrays(data); if(data.KeymanDeveloperProject?.Files?.File?.length) { - for(let file of data.KeymanDeveloperProject?.Files?.File) { + for(const file of data.KeymanDeveloperProject?.Files?.File) { // xml2js imports
as '' so we will just delete the empty string if(typeof file.Details == 'string') { delete file.Details; @@ -59,8 +59,8 @@ export class KPJFileReader { // NOTE: at this point, the xml should have been validated // and matched the schema result so we can assume the source // is a valid shape - let project = source.KeymanDeveloperProject; - let result: KeymanDeveloperProject = new KeymanDeveloperProject(projectFilename, project.Options?.Version || "1.0", this.callbacks); + const project = source.KeymanDeveloperProject; + const result: KeymanDeveloperProject = new KeymanDeveloperProject(projectFilename, project.Options?.Version || "1.0", this.callbacks); if(result.options.version == '2.0') { result.options.buildPath = (project.Options?.BuildPath || result.options.buildPath).replace(/\\/g, '/'); result.options.sourcePath = (project.Options?.SourcePath || result.options.sourcePath).replace(/\\/g, '/'); @@ -87,9 +87,9 @@ export class KPJFileReader { } private transformFilesVersion10(project: KPJFileProject, result: KeymanDeveloperProject) { - let ids: { [id: string]: KeymanDeveloperProjectFile10; } = {}; - for (let sourceFile of project.Files?.File) { - let file: KeymanDeveloperProjectFile10 = new KeymanDeveloperProjectFile10( + const ids: { [id: string]: KeymanDeveloperProjectFile10; } = {}; + for (const sourceFile of project.Files?.File) { + const file: KeymanDeveloperProjectFile10 = new KeymanDeveloperProjectFile10( sourceFile.ID || '', (sourceFile.Filepath || '').replace(/\\/g, '/'), sourceFile.FileVersion || '', diff --git a/common/web/types/test/fixtures/kpj/khmer_angkor.kpj b/developer/src/common/web/utils/test/fixtures/kpj/khmer_angkor.kpj similarity index 100% rename from common/web/types/test/fixtures/kpj/khmer_angkor.kpj rename to developer/src/common/web/utils/test/fixtures/kpj/khmer_angkor.kpj diff --git a/common/web/types/test/fixtures/kpj/project-missing-file/project_missing_file.kpj 
b/developer/src/common/web/utils/test/fixtures/kpj/project-missing-file/project_missing_file.kpj similarity index 100% rename from common/web/types/test/fixtures/kpj/project-missing-file/project_missing_file.kpj rename to developer/src/common/web/utils/test/fixtures/kpj/project-missing-file/project_missing_file.kpj diff --git a/common/web/types/test/fixtures/kpj/project-missing-file/project_missing_files.kpj b/developer/src/common/web/utils/test/fixtures/kpj/project-missing-file/project_missing_files.kpj similarity index 100% rename from common/web/types/test/fixtures/kpj/project-missing-file/project_missing_files.kpj rename to developer/src/common/web/utils/test/fixtures/kpj/project-missing-file/project_missing_files.kpj From b2c4979cf4eed79c402ecf19cd4b1fbe5491c3d9 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Fri, 24 May 2024 15:53:23 +0700 Subject: [PATCH 04/88] chore(common): reduce c8 threshold for common-types As we move some better-tested areas out of common-types, it looks like our overall coverage goes down. --- common/web/types/build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/web/types/build.sh b/common/web/types/build.sh index df11540b424..b57a516893e 100755 --- a/common/web/types/build.sh +++ b/common/web/types/build.sh @@ -100,7 +100,10 @@ function do_build() { function do_test() { eslint . tsc --build test - c8 --skip-full --reporter=lcov --reporter=text mocha "${builder_extra_params[@]}" + readonly C8_THRESHOLD=75 + c8 --skip-full --reporter=lcov --reporter=text --lines $C8_THRESHOLD --statements $C8_THRESHOLD --branches $C8_THRESHOLD --functions $C8_THRESHOLD mocha "${builder_extra_params[@]}" + builder_echo warning "Coverage thresholds are currently $C8_THRESHOLD%, which is lower than ideal." + builder_echo warning "Please increase threshold in build.sh as test coverage improves."
} #------------------------------------------------------------------------------------------------------------------- From 185c020bf12364f100c685c778d197ba3b3a54b8 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Wed, 12 Jun 2024 16:43:28 +0700 Subject: [PATCH 05/88] chore(developer): update xml2js reference for kpj-file-reader --- developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts index 9c33aaedd7d..f65440f74ba 100644 --- a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts @@ -1,4 +1,4 @@ -import * as xml2js from '../deps/xml2js/xml2js.js'; +import * as xml2js from '@keymanapp/common-types'; import { KPJFile, KPJFileProject } from './kpj-file.js'; import { util } from '@keymanapp/common-types'; import { KeymanDeveloperProject, KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from './keyman-developer-project.js'; From 071fe5ccb5bbff401b00e811638183613bfed0ed Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jun 2024 08:49:05 +1000 Subject: [PATCH 06/88] chore(developer): fixup reference to xml2js --- developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts index f65440f74ba..bff0b299a0f 100644 --- a/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kpj/kpj-file-reader.ts @@ -1,4 +1,4 @@ -import * as xml2js from '@keymanapp/common-types'; +import { xml2js } from '@keymanapp/common-types'; import { KPJFile, KPJFileProject } from './kpj-file.js'; import { util } from '@keymanapp/common-types'; import { 
KeymanDeveloperProject, KeymanDeveloperProjectFile10, KeymanDeveloperProjectType } from './keyman-developer-project.js'; From de9ab217e910a3b7ef86601a7c3df69278a86123 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Wed, 12 Jun 2024 16:44:57 +0700 Subject: [PATCH 07/88] refactor(common): move kps-file.ts to @keymanapp/developer-utils Part of #9665. --- common/web/types/src/main.ts | 1 - developer/src/common/web/utils/src/index.ts | 2 ++ .../src/common/web/utils/src/types/kps}/kps-file.ts | 0 developer/src/kmc-package/build.sh | 1 + developer/src/kmc-package/package.json | 1 + developer/src/kmc-package/src/compiler/kmp-compiler.ts | 3 ++- .../src/kmc-package/src/compiler/package-version-validator.ts | 3 ++- .../src/compiler/windows-package-installer-compiler.ts | 3 ++- developer/src/kmc-package/tsconfig.json | 1 + 9 files changed, 11 insertions(+), 4 deletions(-) rename {common/web/types/src/package => developer/src/common/web/utils/src/types/kps}/kps-file.ts (100%) diff --git a/common/web/types/src/main.ts b/common/web/types/src/main.ts index f2f91cba181..c6c1bdc1d31 100644 --- a/common/web/types/src/main.ts +++ b/common/web/types/src/main.ts @@ -43,7 +43,6 @@ export * as TouchLayout from './keyman-touch-layout/keyman-touch-layout-file.js' export { TouchLayoutFileReader } from './keyman-touch-layout/keyman-touch-layout-file-reader.js'; export { TouchLayoutFileWriter, TouchLayoutFileWriterOptions } from './keyman-touch-layout/keyman-touch-layout-file-writer.js'; -export * as KpsFile from './package/kps-file.js'; export * as KmpJsonFile from './package/kmp-json-file.js'; export * as util from './util/util.js'; diff --git a/developer/src/common/web/utils/src/index.ts b/developer/src/common/web/utils/src/index.ts index bfbb6d97c52..22248ea55b2 100644 --- a/developer/src/common/web/utils/src/index.ts +++ b/developer/src/common/web/utils/src/index.ts @@ -7,3 +7,5 @@ export { KeymanUrls } from './utils/keyman-urls.js'; export * as KPJ from './types/kpj/kpj-file.js'; 
export { KPJFileReader } from './types/kpj/kpj-file-reader.js'; export { KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType, } from './types/kpj/keyman-developer-project.js'; + +export * as KpsFile from './types/kps/kps-file.js'; diff --git a/common/web/types/src/package/kps-file.ts b/developer/src/common/web/utils/src/types/kps/kps-file.ts similarity index 100% rename from common/web/types/src/package/kps-file.ts rename to developer/src/common/web/utils/src/types/kps/kps-file.ts diff --git a/developer/src/kmc-package/build.sh b/developer/src/kmc-package/build.sh index 27c725a9633..0b2d7bda9b9 100755 --- a/developer/src/kmc-package/build.sh +++ b/developer/src/kmc-package/build.sh @@ -14,6 +14,7 @@ THIS_SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")" builder_describe "Build Keyman kmc Package Compiler module" \ "@/common/web/keyman-version" \ "@/developer/src/common/web/test-helpers" \ + "@/developer/src/common/web/utils" \ "configure" \ "build" \ "api analyze API and prepare API documentation" \ diff --git a/developer/src/kmc-package/package.json b/developer/src/kmc-package/package.json index 22bc7b024df..521ae02e44b 100644 --- a/developer/src/kmc-package/package.json +++ b/developer/src/kmc-package/package.json @@ -30,6 +30,7 @@ }, "dependencies": { "@keymanapp/common-types": "*", + "@keymanapp/developer-utils": "*", "jszip": "^3.7.0", "marked": "^7.0.0" }, diff --git a/developer/src/kmc-package/src/compiler/kmp-compiler.ts b/developer/src/kmc-package/src/compiler/kmp-compiler.ts index 0d4740de9e0..b0a7aedf080 100644 --- a/developer/src/kmc-package/src/compiler/kmp-compiler.ts +++ b/developer/src/kmc-package/src/compiler/kmp-compiler.ts @@ -2,7 +2,7 @@ import { xml2js } from '@keymanapp/common-types'; import JSZip from 'jszip'; import KEYMAN_VERSION from "@keymanapp/keyman-version"; -import { KmpJsonFile, KpsFile, SchemaValidators, CompilerCallbacks, KeymanFileTypes, KvkFile, KeymanCompiler, CompilerOptions, KeymanCompilerResult, 
KeymanCompilerArtifacts, KeymanCompilerArtifact } from '@keymanapp/common-types'; +import { KmpJsonFile, SchemaValidators, CompilerCallbacks, KeymanFileTypes, KvkFile, KeymanCompiler, CompilerOptions, KeymanCompilerResult, KeymanCompilerArtifacts, KeymanCompilerArtifact } from '@keymanapp/common-types'; import { CompilerMessages } from './package-compiler-messages.js'; import { PackageMetadataCollector } from './package-metadata-collector.js'; import { KmpInfWriter } from './kmp-inf-writer.js'; @@ -12,6 +12,7 @@ import { PackageKeyboardTargetValidator } from './package-keyboard-target-valida import { PackageMetadataUpdater } from './package-metadata-updater.js'; import { markdownToHTML } from './markdown.js'; import { PackageValidation } from './package-validation.js'; +import { KpsFile } from '@keymanapp/developer-utils'; const KMP_JSON_FILENAME = 'kmp.json'; const KMP_INF_FILENAME = 'kmp.inf'; diff --git a/developer/src/kmc-package/src/compiler/package-version-validator.ts b/developer/src/kmc-package/src/compiler/package-version-validator.ts index 430755434a1..f62bb5cc8d5 100644 --- a/developer/src/kmc-package/src/compiler/package-version-validator.ts +++ b/developer/src/kmc-package/src/compiler/package-version-validator.ts @@ -1,6 +1,7 @@ -import { KmpJsonFile, CompilerCallbacks, KpsFile } from '@keymanapp/common-types'; +import { KmpJsonFile, CompilerCallbacks } from '@keymanapp/common-types'; import { CompilerMessages } from './package-compiler-messages.js'; import { KeyboardMetadataCollection } from './package-metadata-collector.js'; +import { KpsFile } from '@keymanapp/developer-utils'; export const DEFAULT_KEYBOARD_VERSION = '1.0'; export const MIN_LM_FILEVERSION_KMP_JSON = '12.0'; diff --git a/developer/src/kmc-package/src/compiler/windows-package-installer-compiler.ts b/developer/src/kmc-package/src/compiler/windows-package-installer-compiler.ts index 49e0ca25f9a..55077bd9a55 100644 --- 
a/developer/src/kmc-package/src/compiler/windows-package-installer-compiler.ts +++ b/developer/src/kmc-package/src/compiler/windows-package-installer-compiler.ts @@ -11,10 +11,11 @@ */ import JSZip from 'jszip'; -import { CompilerCallbacks, KeymanCompiler, KeymanCompilerArtifact, KeymanCompilerArtifacts, KeymanCompilerResult, KeymanFileTypes, KmpJsonFile, KpsFile } from "@keymanapp/common-types"; +import { CompilerCallbacks, KeymanCompiler, KeymanCompilerArtifact, KeymanCompilerArtifacts, KeymanCompilerResult, KeymanFileTypes, KmpJsonFile } from "@keymanapp/common-types"; import KEYMAN_VERSION from "@keymanapp/keyman-version"; import { KmpCompiler, KmpCompilerOptions } from "./kmp-compiler.js"; import { CompilerMessages } from "./package-compiler-messages.js"; +import { KpsFile } from '@keymanapp/developer-utils'; const SETUP_INF_FILENAME = 'setup.inf'; const PRODUCT_NAME = 'Keyman'; diff --git a/developer/src/kmc-package/tsconfig.json b/developer/src/kmc-package/tsconfig.json index 0ab78a91ffb..4fae2b81ec9 100644 --- a/developer/src/kmc-package/tsconfig.json +++ b/developer/src/kmc-package/tsconfig.json @@ -10,6 +10,7 @@ "src/**/*.ts", ], "references": [ + { "path": "../common/web/utils" }, { "path": "../../../common/web/keyman-version" }, { "path": "../../../common/web/types" }, ] From 1c74d9d713d0ba55c2cb614558f6e14426625cc9 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jun 2024 08:56:40 +1000 Subject: [PATCH 08/88] refactor(common): move kvks-file to @keymanapp/developer-utils Relates to #9665 --- common/web/types/src/main.ts | 3 -- developer/src/common/web/utils/src/index.ts | 5 ++ .../utils/src/types/kvks}/kvks-file-reader.ts | 20 +++++--- .../utils/src/types/kvks}/kvks-file-writer.ts | 11 +++-- .../web/utils/src/types/kvks}/kvks-file.ts | 0 .../utils/test/kvks}/test-kvk-round-trip.ts | 7 ++- .../web/utils/test/kvks/test-kvk-utils.ts | 48 +++++++++++++++++++ .../web/utils/test/kvks}/test-kvks-file.ts | 4 +- .../src/osk-character-use/index.ts | 4 +- 
.../kmc-analyze/src/osk-rewrite-pua/index.ts | 3 +- .../src/kmc-kmn/src/compiler/compiler.ts | 3 +- package-lock.json | 1 + 12 files changed, 87 insertions(+), 22 deletions(-) rename {common/web/types/src/kvk => developer/src/common/web/utils/src/types/kvks}/kvks-file-reader.ts (87%) rename {common/web/types/src/kvk => developer/src/common/web/utils/src/types/kvks}/kvks-file-writer.ts (87%) rename {common/web/types/src/kvk => developer/src/common/web/utils/src/types/kvks}/kvks-file.ts (100%) rename {common/web/types/test/kvk => developer/src/common/web/utils/test/kvks}/test-kvk-round-trip.ts (94%) create mode 100644 developer/src/common/web/utils/test/kvks/test-kvk-utils.ts rename {common/web/types/test/kvk => developer/src/common/web/utils/test/kvks}/test-kvks-file.ts (93%) diff --git a/common/web/types/src/main.ts b/common/web/types/src/main.ts index c6c1bdc1d31..6886cae9fec 100644 --- a/common/web/types/src/main.ts +++ b/common/web/types/src/main.ts @@ -7,11 +7,8 @@ export * as KeymanTargets from './kmx/keyman-targets.js'; export * as VisualKeyboard from './kvk/visual-keyboard.js'; export { default as KMXPlusBuilder} from './kmx/kmx-plus-builder/kmx-plus-builder.js'; export { default as KvkFileReader } from './kvk/kvk-file-reader.js'; -export { default as KvksFileReader } from './kvk/kvks-file-reader.js'; export { default as KvkFileWriter } from './kvk/kvk-file-writer.js'; -export { default as KvksFileWriter } from './kvk/kvks-file-writer.js'; export * as KvkFile from './kvk/kvk-file.js'; -export * as KvksFile from './kvk/kvks-file.js'; export * as LDMLKeyboard from './ldml-keyboard/ldml-keyboard-xml.js'; export { LDMLKeyboardTestDataXMLSourceFile } from './ldml-keyboard/ldml-keyboard-testdata-xml.js'; diff --git a/developer/src/common/web/utils/src/index.ts b/developer/src/common/web/utils/src/index.ts index 22248ea55b2..958499dc963 100644 --- a/developer/src/common/web/utils/src/index.ts +++ b/developer/src/common/web/utils/src/index.ts @@ -9,3 +9,8 @@ export 
{ KPJFileReader } from './types/kpj/kpj-file-reader.js'; export { KeymanDeveloperProject, KeymanDeveloperProjectFile, KeymanDeveloperProjectType, } from './types/kpj/keyman-developer-project.js'; export * as KpsFile from './types/kps/kps-file.js'; + +export { default as KvksFileReader } from './types/kvks/kvks-file-reader.js'; +export { default as KvksFileWriter } from './types/kvks/kvks-file-writer.js'; +export * as KvksFile from './types/kvks/kvks-file.js'; + diff --git a/common/web/types/src/kvk/kvks-file-reader.ts b/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts similarity index 87% rename from common/web/types/src/kvk/kvks-file-reader.ts rename to developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts index ff3a094e6f0..07c590a4e3c 100644 --- a/common/web/types/src/kvk/kvks-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts @@ -1,10 +1,18 @@ -import * as xml2js from '../deps/xml2js/xml2js.js'; +import { SchemaValidators as SV, KvkFile, xml2js, util, Constants } from '@keymanapp/common-types'; import KVKSourceFile from './kvks-file.js'; -import { boxXmlArray } from '../util/util.js'; -import { DEFAULT_KVK_FONT, VisualKeyboard, VisualKeyboardHeaderFlags, VisualKeyboardKey, VisualKeyboardKeyFlags, VisualKeyboardLegalShiftStates, VisualKeyboardShiftState } from './visual-keyboard.js'; -import { USVirtualKeyCodes } from '../consts/virtual-key-constants.js'; -import { BUILDER_KVK_HEADER_VERSION, KVK_HEADER_IDENTIFIER_BYTES } from './kvk-file.js'; -import SchemaValidators from '../schema-validators.js'; +const SchemaValidators = SV.default; +import boxXmlArray = util.boxXmlArray; +import USVirtualKeyCodes = Constants.USVirtualKeyCodes; +import { VisualKeyboard as VK } from '@keymanapp/common-types'; +import DEFAULT_KVK_FONT = VK.DEFAULT_KVK_FONT; +import VisualKeyboard = VK.VisualKeyboard; +import VisualKeyboardHeaderFlags = VK.VisualKeyboardHeaderFlags; +import VisualKeyboardKey = 
VK.VisualKeyboardKey; +import VisualKeyboardKeyFlags = VK.VisualKeyboardKeyFlags; +import VisualKeyboardLegalShiftStates = VK.VisualKeyboardLegalShiftStates; +import VisualKeyboardShiftState = VK.VisualKeyboardShiftState; +import BUILDER_KVK_HEADER_VERSION = KvkFile.BUILDER_KVK_HEADER_VERSION; +import KVK_HEADER_IDENTIFIER_BYTES = KvkFile.KVK_HEADER_IDENTIFIER_BYTES; export default class KVKSFileReader { diff --git a/common/web/types/src/kvk/kvks-file-writer.ts b/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts similarity index 87% rename from common/web/types/src/kvk/kvks-file-writer.ts rename to developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts index 19327e779da..52e988a0f30 100644 --- a/common/web/types/src/kvk/kvks-file-writer.ts +++ b/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts @@ -1,7 +1,12 @@ -import * as xml2js from '../deps/xml2js/xml2js.js'; +import { VisualKeyboard as VK, Constants, xml2js } from '@keymanapp/common-types'; import KVKSourceFile, { KVKSEncoding, KVKSFlags, KVKSKey, KVKSLayer } from './kvks-file.js'; -import { VisualKeyboard, VisualKeyboardHeaderFlags, VisualKeyboardKeyFlags, VisualKeyboardLegalShiftStates, VisualKeyboardShiftState } from './visual-keyboard.js'; -import { USVirtualKeyCodes } from '../consts/virtual-key-constants.js'; + +import USVirtualKeyCodes = Constants.USVirtualKeyCodes; +import VisualKeyboard = VK.VisualKeyboard; +import VisualKeyboardHeaderFlags = VK.VisualKeyboardHeaderFlags; +import VisualKeyboardKeyFlags = VK.VisualKeyboardKeyFlags; +import VisualKeyboardLegalShiftStates = VK.VisualKeyboardLegalShiftStates; +import VisualKeyboardShiftState = VK.VisualKeyboardShiftState; export default class KVKSFileWriter { public write(vk: VisualKeyboard): string { diff --git a/common/web/types/src/kvk/kvks-file.ts b/developer/src/common/web/utils/src/types/kvks/kvks-file.ts similarity index 100% rename from common/web/types/src/kvk/kvks-file.ts rename to 
developer/src/common/web/utils/src/types/kvks/kvks-file.ts diff --git a/common/web/types/test/kvk/test-kvk-round-trip.ts b/developer/src/common/web/utils/test/kvks/test-kvk-round-trip.ts similarity index 94% rename from common/web/types/test/kvk/test-kvk-round-trip.ts rename to developer/src/common/web/utils/test/kvks/test-kvk-round-trip.ts index 7211ac3a355..f4e28f612de 100644 --- a/common/web/types/test/kvk/test-kvk-round-trip.ts +++ b/developer/src/common/web/utils/test/kvks/test-kvk-round-trip.ts @@ -5,10 +5,9 @@ import Hexy from 'hexy'; import gitDiff from 'git-diff'; const { hexy } = Hexy; import { makePathToFixture } from '../helpers/index.js'; -import KvksFileReader from "../../src/kvk/kvks-file-reader.js"; -import KvkFileReader from "../../src/kvk/kvk-file-reader.js"; -import KvkFileWriter from "../../src/kvk/kvk-file-writer.js"; -import KvksFileWriter from "../../src/kvk/kvks-file-writer.js"; +import KvksFileReader from "../../src/types/kvks/kvks-file-reader.js"; +import { KvkFileReader, KvkFileWriter } from "@keymanapp/common-types"; +import KvksFileWriter from "../../src/types/kvks/kvks-file-writer.js"; /** * diff --git a/developer/src/common/web/utils/test/kvks/test-kvk-utils.ts b/developer/src/common/web/utils/test/kvks/test-kvk-utils.ts new file mode 100644 index 00000000000..fe4c626d729 --- /dev/null +++ b/developer/src/common/web/utils/test/kvks/test-kvk-utils.ts @@ -0,0 +1,48 @@ +// NOTE: this is a copy of common/web/types/test/kvk/test-kvk-utils.ts +import 'mocha'; +import {assert} from 'chai'; +import { VisualKeyboard as VK } from "@keymanapp/common-types"; +import VisualKeyboard = VK.VisualKeyboard; +import VisualKeyboardHeaderFlags = VK.VisualKeyboardHeaderFlags; +import VisualKeyboardKeyFlags = VK.VisualKeyboardKeyFlags; +import VisualKeyboardShiftState = VK.VisualKeyboardShiftState; +import { Constants } from '@keymanapp/common-types'; +import USVirtualKeyCodes = Constants.USVirtualKeyCodes; + +export function verify_khmer_angkor(vk: 
VisualKeyboard) { + assert.equal(vk.header.flags, VisualKeyboardHeaderFlags.kvkhAltGr); + assert.equal(vk.header.associatedKeyboard, 'khmer_angkor'); + assert.equal(vk.header.ansiFont.name, 'Arial'); + assert.equal(vk.header.ansiFont.size, -12); + assert.equal(vk.header.unicodeFont.name, 'Khmer Busra Kbd'); + assert.equal(vk.header.unicodeFont.size, 16); + assert.equal(vk.keys.length, 186); + assert.equal(vk.keys[0].flags, VisualKeyboardKeyFlags.kvkkUnicode); + assert.equal(vk.keys[0].vkey, USVirtualKeyCodes.K_B); + assert.equal(vk.keys[0].shift, VisualKeyboardShiftState.KVKS_RALT); + assert.equal(vk.keys[0].text, 'ឞ'); + assert.equal(vk.keys[185].flags, VisualKeyboardKeyFlags.kvkkUnicode); + assert.equal(vk.keys[185].vkey, USVirtualKeyCodes.K_COMMA); + assert.equal(vk.keys[185].shift, VisualKeyboardShiftState.KVKS_SHIFT); + assert.equal(vk.keys[185].text, ''); +} + +export function verify_balochi_inpage(vk: VisualKeyboard) { + assert.equal(vk.header.flags, + VisualKeyboardHeaderFlags.kvkhAltGr | VisualKeyboardHeaderFlags.kvkhDisplayUnderlying); + assert.equal(vk.header.associatedKeyboard, 'balochi_inpage'); + assert.equal(vk.header.unicodeFont.name, 'Lateef'); + assert.equal(vk.header.unicodeFont.size, 14); + assert.equal(vk.keys.length, 147); + assert.equal(vk.keys[0].flags, VisualKeyboardKeyFlags.kvkkUnicode); + assert.equal(vk.keys[0].vkey, USVirtualKeyCodes.K_BKQUOTE); + assert.equal(vk.keys[0].shift, VisualKeyboardShiftState.KVKS_RALT); + assert.equal(vk.keys[0].text, '‍'); + assert.equal(vk.keys[30].flags, + VisualKeyboardKeyFlags.kvkkUnicode | VisualKeyboardKeyFlags.kvkkBitmap); + assert.equal(vk.keys[30].vkey, USVirtualKeyCodes.K_COMMA); + assert.equal(vk.keys[30].shift, + VisualKeyboardShiftState.KVKS_LCTRL); + assert.equal(vk.keys[30].text, ''); + assert.equal(vk.keys[30].bitmap.byteLength, 35766); +} \ No newline at end of file diff --git a/common/web/types/test/kvk/test-kvks-file.ts b/developer/src/common/web/utils/test/kvks/test-kvks-file.ts 
similarity index 93% rename from common/web/types/test/kvk/test-kvks-file.ts rename to developer/src/common/web/utils/test/kvks/test-kvks-file.ts index 2cd1a965c67..5733cb1701e 100644 --- a/common/web/types/test/kvk/test-kvks-file.ts +++ b/developer/src/common/web/utils/test/kvks/test-kvks-file.ts @@ -1,8 +1,8 @@ import * as fs from 'fs'; import 'mocha'; import { makePathToFixture } from '../helpers/index.js'; -import KvksFileReader from "../../src/kvk/kvks-file-reader.js"; -import KvksFileWriter from "../../src/kvk/kvks-file-writer.js"; +import KvksFileReader from "../../src/types/kvks/kvks-file-reader.js"; +import KvksFileWriter from "../../src/types/kvks/kvks-file-writer.js"; import { verify_khmer_angkor, verify_balochi_inpage } from './test-kvk-utils.js'; import { assert } from 'chai'; diff --git a/developer/src/kmc-analyze/src/osk-character-use/index.ts b/developer/src/kmc-analyze/src/osk-character-use/index.ts index e0570b99497..9f2f73ca347 100644 --- a/developer/src/kmc-analyze/src/osk-character-use/index.ts +++ b/developer/src/kmc-analyze/src/osk-character-use/index.ts @@ -1,6 +1,6 @@ -import { CompilerCallbacks, KeymanFileTypes, KvksFile, KvksFileReader, TouchLayout, TouchLayoutFileReader } from "@keymanapp/common-types"; +import { CompilerCallbacks, KeymanFileTypes, TouchLayout, TouchLayoutFileReader } from "@keymanapp/common-types"; import { CompilerMessages, Osk } from '@keymanapp/kmc-kmn'; -import { escapeMarkdownChar } from '@keymanapp/developer-utils'; +import { escapeMarkdownChar, KvksFile, KvksFileReader } from '@keymanapp/developer-utils'; import { getOskFromKmnFile } from "../util/get-osk-from-kmn-file.js"; import { AnalyzerMessages } from "../messages.js"; diff --git a/developer/src/kmc-analyze/src/osk-rewrite-pua/index.ts b/developer/src/kmc-analyze/src/osk-rewrite-pua/index.ts index eff545bb8e7..a6ce1ba337e 100644 --- a/developer/src/kmc-analyze/src/osk-rewrite-pua/index.ts +++ b/developer/src/kmc-analyze/src/osk-rewrite-pua/index.ts @@ -1,4 
+1,5 @@ -import { CompilerCallbacks, KeymanFileTypes, KvksFile, KvksFileReader, KvksFileWriter, TouchLayoutFileReader, TouchLayoutFileWriter } from "@keymanapp/common-types"; +import { CompilerCallbacks, KeymanFileTypes, TouchLayoutFileReader, TouchLayoutFileWriter } from "@keymanapp/common-types"; +import { KvksFile, KvksFileReader, KvksFileWriter } from '@keymanapp/developer-utils'; import { CompilerMessages, Osk } from '@keymanapp/kmc-kmn'; import { getOskFromKmnFile } from "../util/get-osk-from-kmn-file.js"; import { AnalyzerMessages } from "../messages.js"; diff --git a/developer/src/kmc-kmn/src/compiler/compiler.ts b/developer/src/kmc-kmn/src/compiler/compiler.ts index a7b9a73c280..e9b892324a5 100644 --- a/developer/src/kmc-kmn/src/compiler/compiler.ts +++ b/developer/src/kmc-kmn/src/compiler/compiler.ts @@ -7,7 +7,8 @@ TODO: implement additional interfaces: // TODO: rename wasm-host? import { UnicodeSetParser, UnicodeSet, VisualKeyboard, KvkFileReader, KeymanCompiler, KeymanCompilerArtifacts, KeymanCompilerArtifactOptional, KeymanCompilerResult, KeymanCompilerArtifact } from '@keymanapp/common-types'; -import { CompilerCallbacks, CompilerEvent, CompilerOptions, KeymanFileTypes, KvkFileWriter, KvksFileReader } from '@keymanapp/common-types'; +import { CompilerCallbacks, CompilerEvent, CompilerOptions, KeymanFileTypes, KvkFileWriter } from '@keymanapp/common-types'; +import { KvksFileReader } from '@keymanapp/developer-utils'; import * as Osk from './osk.js'; import loadWasmHost from '../import/kmcmplib/wasm-host.js'; import { CompilerMessages, mapErrorFromKmcmplib } from './kmn-compiler-messages.js'; diff --git a/package-lock.json b/package-lock.json index 92d9df87671..da6a4cd1809 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1700,6 +1700,7 @@ "license": "MIT", "dependencies": { "@keymanapp/common-types": "*", + "@keymanapp/developer-utils": "*", "jszip": "^3.7.0", "marked": "^7.0.0" }, From cdbcbe723a6de3e839003fe4a74166987690f960 Mon Sep 17 
00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jun 2024 09:00:36 +1000 Subject: [PATCH 09/88] chore(common): reduce c8 threshold for common-types by 1% after moving kvk-file --- common/web/types/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/web/types/build.sh b/common/web/types/build.sh index b57a516893e..5bff3214737 100755 --- a/common/web/types/build.sh +++ b/common/web/types/build.sh @@ -100,7 +100,7 @@ function do_build() { function do_test() { eslint . tsc --build test - readonly C8_THRESHOLD=75 + readonly C8_THRESHOLD=74 c8 -skip-full --reporter=lcov --reporter=text --lines $C8_THRESHOLD --statements $C8_THRESHOLD --branches $C8_THRESHOLD --functions $C8_THRESHOLD mocha "${builder_extra_params[@]}" builder_echo warning "Coverage thresholds are currently $C8_THRESHOLD%, which is lower than ideal." builder_echo warning "Please increase threshold in build.sh as test coverage improves." From f6105d87d810436effdfbf7e1613bb522664ef42 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jun 2024 09:31:52 +1000 Subject: [PATCH 10/88] refactor(common): fixup tests for kvks-file --- developer/src/common/web/utils/build.sh | 2 +- .../utils/src/types/kvks/kvks-file-reader.ts | 26 +++++++++--------- .../utils/src/types/kvks/kvks-file-writer.ts | 20 +++++++------- .../test/fixtures/kvks/balochi_inpage.kvk | Bin 0 -> 118474 bytes .../test/fixtures/kvks}/balochi_inpage.kvks | 0 .../utils/test/fixtures/kvks/khmer_angkor.kvk | Bin 0 -> 2893 bytes .../test/fixtures/kvks}/khmer_angkor.kvks | 0 .../utils/test/kvks/test-kvk-round-trip.ts | 12 ++++---- .../web/utils/test/kvks/test-kvks-file.ts | 8 +++--- developer/src/kmcmplib/subprojects/gtest.wrap | 2 +- 10 files changed, 35 insertions(+), 35 deletions(-) create mode 100644 developer/src/common/web/utils/test/fixtures/kvks/balochi_inpage.kvk rename {common/web/types/test/fixtures/kvk => developer/src/common/web/utils/test/fixtures/kvks}/balochi_inpage.kvks (100%) create mode 100644 
developer/src/common/web/utils/test/fixtures/kvks/khmer_angkor.kvk rename {common/web/types/test/fixtures/kvk => developer/src/common/web/utils/test/fixtures/kvks}/khmer_angkor.kvks (100%) diff --git a/developer/src/common/web/utils/build.sh b/developer/src/common/web/utils/build.sh index cd50df5a166..13f590070a3 100755 --- a/developer/src/common/web/utils/build.sh +++ b/developer/src/common/web/utils/build.sh @@ -34,7 +34,7 @@ builder_run_action build tsc --build if builder_start_action test; then eslint . tsc --build test - readonly C8_THRESHOLD=60 + readonly C8_THRESHOLD=70 c8 --reporter=lcov --reporter=text --exclude-after-remap --lines $C8_THRESHOLD --statements $C8_THRESHOLD --branches $C8_THRESHOLD --functions $C8_THRESHOLD mocha builder_echo warning "Coverage thresholds are currently $C8_THRESHOLD%, which is lower than ideal." builder_echo warning "Please increase threshold in build.sh as test coverage improves." diff --git a/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts b/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts index 07c590a4e3c..5f1868b1c08 100644 --- a/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts +++ b/developer/src/common/web/utils/src/types/kvks/kvks-file-reader.ts @@ -77,9 +77,9 @@ export default class KVKSFileReader { } } - for(let key of Object.keys(source)) { + for(const key of Object.keys(source)) { if(Array.isArray(source[key])) { - for(let item of source[key]) { + for(const item of source[key]) { if(typeof(item) === 'object') { this.cleanupUnderscore(key, item); } @@ -100,7 +100,7 @@ export default class KVKSFileReader { // NOTE: at this point, the xml should have been validated // and matched the schema result so we can assume properties exist - let result: VisualKeyboard = { + const result: VisualKeyboard = { header: { version: BUILDER_KVK_HEADER_VERSION, flags: 0, @@ -125,22 +125,22 @@ export default class KVKSFileReader { result.header.flags |= 
VisualKeyboardHeaderFlags.kvkhUseUnderlying; } - for(let encoding of source.visualkeyboard.encoding) { - let isUnicode = (encoding.$?.name == 'unicode'), + for(const encoding of source.visualkeyboard.encoding) { + const isUnicode = (encoding.$?.name == 'unicode'), font = isUnicode ? result.header.unicodeFont : result.header.ansiFont; font.name = encoding.$?.fontname ?? DEFAULT_KVK_FONT.name; font.size = parseInt(encoding.$?.fontsize ?? DEFAULT_KVK_FONT.size.toString(), 10); - for(let layer of encoding.layer) { - let shift = this.kvksShiftToKvkShift(layer.$?.shift); - for(let sourceKey of layer.key) { - let vkey = (USVirtualKeyCodes as any)[sourceKey.$?.vkey]; + for(const layer of encoding.layer) { + const shift = this.kvksShiftToKvkShift(layer.$?.shift); + for(const sourceKey of layer.key) { + const vkey = (USVirtualKeyCodes as any)[sourceKey.$?.vkey]; if(!vkey) { if(typeof invalidVkeys !== 'undefined') { invalidVkeys.push(sourceKey.$?.vkey); } continue; } - let key: VisualKeyboardKey = { + const key: VisualKeyboardKey = { flags: (isUnicode ? VisualKeyboardKeyFlags.kvkkUnicode : 0) | (sourceKey.bitmap ? VisualKeyboardKeyFlags.kvkkBitmap : 0), @@ -175,9 +175,9 @@ export default class KVKSFileReader { */ private boxArrays(source: KVKSourceFile) { boxXmlArray(source.visualkeyboard, 'encoding'); - for(let encoding of source.visualkeyboard.encoding) { + for(const encoding of source.visualkeyboard.encoding) { boxXmlArray(encoding, 'layer'); - for(let layer of encoding.layer) { + for(const layer of encoding.layer) { boxXmlArray(layer, 'key'); } } @@ -189,7 +189,7 @@ export default class KVKSFileReader { shift = shift.toUpperCase(); // TODO-LDML(lowpri): make a map of this? 
- for(let state of VisualKeyboardLegalShiftStates) { + for(const state of VisualKeyboardLegalShiftStates) { if(state.name == shift) { return state.shift; } diff --git a/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts b/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts index 52e988a0f30..a7e686b7764 100644 --- a/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts +++ b/developer/src/common/web/utils/src/types/kvks/kvks-file-writer.ts @@ -22,7 +22,7 @@ export default class KVKSFileWriter { } }) - let flags: KVKSFlags = {}; + const flags: KVKSFlags = {}; if(vk.header.flags & VisualKeyboardHeaderFlags.kvkhDisplayUnderlying) { flags.displayunderlying = ''; } @@ -38,7 +38,7 @@ export default class KVKSFileWriter { - let kvks: KVKSourceFile = { + const kvks: KVKSourceFile = { visualkeyboard: { header: { version: '10.0', @@ -51,9 +51,9 @@ export default class KVKSFileWriter { if(vk.header.underlyingLayout) kvks.visualkeyboard.header.layout = vk.header.underlyingLayout; - let encodings: {ansi: {o: KVKSEncoding, l: {[name:string]:KVKSLayer}}, unicode: {o: KVKSEncoding, l: {[name:string]:KVKSLayer}}} = {ansi:null,unicode:null}; + const encodings: {ansi: {o: KVKSEncoding, l: {[name:string]:KVKSLayer}}, unicode: {o: KVKSEncoding, l: {[name:string]:KVKSLayer}}} = {ansi:null,unicode:null}; - for(let key of vk.keys) { + for(const key of vk.keys) { const encoding = key.flags & VisualKeyboardKeyFlags.kvkkUnicode ? 
'unicode' : 'ansi'; const shift = this.kvkShiftToKvksShift(key.shift); @@ -70,7 +70,7 @@ export default class KVKSFileWriter { }; kvks.visualkeyboard.encoding.push(encodings[encoding].o); } - let e = encodings[encoding]; + const e = encodings[encoding]; if(!e.l[shift]) { e.l[shift] = { key: [], @@ -78,11 +78,11 @@ export default class KVKSFileWriter { }; e.o.layer.push(e.l[shift]); } - let l = e.l[shift]; + const l = e.l[shift]; // TODO-LDML: map let vkeyName = ''; - for(let vkey of Object.keys(USVirtualKeyCodes)) { + for(const vkey of Object.keys(USVirtualKeyCodes)) { if((USVirtualKeyCodes as any)[vkey] == key.vkey) { vkeyName = vkey; break; @@ -93,7 +93,7 @@ export default class KVKSFileWriter { //TODO-LDML: warn continue; } - let k: KVKSKey = { + const k: KVKSKey = { $: {vkey: vkeyName}, _: key.text, } @@ -104,7 +104,7 @@ export default class KVKSFileWriter { l.key.push(k); } - let result = builder.buildObject(kvks); + const result = builder.buildObject(kvks); return result; //Uint8Array.from(result); } @@ -116,7 +116,7 @@ export default class KVKSFileWriter { public kvkShiftToKvksShift(shift: VisualKeyboardShiftState): string { // TODO-LDML(lowpri): make a map of this? 
- for(let state of VisualKeyboardLegalShiftStates) { + for(const state of VisualKeyboardLegalShiftStates) { if(state.shift == shift) { return state.name; } diff --git a/developer/src/common/web/utils/test/fixtures/kvks/balochi_inpage.kvk b/developer/src/common/web/utils/test/fixtures/kvks/balochi_inpage.kvk new file mode 100644 index 0000000000000000000000000000000000000000..56fee9dfbd320bdd87c977abab6b53d386ff67a8 GIT binary patch literal 118474 zcmeHQ3Aj$h|DSs$OQC4{M_eUD+4rTcCFG*Ckcg7KMYJkeN>Nm_Je8$Q9$88uWh+a| z^GFmGZBi)NLuC2sKcC~ynK^UjocFx%eeb(|=XvHm^F6bF=6hy)zw@1(PC2dEsf-mZ z!FsY=*&S>kyN&f@{aFug?_ziJcYRrJ#(0=!Y%qU^FpM4i@4x>FGxlF0){eiqmxt)h z`tbL~xyAeE`7<+{En}IiSQdA4lbOwGaB?5d3??(1)#T)@lB~tao|3H1$=f7Zhm*Y| zS(lT&C0UP?eI!|*lYJ%GfRp_s*^ra{CAox?m!e4AWM;F)oNNj=H<{V2H78rhz)LxK z8N9g3%w`{RvKrjnWM;EZI9VNTZZZp7^{sJ|0*U(8lqBj~OOmK>ZAqfObtH-U)|Djc zTThaxZ+%Ikz6~Ua`Zkm#s&HP`Nt%}m_5D9dq6(#Q$pVGh5|+V3E*Q&L^Y-|KLB^H2 zy@lJGxy@i%{Fkv2x&9WB`@{K!k&Q0~%a<=_ojZ39X@CFycUN28d^1`&n0fTkM_uu^ zZr#di)~p%GUVr`duK0p?KK$^*KxrVayYIf+n9gs%{T8euxuHbf>T~AInIY4tTeq$w z|7p{vg^Ys$-+lL;BOdb2EB?R!`fH%P5vW?VYOef6-GkNv>2&DO!I%!=4|Kduy5L`` zR4Hb%Km726i*KHL?m5<@M~^^u)~s2s_~1EHUZ!_Vn>KaD7yLha_UypvbnV*Jn9hzJ zI|9{1)K}!KK7r9Iue{>2HEM{724kW6pCd<(7|W;TL+rvhTk8F5w$xeADfF zruxQrJR|%G|7h{RzJ2=`#$aJjoH#+_LzKsdLx&Er0|yS+R&Be-9(#{cOjG|K-|vC&C;X}Z!OZ`pQLz{~awIEPuAFUY{Tuyn<+j^yv+)JiD*jmS9z1x^ zwyGbwqth0L4xvZ{9ptBFHBc{#d(QdZ{>`CX)b4pEOQKQ1fAALX0eV9z8N|hM$5Z&5+xisftWjY zu8ozz%nyI8GupOoYo~MOnP;+f>(;Rf6)M>8iWMtnkHP8A|G*3DodE*|*!ZXp^yLC~ z@$|$0`0?Wotjm`#&#=bo*|VpbEW_Fp=|vNNv@`fc*idUu)BHa_`X8)=diCnX`t<4J zu-&?KV`a*evF0k7nVAlJKwz$Ds)HZ?7+ck}va_?TG%%-7jTi&M#UE>L^p(O2o`~cb zEBw`Z2X*6Y=kDFRUA&Ka`r(f~Fx5Q!?6X!Ht5&UIMT-`-y|C8CSP?G%k>KnVCsFZl z)Tj}||4d%x_QU^*E3U9HhGYHu^;SCi_@0xKV|^EdzXL9ts9wD~TeD`3u?@WN#~MO4 zn0Jac#QsOss#R_8%9SfKtkco~|8vhh*XsZK_U+4bi}l|5=bz7t6e(h3qdLMFf4}(R z3pFac@x~i%{Lv2Tyc7W;#~)4n@r})&Ki{?(D@6O7p0VnGkdNQ~+du#O!xk=FXzf=z 
z0OW+ezlwTz@xMOCPM9#kQGev28tkKni@%C5&YG&Y>S=xc`R5LQH7wkTg1;!&M-PsytV)U_mx%(j*bZe){b{+kvB;7|Ji4s8OtE*p?-^^5c#qGtzEk|gMKpnN2}W{Teh&3D_62xZn=e_ zuh9+KM3h%NP5A5a7B61xiii04Kl!xNPP2on-;8(}E&LN@{1x?zhz;=cwz6D$0&SBNtMs{QoS zPkqa7s*m~(`@fL=2-jk5x_9s1KzUS$Y_4y~^ z@5U?IJVf~CKgND-M)(u{e*BT@e#HO8|EbR7e)S{%C;s>2k5u;~{wMxVbsqPtpCAAG z#UsD?em8_a;ZHn3JU~2EVYTc8u8q zFPw`KVM0CibC*bC$&w|iXsWN<@BVk(p%v70=+MDf7o4#PRR`gZ^G5;$_4&|44}~m- zF8`p9zorDBzgx{mxS*zuK;5z^ul~f;B9wtzM?GF%@z+)O_?5x$ zn4@1j)vtH&-mW^JPK3XTlG#o8yZIj~Os>BA>OdB%a`o>gQTYGIAAbx~2h?@!*s-p9 zKouJLho_!;%3-0-U6`Q#5BgFaC!!-8tFg=yhVYLJ!@Tew;U9a<)%GI%34fIz(#;)| z|CcUZ%HDeGEz4p{EZuO*ulizJ4_DB@LfvD21Xllpkv?Jz{P%kH}7q046fVBS;9qfU%I{(K$5aAy^Y`w5l-xL0^#aqQ5Zo;4N z*LfnHXySk3|8(May{^Rn#Q!=^q!Ug2pB(%T)##87gY}cMNhjWoMqP14Sm5dNo;KiWx|I04BOo)kEXCb`Gj_bW~ zRzS7-wWFrExaLQVBiwW5%yHpVym)cDERi~NS!*137J1@1V88%ZzL0BH!@xav?p#MW z9EH}yKnC1YXS@rFe^CC%abn%foH^4oe^D+R!_>_M7hJ%=>mrPJ<_G@S+1ZZlaox7a zPdssLqHb_ZTKMBRYt}41h$U~XhEd&#fte7K`-qMq@<-|H$|T$L`$m1_9oT06CF!fnDIS6APE|NVjOx8Hv2NIO*j zW9nB0!~Fd7&vsg=#Q)*o4>?xRK9M~A@JHWMx^!s)x9d56{CGQW!auV4y&wL#s#yKz zYSygDiWDhgw?pH`jgub#N|h?v^$`x7y)*H$iiHT9pZt%xv3eCW#wA>ZtD4=rcUxsm zHvY%;yEr$knnHyNF{~p@*yCNS@yEGvVKOr_t?O8!GDJ0N*RD-E{0|*E#L#!D*MFl< zP=)mO-+w0z{(buNv7tu4vwr=0mwG9&#>y}LhYG39n>Snb&p-e4#KtsFC|9l=!+J)w zSkI~7v|T~&z`P&dMWXP(+BcRcQG($t?@vGd#7dSdX%}0l@d+1yuR2Eh-h{th$bb%v zcYgWDvj4#zlWLkaZOTrZIKlSp*~7|~Eo*yq>eR_9bF$-Kv0_Dr{feVUk9xL2Eb*T{ zeY%Y?9QYp=yrEwkf8KfLB`p3K85s=YmdQFQrw~s+{j_Hv6f69}=lb`ouC>OE8>f_* zG594S|Kq#>RP>oF_7q#SYUQYhAiU<3vBDqgF38#oQ@(t8$2W;Nk7_Va$}jwJ_EcO8 z4>i+Xs-$&(LHUC0e`3tljh8C6SmBR3{Y4jDWEZP#+qMq;(GRI(Fv=S){#fG+Tuu6W z#5jmOZDBA!Jb3V+t1Ji`DgMyiq#NWT@>id+!vB|FeqmUP2!sC!)N;VB?i-@5UVH5| z5!`-4=ZPLq(Dm!rx5HshDBkPO{Kns;X9o4r%K%!mix;yA6DBbHw~V=| zZgO&R;)Z|0f(0GLf$l+__d)O7yVns8-+*r7jlW)=p!Az>zG310P$HUDG(?ZANp zp5?)Qm7hr#{IL#(zSZN$k9*cN9{i8~#WeP+{E?NF<=F(W#2JxPRg|h+ZlVJx98pIACKJ1yMS6=kq{oge9#sscVZ%-Jx zF(OO|6XF5l0pbDTfq3yiQ2FzCl`mQIRprn9_Wue0{Q33?|NOx<==VtY6aGPYB^ARF 
z{}cbGB3}os6Y)Rse^6dY#c;&`#Q&+t*FozPl>dXKN8#f8JrVwdKk)$Z0P#R-@c?AJ z#64Az=cM+HPW!G}wTexhI@Pl8z4x9cuB!Y$-G31s?7_o9=bwas^e|WZ4#Gcwa8)t( za})lAzaM|3x?d#xf5sVSxb8yl+O;d&zJ0rE{Kk3Ap!dH+c2~z6dQgHIFTLPje)(n4 zBv|j>y`7J8g?z5|VOe?j6SjF%4nyyE}E4?i&I zs`M*|(4k_g1HF^qQ>?JaEX-Q6OjhI6lO11y!YqqraQA|-j5TkMUl{18sLbsx+}_M> z1`h=@BG=y{a(`k~a>^(6+(MWW=EMWU13`HJ+C-pH$Iq@?x6U=AY1XWnQInRgUF(Gx zUU0>I<&{@hty;B0T9c+3)C(tlgTzOeW5iGp`uy-CT&K;h7Ai87urBfR=QvR=2tyV*?zkdA)N4~}){viyA2Z#rV z2Z#p}l?NatfORh{G;k$qITO8HxIrG83t_|A{~SDcuvj)*Z4j?|VxI>4J-C6gMvWTQ z{xCF8H)_;~p^V~Yqj;4sRKDu^3wx`f!VCXIz<)EpQ3$uGg)-1cirW}O*%RrRtoUOu z)9+S6y-jelK_cNU%95=3-+JqaGqoR$#vPLyXd;*Xm}^xMaZ6)VPY0tGjdqt7?> zOSp+GQE(UKNJji;%$VWgb^ZTJ{P(OIKM7fZN35U8h(B%$*3FI`J6ybt8%0!uZzR_E zhRQD)@$c5Hn+mHXy=|%|)+V|!g^TCAWW-;WruVvO2LI>jCZ1TE@{Sb$)vH(ACol0| zfGO?#diU|iAKQ83^rx5PbTZ(NTld5XR@~B-Uzo)s-DJQ&9=OCRPr^UG`=^9I;h$gM z`qHIKZCSD@Q>G;3xRDI}k9DGu;Ttn%Okf-nVgEA}ZzTi%ksA{_lfDcuTvh!8khAd$=l33RR09SK$g|zFX;a|*k)Lj$m%~&} z$n2hS$|;WEWy+LcCr+F&hBu`V>K*p}g~56~RCwVZ4g5`5h_Zz9#CIsl<@W?`<;$0M zC2i6;iFEK?n~X^}f?vLVe(8iiwCI`4efQmGY(MC`F&Svp^ush9zjVUiOB*S45}7cA zHdm8r-MY0gyeW-P@8XHSmv&dkx4rbzOM$IPbFZo1S6+Fg2~JkLcyXh?6Vv`F5-41_ zFgtwsa8%kLp7`VJ5cFs1=CspJW8Z!ET~z8P+5vLuy3D$oFZw1C7EjD`3KlG=264C- zE?npcuf`3>9Z&pG&b@p0vI-R{IO+xh@8B0sUHrmAMp88uD^^rVHf`E8M_ylj^_9cF zN|h=O8hU&D@(b!0Z~ReCXt*s}w5X$gAaZhYjQv8;x~bv%_wVmWzi;2Z40@x488~pD z1J8*QCkij?iSwSSL3^m_N9s-l{89GVvuCT-v)x074$X5tG4IUE%Cggh1N!}1wrpv? 
zYuvc8BOjcVQ4PizUH99TEn6IMB58w2@gF~ayv4KNThuL*y!ALkh73`wY`f>onUkmV zH*VZ$rzITAmMyc+rwY1Aks=PAhqY_h7G73gh_dQ^P&DMu#*G_iC(k&f9%hzCKb<}LvP)M7hb4V({f*Y@x^T4 zzI_pW2Wmfq#K7C&C5$_&dG*y-t@paNba7)BbnmGKH(4aw|7p?qMz9Zs{y;a8jK>EL z9(2tk#aIg+pM?q)va`V$jq*>LG|7Gk2lOuDJ#n=b_JgBAg9Z-&Sm8fv)F@WAY+1{OqqifR@&OCI z??#<59tYWj1izLmR}N8ZgLR!+27QkWV}iO)hF%XfygruW`!MNRRpa=%aYG35gT>yg zUs%;IR`So#^MLuSZi0-*CVkI3p3q648px`s1~M#ay-l~#cD=cSuaE11va)nYGA zuUE3r@x(t_Fv`zzMvDLMzyEIYt>1T=pJh*lw0G{@Y2%1>PAb%i>X7>KuhhoW#>4~h z;{o;lLBBm1Y9s$P!he0;yLY#&_+KsvW~%&iq-(z*`J{Rn+{GqLIQ!3pzdv6G;yE@;RL zhKdV+$iAE62j`kh^~Zh0 zK}^Y#C5`csW>90wU;p1sPsxG>wK$nA$=aN}NRo9pDNZ)>z;!u! zvGlFS$?lS@&q=W~;DH-(Qk*#C=ZA1dazK2^{~CByc<=N#J-`lECp_NjBu<5lJrMWNAqP)(VmYj$I`*KXbB@ zB!Q!lTjTXz#>w_FZZiayWOGhdmShV~J|W4LoUAU%D>=DFlC3z|L6WUGIY*LhIN4f~ zS8;N+B-?WGQc31;a)u<^ak8Z(+jH_tNp|35D@k6>$*U!K4JU_5vLh!eO7dDx)|BLR zoUAR$>p59Pk~eVjb4hmMm+#-Cu`-B1u}BEdC`T8HE)k! zn0nk+=C*pSMJSk2x&9WB`$Ii_diM0x?$qwY1H=RA$^$~uKN;&U#&0S+eZR@b1L;&| z`c~ufjV3eh>GXY2U8D1TfZq!gDB$_8ULxa+vg5Q@BA3ytY$!X7mp{Fa4u2DliGkxu zZck2rr?Mvo57D`{Z<#WewPC_gzTgbd*Fi1-klO&|2%1aW`Qf(y(@u=%133(_8!@%&A zA*FT6NeU!{r`pND5RbZ0k`SJ1Bm+Y_tiJSxI9M}DLL97|Bq8v1sf-Kps7}%s;!#y4 z3Gt{gl7x8FVi_0WQ7fb`#G@KX65>&pNfP2wcS{oDQS~GV@u+H&gm{z?3gT!$JnCZU z3-PE$l7x8FN=ZUIs-Ywy9@Qx8WTAOd5RaN5eIXuISCSBq8X`%EM|GAY#G@KW67o>m>IM_k$r3l2uHEZ(|L1?23xBq6nU9y>+!=@;YU>bN$p zj2q&z7#r8cRr${KsmOYBzK+XxuSs=RtJOXc)uw3qe@8h^iV1OJt{NNVeE;&eB3Cx$ zFDu*fwW#EhBBtz~f9Fq8$yG%>Cx1_VCKo2(lIJB~lJk=<%d?Vi$a9i2<;dg=d1i8! 
zJSF+MJUsdRI&QIclVg$#<=M#vE^beLW3?^$v(=-LzsjSNzsb{+Ke+d_i&QU6UMxG4hsZg}gXM_ifO6eMB~P}91FVit?kh(n2bSwDDsfD5 zf2)TjD{?{d5;-_|vphC=lRP(hkQ|=uk*6ky$d2TJa&~gWo^=w=d1SJ+Tz65)WEU~5t|*#oPh)H2$eM%ycfI5UH%OkOGd%pk~7*SIh4O79lAskm35M+te3RL3Q5{GNXE!ANeI_U=EhRV+*m2;{Y8>a z*)HjnjgsNENz&cjl6kdSGLcqE^8AlvQ2g6q)Izv(x%Ye{II=GrV056dQc&Q}7%On9_E(!1oNq|>MrphWwo>xmkILDi0^t|8 zszFxC`hJoKe(PdFc7jzRxSLhF=qEpyE;`Z0^vOJ{49#8KOUNF#O2{TjLUyktWFNSf zkUeabkUcL6*{70_z3W~=c9&H`cB>?0( Date: Thu, 13 Jun 2024 10:23:47 +1000 Subject: [PATCH 11/88] chore(common): reduce threshold for c8 to 70% --- common/web/types/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/web/types/build.sh b/common/web/types/build.sh index 5bff3214737..44696964d53 100755 --- a/common/web/types/build.sh +++ b/common/web/types/build.sh @@ -100,7 +100,7 @@ function do_build() { function do_test() { eslint . tsc --build test - readonly C8_THRESHOLD=74 + readonly C8_THRESHOLD=70 c8 -skip-full --reporter=lcov --reporter=text --lines $C8_THRESHOLD --statements $C8_THRESHOLD --branches $C8_THRESHOLD --functions $C8_THRESHOLD mocha "${builder_extra_params[@]}" builder_echo warning "Coverage thresholds are currently $C8_THRESHOLD%, which is lower than ideal." builder_echo warning "Please increase threshold in build.sh as test coverage improves." 
From 09ee82727033548992d90b73d1c9750058ed8a4b Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jun 2024 09:20:49 +1000 Subject: [PATCH 12/88] refactor(common): move .keyman-touch-layout reader/writer to @keymanapp/developer-utils Relates to #9665 --- common/web/types/src/main.ts | 2 -- developer/src/common/web/utils/src/index.ts | 2 ++ .../keyman-touch-layout-file-reader.ts | 13 +++++++------ .../keyman-touch-layout-file-writer.ts | 18 +++++++++++------- .../khmer_angkor.keyman-touch-layout | 0 .../legacy.keyman-touch-layout | 0 .../test-keyman-touch-layout-file-reader.ts | 2 +- .../test-keyman-touch-layout-round-trip.ts | 6 +++--- 8 files changed, 24 insertions(+), 19 deletions(-) rename {common/web/types/src => developer/src/common/web/utils/src/types}/keyman-touch-layout/keyman-touch-layout-file-reader.ts (82%) rename {common/web/types/src => developer/src/common/web/utils/src/types}/keyman-touch-layout/keyman-touch-layout-file-writer.ts (86%) rename {common/web/types => developer/src/common/web/utils}/test/fixtures/keyman-touch-layout/khmer_angkor.keyman-touch-layout (100%) rename {common/web/types => developer/src/common/web/utils}/test/fixtures/keyman-touch-layout/legacy.keyman-touch-layout (100%) rename {common/web/types => developer/src/common/web/utils}/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts (96%) rename {common/web/types => developer/src/common/web/utils}/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts (81%) diff --git a/common/web/types/src/main.ts b/common/web/types/src/main.ts index 6886cae9fec..ab7554a063c 100644 --- a/common/web/types/src/main.ts +++ b/common/web/types/src/main.ts @@ -37,8 +37,6 @@ export { defaultCompilerOptions, CompilerBaseOptions, CompilerCallbacks, Compile export { CommonTypesMessages } from './util/common-events.js'; export * as TouchLayout from './keyman-touch-layout/keyman-touch-layout-file.js'; -export { TouchLayoutFileReader } from 
'./keyman-touch-layout/keyman-touch-layout-file-reader.js'; -export { TouchLayoutFileWriter, TouchLayoutFileWriterOptions } from './keyman-touch-layout/keyman-touch-layout-file-writer.js'; export * as KmpJsonFile from './package/kmp-json-file.js'; diff --git a/developer/src/common/web/utils/src/index.ts b/developer/src/common/web/utils/src/index.ts index 958499dc963..4fbaf6e2395 100644 --- a/developer/src/common/web/utils/src/index.ts +++ b/developer/src/common/web/utils/src/index.ts @@ -14,3 +14,5 @@ export { default as KvksFileReader } from './types/kvks/kvks-file-reader.js'; export { default as KvksFileWriter } from './types/kvks/kvks-file-writer.js'; export * as KvksFile from './types/kvks/kvks-file.js'; +export { TouchLayoutFileReader } from './types/keyman-touch-layout/keyman-touch-layout-file-reader.js'; +export { TouchLayoutFileWriter, TouchLayoutFileWriterOptions } from './types/keyman-touch-layout/keyman-touch-layout-file-writer.js'; diff --git a/common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-reader.ts b/developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-reader.ts similarity index 82% rename from common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-reader.ts rename to developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-reader.ts index e0f4ea7917e..81be30e3874 100644 --- a/common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-reader.ts +++ b/developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-reader.ts @@ -1,5 +1,6 @@ -import { TouchLayoutFile } from "./keyman-touch-layout-file.js"; -import SchemaValidators from '../schema-validators.js'; +import { TouchLayout } from "@keymanapp/common-types"; +import TouchLayoutFile = TouchLayout.TouchLayoutFile; +import { SchemaValidators } from '@keymanapp/common-types'; export class TouchLayoutFileReader { public read(source: Uint8Array): TouchLayoutFile { @@ -22,7 +23,7 @@ 
export class TouchLayoutFileReader { // `row.id` should be number, but may have been stringified; we use // presence of `key` property to recognise this as a `TouchLayoutRow`. if(this.key && key == 'id' && typeof value == 'string') { - let newValue = parseInt(value, 10); + const newValue = parseInt(value, 10); /* c8 ignore next 3 */ if(isNaN(newValue)) { throw new TypeError(`Invalid row.id: "${value}"`); @@ -39,7 +40,7 @@ export class TouchLayoutFileReader { return undefined; } - let newValue = parseInt(value, 10); + const newValue = parseInt(value, 10); /* c8 ignore next 3 */ if(isNaN(newValue)) { throw new TypeError(`Invalid [sub]key.${key}: "${value}"`); @@ -67,10 +68,10 @@ export class TouchLayoutFileReader { } public validate(source: TouchLayoutFile): void { - if(!SchemaValidators.touchLayoutClean(source)) + if(!SchemaValidators.default.touchLayoutClean(source)) /* c8 ignore next 3 */ { - throw new Error(JSON.stringify((SchemaValidators.touchLayoutClean).errors)); + throw new Error(JSON.stringify((SchemaValidators.default.touchLayoutClean).errors)); } } diff --git a/common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-writer.ts b/developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-writer.ts similarity index 86% rename from common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-writer.ts rename to developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-writer.ts index 5de702eb1c6..6261f80d4bd 100644 --- a/common/web/types/src/keyman-touch-layout/keyman-touch-layout-file-writer.ts +++ b/developer/src/common/web/utils/src/types/keyman-touch-layout/keyman-touch-layout-file-writer.ts @@ -1,4 +1,8 @@ -import { TouchLayoutFile, TouchLayoutPlatform, TouchLayoutKey, TouchLayoutSubKey } from "./keyman-touch-layout-file.js"; +import { TouchLayout } from "@keymanapp/common-types"; +import TouchLayoutFile = TouchLayout.TouchLayoutFile; +import TouchLayoutPlatform = 
TouchLayout.TouchLayoutPlatform;; +import TouchLayoutKey = TouchLayout.TouchLayoutKey; +import TouchLayoutSubKey = TouchLayout.TouchLayoutSubKey; export interface TouchLayoutFileWriterOptions { formatted?: boolean; @@ -87,24 +91,24 @@ export class TouchLayoutFileWriter { // displayUnderlying is always written out by kmcomp, so we do the same for kmc: platform.displayUnderlying = !!platform.displayUnderlying; - for(let layer of platform.layer) { - for(let row of layer.row) { + for(const layer of platform.layer) { + for(const row of layer.row) { // this matches the old spec for touch layout files (row.id as any) = row.id.toString(); - for(let key of row.key) { + for(const key of row.key) { fixupKey(key); if(key.sk) { - for(let sk of key.sk) { + for(const sk of key.sk) { fixupKey(sk); } } if(key.multitap) { - for(let sk of key.multitap) { + for(const sk of key.multitap) { fixupKey(sk); } } if(key.flick) { - for(let id of Object.keys(key.flick)) { + for(const id of Object.keys(key.flick)) { fixupKey((key.flick as any)[id] as TouchLayoutSubKey); } } diff --git a/common/web/types/test/fixtures/keyman-touch-layout/khmer_angkor.keyman-touch-layout b/developer/src/common/web/utils/test/fixtures/keyman-touch-layout/khmer_angkor.keyman-touch-layout similarity index 100% rename from common/web/types/test/fixtures/keyman-touch-layout/khmer_angkor.keyman-touch-layout rename to developer/src/common/web/utils/test/fixtures/keyman-touch-layout/khmer_angkor.keyman-touch-layout diff --git a/common/web/types/test/fixtures/keyman-touch-layout/legacy.keyman-touch-layout b/developer/src/common/web/utils/test/fixtures/keyman-touch-layout/legacy.keyman-touch-layout similarity index 100% rename from common/web/types/test/fixtures/keyman-touch-layout/legacy.keyman-touch-layout rename to developer/src/common/web/utils/test/fixtures/keyman-touch-layout/legacy.keyman-touch-layout diff --git a/common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts 
b/developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts similarity index 96% rename from common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts rename to developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts index 479f2c4ccbc..bfebec85e8f 100644 --- a/common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts +++ b/developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-file-reader.ts @@ -2,7 +2,7 @@ import * as fs from 'fs'; import 'mocha'; import { assert } from 'chai'; import { makePathToFixture } from '../helpers/index.js'; -import { TouchLayoutFileReader } from "../../src/keyman-touch-layout/keyman-touch-layout-file-reader.js"; +import { TouchLayoutFileReader } from "../../src/types/keyman-touch-layout/keyman-touch-layout-file-reader.js"; describe('TouchLayoutFileReader', function () { it('should read a valid file', function() { diff --git a/common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts b/developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts similarity index 81% rename from common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts rename to developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts index f6a09d08d3b..25870e6e09b 100644 --- a/common/web/types/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts +++ b/developer/src/common/web/utils/test/keyman-touch-layout/test-keyman-touch-layout-round-trip.ts @@ -2,8 +2,8 @@ import * as fs from 'fs'; import 'mocha'; import { assert } from 'chai'; import { makePathToFixture } from '../helpers/index.js'; -import { TouchLayoutFileReader } from "../../src/keyman-touch-layout/keyman-touch-layout-file-reader.js"; -import { TouchLayoutFileWriter } from 
"../../src/keyman-touch-layout/keyman-touch-layout-file-writer.js"; +import { TouchLayoutFileReader } from "../../src/types/keyman-touch-layout/keyman-touch-layout-file-reader.js"; +import { TouchLayoutFileWriter } from "../../src/types/keyman-touch-layout/keyman-touch-layout-file-writer.js"; describe('TouchLayoutFile', function () { it('should round-trip from TouchLayoutFileReader to TouchLayoutFileWriter', function() { @@ -26,7 +26,7 @@ describe('TouchLayoutFile', function () { assert.deepEqual(layout, newLayout); // And do the same without any options - let output2 = writer.write(layout); + const output2 = writer.write(layout); assert.deepEqual(output, output2); newLayout = reader.read(output2); From 4c5b6213d104efba91ecd27628c775ab21bc6fc7 Mon Sep 17 00:00:00 2001 From: Darcy Wong Date: Mon, 8 Jul 2024 16:26:50 -0500 Subject: [PATCH 13/88] fix(android/engine): Parse keyboards.json for FirstVoices app --- .../com/firstvoices/keyboards/FVShared.java | 127 +++++++++++------- .../firstvoices/keyboards/MainActivity.java | 10 +- oem/firstvoices/android/build.sh | 4 - 3 files changed, 86 insertions(+), 55 deletions(-) diff --git a/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/FVShared.java b/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/FVShared.java index 95158826ba5..4ae998500a5 100644 --- a/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/FVShared.java +++ b/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/FVShared.java @@ -7,22 +7,25 @@ import android.net.Uri; import android.util.Log; import android.widget.Toast; +import com.keyman.engine.JSONParser; import com.keyman.engine.KMManager; import com.keyman.engine.data.Keyboard; import com.keyman.engine.packages.PackageProcessor; import com.keyman.engine.util.KMLog; -import java.io.BufferedReader; +import org.json.JSONArray; +import org.json.JSONObject; + import java.io.File; import java.io.FileInputStream; import 
java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -30,6 +33,8 @@ final class FVShared { private static FVShared instance = null; private boolean isInitialized = false; + // File containing keyboard+region info for each keyboard + private static final String FVKeyboards_JSON = "keyboards.json"; private static final String FVLoadedKeyboardList = "loaded_keyboards.dat"; // Keys from earlier versions of app, used only in the upgrade process @@ -124,53 +129,83 @@ public static FVShared getInstance() { } private FVRegionList loadRegionList() { - FVRegionList list = new FVRegionList(); - try { - // At this point in initialization, fv_all.kmp hasn't been extracted, so - // we get all the keyboard info from keyboards.csv - InputStream inputStream = context.getAssets().open("keyboards.csv"); - BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); - - reader.readLine(); // skip header row - String line = reader.readLine(); - - while (line != null) { - while (line.contains(",,")) - line = line.replace(",,", ", ,"); - - String[] values = line.split(","); - if (values != null && values.length > 0) { - // Read in column info - String kbId = values[1]; - String kbName = values[2]; - String regionName = values[3]; - String legacyId = values[4]; - String version = values[5]; - String lgId = values[6].toLowerCase(); // Normalize language ID - String lgName = values[7]; - - FVRegion region = list.findRegion(regionName); - if(region == null) { - region = new FVRegion(regionName); - list.add(region); - } - - FVKeyboard keyboard = new FVKeyboard(kbId, kbName, legacyId, version, lgId, lgName); - - region.keyboards.add(keyboard); - } + FVRegionList list = new FVRegionList(); + File 
resourceRoot = new File(getResourceRoot()); + PackageProcessor kmpProcessor = new PackageProcessor(resourceRoot); + JSONParser parser = new JSONParser(); + File jsonFile = new File(getPackagesDir() + FVDefault_PackageID + File.separator + FVKeyboards_JSON); + if (!jsonFile.exists()) { + return list; + } + try { + // At this point in initialization, fv_all.kmp is now extracted, so + // populate keyboard info from keyboards.json + JSONArray keyboardsArray = parser.getJSONObjectFromFile(jsonFile, JSONArray.class); + + if (keyboardsArray == null) { + Log.d("loadRegionList", "unable to load keyboards.json"); + return list; + } - line = reader.readLine(); - } + for (int i=0; i r1.name.compareTo(r2.name)); - return list; - } + for (int i=0; i k1.name.compareTo(k2.name)); + } + return list; + } private FVLoadedKeyboardList loadLoadedKeyboardList() { FVLoadedKeyboardList data = new FVLoadedKeyboardList(); diff --git a/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/MainActivity.java b/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/MainActivity.java index f91657203bc..d38d5b9bc07 100644 --- a/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/MainActivity.java +++ b/oem/firstvoices/android/app/src/main/java/com/firstvoices/keyboards/MainActivity.java @@ -49,17 +49,17 @@ protected void onCreate(Bundle savedInstanceState) { setContentView(R.layout.activity_main); + if (BuildConfig.DEBUG) { + KMManager.setDebugMode(true); + } + KMManager.initialize(getApplicationContext(), KMManager.KeyboardType.KEYBOARD_TYPE_INAPP); + FVShared.getInstance().initialize(this); FVShared.getInstance().upgradeTo12(); FVShared.getInstance().upgradeTo14(); FVShared.getInstance().preloadPackages(); - if (BuildConfig.DEBUG) { - KMManager.setDebugMode(true); - } - KMManager.initialize(getApplicationContext(), KMManager.KeyboardType.KEYBOARD_TYPE_INAPP); - /** * We need to set the default (fallback) keyboard to sil_euro_latin inside the fv_all 
package * rather than the normal default of sil_euro_latin inside the sil_euro_latin package. diff --git a/oem/firstvoices/android/build.sh b/oem/firstvoices/android/build.sh index 3e10a79ae66..e27be069747 100755 --- a/oem/firstvoices/android/build.sh +++ b/oem/firstvoices/android/build.sh @@ -61,13 +61,9 @@ if builder_start_action clean; then fi if builder_start_action configure; then - KEYBOARDS_CSV="$KEYMAN_ROOT/oem/firstvoices/keyboards.csv" - KEYBOARDS_CSV_TARGET="$KEYMAN_ROOT/oem/firstvoices/android/app/src/main/assets/keyboards.csv" - KEYBOARD_PACKAGE_ID="fv_all" KEYBOARDS_TARGET="$KEYMAN_ROOT/oem/firstvoices/android/app/src/main/assets/${KEYBOARD_PACKAGE_ID}.kmp" - cp "$KEYBOARDS_CSV" "$KEYBOARDS_CSV_TARGET" downloadKeyboardPackage "$KEYBOARD_PACKAGE_ID" "$KEYBOARDS_TARGET" builder_finish_action success configure From 21d21fb8f4bdc7e7e504b409673b20f572151cdb Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 18 Jul 2024 11:32:09 +0700 Subject: [PATCH 14/88] change(common/models): change model tokenization to also tokenize whitespace --- common/models/templates/src/tokenization.ts | 183 +++++++++++----- .../templates/test/test-tokenization.js | 200 +++++++++++++++--- .../src/main/correction/context-tracker.ts | 7 +- .../lm-worker/src/main/model-compositor.ts | 2 +- .../web/lm-worker/src/main/model-helpers.ts | 8 +- 5 files changed, 315 insertions(+), 85 deletions(-) diff --git a/common/models/templates/src/tokenization.ts b/common/models/templates/src/tokenization.ts index 2aa5c392e6d..5db7d872c0c 100644 --- a/common/models/templates/src/tokenization.ts +++ b/common/models/templates/src/tokenization.ts @@ -6,13 +6,19 @@ export interface Tokenization { * An array of tokens to the left of the caret. If the caret is in the middle of a token, * only the part to the left of the caret is included. */ - left: USVString[], + left: { + text: USVString, + isWhitespace?: boolean + }[], /** * An array of tokens to the right of the caret. 
If the caret is in the middle of a token, * only the part to the right of the caret is included. */ - right: USVString[], + right: { + text: USVString, + isWhitespace?: boolean + }[], /** * A flag indicating whether or not the caret's position in the context caused a token @@ -22,21 +28,66 @@ export interface Tokenization { caretSplitsToken: boolean } -export function tokenize(wordBreaker: WordBreakingFunction, context?: Partial): Tokenization { +export function tokenize( + wordBreaker: WordBreakingFunction, + context: Partial, + options?: { + /** Characters to rejoin to preceding tokens if found immediately pre-caret. */ + rejoins?: string[] + } +): Tokenization { + const rejoins = options?.rejoins || ["'"]; context = context || { left: undefined, startOfBuffer: undefined, endOfBuffer: undefined }; - let leftSpans = wordBreaker(context.left || '') || []; - let rightSpans = wordBreaker(context.right || '') || []; + const leftSpans = wordBreaker(context.left || '') || []; + const rightSpans = wordBreaker(context.right || '') || []; + + const tokenization: Tokenization = { + left: [], + right: [], + caretSplitsToken: false + } - let leftTail: Span; - if(leftSpans.length > 0) { - leftTail = leftSpans[leftSpans.length - 1]; + // New step 1: process left-context. + let currentIndex = 0; + while(leftSpans.length > 0) { + const nextSpan = leftSpans[0]; + if(nextSpan.start != currentIndex) { + // Implicit whitespace span! + tokenization.left.push({ + text: context.left!.substring(currentIndex, nextSpan.start), + isWhitespace: true + }); + currentIndex = nextSpan.start; + } else { + leftSpans.shift(); + // Explicit non-whitespace span. + tokenization.left.push({ + text: nextSpan.text + }); + currentIndex = nextSpan.end; + } } + // Detect any pre-caret whitespace after the final pre-caret non-whitespace + // token + // + // Note: the default wordbreaker won't need this code, as it emits a `''` + // after final whitespace. + if(currentIndex != (context.left?.length ?? 
0)) { + tokenization.left.push({ + text: context.left!.substring(currentIndex, context.left!.length), + isWhitespace: true + }); + currentIndex = context.left!.length; + } + + // New step 2: handle any rejoins needed. + // Handle any desired special handling for directly-pre-caret scenarios - where for this // _specific_ context, we should not make a token division where one normally would exist otherwise. // @@ -45,62 +96,84 @@ export function tokenize(wordBreaker: WordBreakingFunction, context?: Partial 1) { - const leftTailBase = leftSpans[leftSpans.length - 2]; + let leftTokenCount = tokenization.left.length; + if(leftTokenCount > 1) { + const leftTailBase = tokenization.left[leftTokenCount - 2]; + const leftTail = tokenization.left[leftTokenCount - 1]; // If the final two pre-caret spans are adjacent - without intervening whitespace... - if(leftTailBase.end == leftTail!.start) { + if(!leftTailBase.isWhitespace && !leftTail.isWhitespace) { // Ideal: if(leftTailBase is standard-char-class && leftTail is single-quote-class) // But we don't have character class access here; it's all wordbreaker-function internal. // Upon inspection of the wordbreaker data definitions... the single-quote-class is ONLY "'". // So... we'll just be lazy for now and append the `'`. - if(leftTail!.text == "'") { - let mergedSpan: Span = { - text: leftTailBase.text + leftTail!.text, - start: leftTailBase.start, - end: leftTail!.end, - length: leftTailBase.length + leftTail!.length - }; - - leftSpans.pop(); // leftTail - leftSpans.pop(); // leftTailBase - leftSpans.push(mergedSpan); - leftTail = mergedSpan; // don't forget to update the `leftTail` Span! + if(rejoins.indexOf(leftTail!.text) != -1) { + tokenization.left.pop(); // leftTail + tokenization.left.pop(); // leftTailBase + tokenization.left.push({ + text: leftTailBase.text + leftTail.text + }); + leftTokenCount--; } } } - // With any 'detokenization' cases already handled, we may now begin to build the return object. 
- let tokenization: Tokenization = { - left: leftSpans.map(span => span.text), - right: rightSpans.map(span => span.text), - - // A default initialization of the value. - caretSplitsToken: false - }; - - // Now the hard part - determining whether or not the caret caused a token split. - if(leftSpans.length > 0 && rightSpans.length > 0) { - let rightHead = rightSpans[0]; - - // If tokenization includes all characters on each side of the caret, - // we have a good candidate for a caret-splitting scenario. - let leftSuffixWordbreak = leftTail!.end != context.left!.length; - let rightPrefixWordbreak = rightHead.start != 0; + // New step 3: right-context tokenization + token split detection + + // context.right starts from index 0; it's an 'index reset'. + currentIndex = 0; + // Set a flag for special "first token" processing. + let firstRightToken = true; + + // Note: is MOSTLY "WET" with the left-span loop, though the + // `caretSplitsToken` check is additional. + while(rightSpans.length > 0) { + const nextSpan = rightSpans[0]; + if(nextSpan.start != currentIndex) { + // Implicit whitespace span! + tokenization.right.push({ + text: context.right!.substring(currentIndex, nextSpan.start), + isWhitespace: true + }); + currentIndex = nextSpan.start; + } else { + // If the first non-whitespace token to the right is non-whitespace, + // and the last token to the left is non-whitespace, the caret may + // be splitting a token. + const leftTail = tokenization.left[leftTokenCount-1]; + if(firstRightToken && !leftTail.isWhitespace) { + if(wordBreaker(leftTail!.text + nextSpan.text).length == 1) { + tokenization.caretSplitsToken = true; + } + } - if(leftSuffixWordbreak || rightPrefixWordbreak) { - // Bypass the final test, as we already know the caret didn't split a token. - // (The tokenization process already removed characters between the two.) - return tokenization; + // Explicit non-whitespace span. 
+ rightSpans.shift(); + tokenization.right.push({ + text: nextSpan.text + }); + currentIndex = nextSpan.end; } - // Worth note - some languages don't use wordbreaking characters. So, a final check: - // - // Does the wordbreaker split a merge of the 'two center' tokens? - // If not, then the caret is responsible for the split. - if(wordBreaker(leftTail!.text + rightHead.text).length == 1) { - tokenization.caretSplitsToken = true; - } + // We've always processed the "first right token" after the first iteration. + // Do not run the caret-split check on any future iterations. + firstRightToken = false; + } + + // Detect any pre-caret whitespace after the final pre-caret non-whitespace + // token + // + // Note: the default wordbreaker won't need this code, as it emits a `''` + // after final whitespace. + // + // Also note: is pretty much WET with the similar check after the + // leftSpan loop. + if(currentIndex != (context.right?.length ?? 0)) { + tokenization.right.push({ + text: context.right!.substring(currentIndex, context.right!.length), + isWhitespace: true + }); + currentIndex = context.right!.length; } return tokenization; @@ -108,12 +181,18 @@ export function tokenize(wordBreaker: WordBreakingFunction, context?: Partial 0) { - return tokenization.left.pop() as string; + const lastToken = tokenization.left.pop(); + if(lastToken!.isWhitespace) { + return ''; + } else { + return lastToken!.text; + } } return ''; diff --git a/common/models/templates/test/test-tokenization.js b/common/models/templates/test/test-tokenization.js index 6c1ff746e58..4433a2cf18e 100644 --- a/common/models/templates/test/test-tokenization.js +++ b/common/models/templates/test/test-tokenization.js @@ -6,6 +6,28 @@ import { assert } from 'chai'; import * as models from "@keymanapp/models-templates"; import * as wordBreakers from "@keymanapp/models-wordbreakers"; +function asProcessedToken(text) { + // default wordbreaker emits these at the end of each context half if ending with 
whitespace. + // Indicates a new spot for non-whitespace text. + if(text == '') { + return { + text: text + }; + } else if(text.trim() == '') { + // Simple cases using standard Latin-script patterns - can be handled via trim() + return { + text: text, + isWhitespace: true + }; + } + + // could add simple check for other, non-default cases here. + + return { + text: text + }; +} + describe('Tokenization functions', function() { describe('tokenize', function() { it('tokenizes English using defaults, pre-whitespace caret', function() { @@ -19,8 +41,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); let expectedResult = { - left: ['The', 'quick', 'brown', 'fox'], - right: ['jumped', 'over', 'the', 'lazy', 'dog'], + left: ['The', ' ', 'quick', ' ', 'brown', ' ', 'fox'].map(asProcessedToken), + right: [' ', 'jumped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', 'dog'].map(asProcessedToken), caretSplitsToken: false }; @@ -30,7 +52,7 @@ describe('Tokenization functions', function() { it('tokenizes English using defaults, pre-whitespace caret, partial context', function() { let context = { left: "quick brown fox", // No "The" - right: " jumped over the lazy", // No "dog" + right: " jumped over the lazy ", // No "dog" startOfBuffer: false, endOfBuffer: false }; @@ -38,8 +60,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); let expectedResult = { - left: ['quick', 'brown', 'fox'], - right: ['jumped', 'over', 'the', 'lazy'], + left: ['quick', ' ', 'brown', ' ', 'fox'].map(asProcessedToken), + right: [' ', 'jumped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', ''].map(asProcessedToken), caretSplitsToken: false }; @@ -59,14 +81,35 @@ describe('Tokenization functions', function() { // Technically, we're editing the start of the first token on the right // when in this context. 
let expectedResult = { - left: ['The', 'quick', 'brown', 'fox', ''], - right: ['jumped', 'over', 'the', 'lazy', 'dog'], + left: ['The', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', ''].map(asProcessedToken), + right: ['jumped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', 'dog'].map(asProcessedToken), caretSplitsToken: true }; assert.deepEqual(tokenization, expectedResult); }); + it('tokenizes English using ascii-breaker, post-whitespace caret', function() { + let context = { + left: "The quick brown fox ", + right: "jumped over the lazy dog ", + startOfBuffer: true, + endOfBuffer: true + }; + + let tokenization = models.tokenize(wordBreakers.ascii, context); + + // Technically, we're editing the start of the first token on the right + // when in this context. + let expectedResult = { + left: ['The', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' '].map(asProcessedToken), + right: ['jumped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', 'dog', ' '].map(asProcessedToken), + caretSplitsToken: false + }; + + assert.deepEqual(tokenization, expectedResult); + }); + it('tokenizes English using defaults, post-whitespace caret, partial context', function() { let context = { left: "quick brown fox ", @@ -80,8 +123,8 @@ describe('Tokenization functions', function() { // Technically, we're editing the start of the first token on the right // when in this context. 
let expectedResult = { - left: ['quick', 'brown', 'fox', ''], - right: ['jumped', 'over', 'the', 'lazy'], + left: ['quick', ' ', 'brown', ' ', 'fox', ' ', ''].map(asProcessedToken), + right: ['jumped', ' ', 'over', ' ', 'the', ' ', 'lazy'].map(asProcessedToken), caretSplitsToken: true }; @@ -99,8 +142,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); let expectedResult = { - left: ['The', 'quick', 'brown', 'fox', 'jum'], - right: ['ped', 'over', 'the', 'lazy', 'dog'], + left: ['The', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', 'jum'].map(asProcessedToken), + right: ['ped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', 'dog'].map(asProcessedToken), caretSplitsToken: true }; @@ -118,8 +161,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); let expectedResult = { - left: ['The', 'quick', 'brown', 'fox', 'jum'], - right: ['ped', 'over', 'the', 'lazy', 'dog'], + left: ['The', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', 'jum'].map(asProcessedToken), + right: ['ped', ' ', 'over', ' ', 'the', ' ', 'lazy', ' ', 'dog'].map(asProcessedToken), caretSplitsToken: true }; @@ -167,7 +210,7 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); let expectedResult = { - left: [''], + left: [' ', ''].map(asProcessedToken), right: [], caretSplitsToken: false }; @@ -175,6 +218,85 @@ describe('Tokenization functions', function() { assert.deepEqual(tokenization, expectedResult); }); + it('properly tokenizes partial English contractions - default setting', function() { + let context = { + left: "I can'", + right: "", + startOfBuffer: true, + endOfBuffer: true + }; + + let tokenization = models.tokenize(wordBreakers.default, context); + + // Technically, we're editing the start of the first token on the right + // when in this context. 
+ let expectedResult = { + left: ['I', ' ', 'can\''].map(asProcessedToken), + right: [].map(asProcessedToken), + caretSplitsToken: false + }; + + assert.deepEqual(tokenization, expectedResult); + }); + + it('overly tokenizes partial English contractions when (default) apostrophe rejoin is disabled', function() { + let context = { + left: "I can'", + right: "", + startOfBuffer: true, + endOfBuffer: true + }; + + let tokenization = models.tokenize(wordBreakers.default, context, { rejoins: [] }); + + // Technically, we're editing the start of the first token on the right + // when in this context. + let expectedResult = { + left: ['I', ' ', 'can' , '\''].map(asProcessedToken), + right: [].map(asProcessedToken), + caretSplitsToken: false + }; + + assert.deepEqual(tokenization, expectedResult); + }); + + it('properly tokenizes English contractions', function() { + // Note: a 'context0' with the caret before the `'` actually + // is not supported well yet; a leading `'` is broken from + // following text when in isolation. + + let context1 = { + left: "I can'", + right: "t", + startOfBuffer: true, + endOfBuffer: true + }; + + let context2 = { + left: "I can't", + right: "", + startOfBuffer: true, + endOfBuffer: true + } + + let tokenization1 = models.tokenize(wordBreakers.default, context1); + let tokenization2 = models.tokenize(wordBreakers.default, context2); + + let expectedResult1 = { + left: ['I', ' ', 'can\''].map(asProcessedToken), + right: ['t'].map(asProcessedToken), + caretSplitsToken: true + }; + let expectedResult2 = { + left: ['I', ' ', 'can\'t'].map(asProcessedToken), + right: [].map(asProcessedToken), + caretSplitsToken: false + }; + + assert.deepEqual(tokenization1, expectedResult1); + assert.deepEqual(tokenization2, expectedResult2); + }); + // For the next few tests: a mocked wordbreaker for Khmer, a language // without whitespace between words. 
let mockedKhmerBreaker = function(text) { @@ -258,8 +380,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(mockedKhmerBreaker, context); let expectedResult = { - left: ['ស្រុក'], - right: ['ខ្មែរ'], + left: ['ស្រុក'].map(asProcessedToken), + right: ['ខ្មែរ'].map(asProcessedToken), caretSplitsToken: false }; @@ -282,8 +404,8 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(mockedKhmerBreaker, context); let expectedResult = { - left: ['ស្រុ'], - right: ['ក', 'ខ្មែរ'], + left: ['ស្រុ'].map(asProcessedToken), + right: ['ក', 'ខ្មែរ'].map(asProcessedToken), caretSplitsToken: true }; @@ -326,21 +448,21 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); assert.deepEqual(tokenization, { - left: ["don", "-"], - right: ["worry"], + left: ["don", "-"].map(asProcessedToken), + right: [" ", "worry"].map(asProcessedToken), caretSplitsToken: false }); tokenization = models.tokenize(midLetterNonbreaker, context); assert.deepEqual(tokenization, { - left: ["don-"], - right: ["worry"], + left: ["don-"].map(asProcessedToken), + right: [" ", "worry"].map(asProcessedToken), caretSplitsToken: false }); }); - it('handles mid-contraction tokenization', function() { + it('handles mid-contraction tokenization (via wordbreaker customization)', function() { let context = { left: "don:", right: "t worry", @@ -351,17 +473,20 @@ describe('Tokenization functions', function() { let tokenization = models.tokenize(wordBreakers.default, context); assert.deepEqual(tokenization, { - left: ["don", ":"], // This particular case feels like a possible issue. - right: ["t", "worry"], // It'd be a three-way split token, as "don:t" would - // be a single token were it not for the caret in the middle. + // This particular case feels like a possible issue. + // It'd be a three-way split token, as "don:t" would + // be a single token were it not for the caret in the middle. 
+ left: ["don", ":"].map(asProcessedToken), + right: ["t", " ", "worry"].map(asProcessedToken), + caretSplitsToken: false }) tokenization = models.tokenize(midLetterNonbreaker, context); assert.deepEqual(tokenization, { - left: ["don:"], - right: ["t", "worry"], + left: ["don:"].map(asProcessedToken), + right: ["t", " ", "worry"].map(asProcessedToken), caretSplitsToken: true }); }); @@ -394,6 +519,20 @@ describe('Tokenization functions', function() { assert.equal(tokenization, ''); }); + + it('with post-whitespace caret, ascii breaker', function() { + let context = { + left: "The quick brown fox ", + right: "jumped over the lazy dog", + startOfBuffer: true, + endOfBuffer: true + }; + + let tokenization = models.getLastPreCaretToken(wordBreakers.ascii, context); + + assert.equal(tokenization, ''); + }); + it('within a token', function() { let context = { left: "The quick brown fox jum", @@ -406,6 +545,11 @@ describe('Tokenization functions', function() { assert.equal(tokenization, 'jum'); }); + + it('with no context', function() { + let tokenization = models.getLastPreCaretToken(wordBreakers.default, null); + assert.equal(tokenization, ''); + }); }); describe('wordbreak', function() { diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 2f15165e5e3..9dde1d6f9cc 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -1,9 +1,9 @@ -import { applyTransform, tokenize } from '@keymanapp/models-templates'; -import { defaultWordbreaker } from '@keymanapp/models-wordbreakers'; +import { applyTransform } from '@keymanapp/models-templates'; import { ClassicalDistanceCalculation } from './classical-calculation.js'; import { SearchSpace } from './distance-modeler.js'; import TransformUtils from '../transformUtils.js'; +import { determineModelTokenizer } from '../model-helpers.js'; function 
textToCharTransforms(text: string, transformId?: number) { let perCharTransforms: Transform[] = []; @@ -536,7 +536,8 @@ export class ContextTracker extends CircularArray { throw "This lexical model does not provide adequate data for correction algorithms and context reuse"; } - let tokenizedContext = tokenize(model.wordbreaker || defaultWordbreaker, context); + let tokenize = determineModelTokenizer(model); + let tokenizedContext = tokenize(context); if(tokenizedContext.left.length > 0) { for(let i = this.count - 1; i >= 0; i--) { diff --git a/common/web/lm-worker/src/main/model-compositor.ts b/common/web/lm-worker/src/main/model-compositor.ts index 5ba5554ac2d..444de3c73bc 100644 --- a/common/web/lm-worker/src/main/model-compositor.ts +++ b/common/web/lm-worker/src/main/model-compositor.ts @@ -323,7 +323,7 @@ export class ModelCompositor { return breaker(context); } - private tokenize(context: Context): models.Tokenization { + private tokenize(context: Context) { const tokenizer = determineModelTokenizer(this.lexicalModel); return tokenizer(context); } diff --git a/common/web/lm-worker/src/main/model-helpers.ts b/common/web/lm-worker/src/main/model-helpers.ts index 17ad212671c..afca8ecdc1d 100644 --- a/common/web/lm-worker/src/main/model-helpers.ts +++ b/common/web/lm-worker/src/main/model-helpers.ts @@ -62,7 +62,13 @@ export function determineModelWordbreaker(model: LexicalModel): (context: Contex export function determineModelTokenizer(model: LexicalModel) { return (context: Context) => { if(model.wordbreaker) { - return models.tokenize(model.wordbreaker, context); + const fullTokenization = models.tokenize(model.wordbreaker, context); + + return { + left: fullTokenization.left .filter((entry) => !entry.isWhitespace).map((entry) => entry.text), + right: fullTokenization.right.filter((entry) => !entry.isWhitespace).map((entry) => entry.text), + caretSplitsToken: fullTokenization.caretSplitsToken + } } else { return null; } From 
96185472f194cee3748bc963821bccb865ba2c7c Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Mon, 22 Jul 2024 10:30:00 +0700 Subject: [PATCH 15/88] feat(web): add support for transform tokenization --- .../main/correction/transform-tokenization.ts | 51 +++ .../mocha/cases/transform-tokenization.js | 377 ++++++++++++++++++ 2 files changed, 428 insertions(+) create mode 100644 common/web/lm-worker/src/main/correction/transform-tokenization.ts create mode 100644 common/web/lm-worker/src/test/mocha/cases/transform-tokenization.js diff --git a/common/web/lm-worker/src/main/correction/transform-tokenization.ts b/common/web/lm-worker/src/main/correction/transform-tokenization.ts new file mode 100644 index 00000000000..42455b5164b --- /dev/null +++ b/common/web/lm-worker/src/main/correction/transform-tokenization.ts @@ -0,0 +1,51 @@ +import { applyTransform, type Tokenization } from "@keymanapp/models-templates"; + +export function tokenizeTransform( + tokenize: (context: Context) => Tokenization, + context: Context, + transform: Transform +): Transform[] { + // Context does not slide within this function. + const postContext = applyTransform(transform, context); + const postTokenization = tokenize(postContext).left; + + let insert = transform.insert; + + const tokenizedTransforms: Transform[] = []; + for(let index = postTokenization.length - 1; index >= 0; index--) { + const currentToken = postTokenization[index]; + const textLen = currentToken.text.length; + + if(textLen < insert.length) { + tokenizedTransforms.unshift({ + insert: currentToken.text, + deleteLeft: 0 + }); + + insert = insert.substring(0, insert.length - textLen); + } else { + tokenizedTransforms.unshift({ + insert: insert, + deleteLeft: transform.deleteLeft + }); + break; + } + } + + return tokenizedTransforms; +} + +// If and when we look to do phrase-based suggestions and/or auto-correction on accidental +// spaces, this function should prove useful. 
+export function tokenizeTransformDistribution( + tokenize: (context: Context) => Tokenization, + context: Context, + transformDistribution: Distribution +): Distribution { + return transformDistribution.map((transform) => { + return { + sample: tokenizeTransform(tokenize, context, transform.sample), + p: transform.p + }; + }); +} \ No newline at end of file diff --git a/common/web/lm-worker/src/test/mocha/cases/transform-tokenization.js b/common/web/lm-worker/src/test/mocha/cases/transform-tokenization.js new file mode 100644 index 00000000000..41b2a912dea --- /dev/null +++ b/common/web/lm-worker/src/test/mocha/cases/transform-tokenization.js @@ -0,0 +1,377 @@ +import { assert } from 'chai'; + +import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; + +import { tokenize } from '@keymanapp/models-templates'; +import { tokenizeTransform } from '#./correction/transform-tokenization.js'; + +const defaultTokenize = (context) => tokenize(defaultBreaker, context); + +describe('tokenizeTransform', () => { + describe('with default wordbreaking', () => { + it('properly handles simple token-edit transform', () => { + const context = { + left: 'an apple a date', + right: '' + }; + + const editTransform = { + insert: 'y', + deleteLeft: 2 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('properly handles simple token-replacing transform', () => { + const context = { + left: 'an apple a date', + right: '' + }; + + const editTransform = { + insert: 'week', + deleteLeft: 4 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('handles simple token-replacing transform with cross-token deleteLeft', () => { + const context = { + left: 'an apple a date', + right: '' + }; + + // 'an apple any' + 
const editTransform = { + insert: 'ny', + deleteLeft: 6 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('properly handles a simple appended whitespace', () => { + const context = { + left: 'an apple a day', + right: '' + }; + + const editTransform = { + insert: ' ', + deleteLeft: 0 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 2); + assert.deepEqual(result, [ + // The whitespace belongs on the whitespace token that will be added. + editTransform, + // The default-breaker adds an empty token after whitespace, hence this + // empty transform. + { insert: '', deleteLeft: 0 } + ]); + }); + + it('properly handles a simple appended period', () => { + const context = { + left: 'an apple a day', + right: '' + }; + + const editTransform = { + insert: '.', + deleteLeft: 0 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + // The default wordbreaker does not (currently) append a blank token + // after standard English punctuation. + assert.equal(result.length, 1); + assert.deepEqual(result, [ + editTransform + ]); + }); + + it('handles word-breakable transforms (case 1)', () => { + const context = { + left: 'an apple a dat', + right: '' + }; + + const editTransform = { + insert: 'y k', + deleteLeft: 1 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 3); + assert.deepEqual(result, [ + { insert: 'y', deleteLeft: 1 }, + { insert: ' ', deleteLeft: 0 }, + { insert: 'k', deleteLeft: 0 } + ]); + }); + + it('handles word-breakable transforms (case 2)', () => { + const context = { + left: 'an apple a dat', + right: '' + }; + + const editTransform = { + insert: 'y. 
', + deleteLeft: 1 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 4); + assert.deepEqual(result, [ + { insert: 'y', deleteLeft: 1 }, + { insert: '.', deleteLeft: 0 }, + { insert: ' ', deleteLeft: 0 }, + { insert: '', deleteLeft: 0 } + ]); + }); + + it('handles complex breakable cases', () => { + const context = { + left: 'an apple a date', + right: '' + }; + + // 'an apple any' + const editTransform = { + insert: 'ny day', + deleteLeft: 6 + }; + + const result = tokenizeTransform( + defaultTokenize, + context, + editTransform + ); + + assert.equal(result.length, 3); + assert.deepEqual(result, [ + { insert: 'ny', deleteLeft: 6 }, + { insert: ' ', deleteLeft: 0 }, + { insert: 'day', deleteLeft: 0 } + ]); + }); + }); + + describe('with mocked dictionary-based wordbreaking', () => { + function mockedTokenization(entries) { + return { + left: entries.map((text) => { + return {text: text} + }) + }; + } + + it('properly handles simple token-edit transform', () => { + const context = { + left: 'anappleadate', + right: '' + }; + + const editTransform = { + insert: 'y', + deleteLeft: 2 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'a', 'day']), + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('properly handles simple token-replacing transform', () => { + const context = { + left: 'anappleadate', + right: '' + }; + + const editTransform = { + insert: 'week', + deleteLeft: 4 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'a', 'week']), + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('handles simple token-replacing transform with cross-token deleteLeft', () => { + const context = { + left: 'anappleadate', + right: '' + }; + + // 'an apple any' + const editTransform = { 
+ insert: 'ny', + deleteLeft: 5 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'any']), + context, + editTransform + ); + + assert.equal(result.length, 1); + assert.deepEqual(result[0], editTransform); + }); + + it('handles word-breakable transforms (case 1)', () => { + const context = { + left: 'anappleadat', + right: '' + }; + + const editTransform = { + insert: 'yk', + deleteLeft: 1 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'any', 'k']), + context, + editTransform + ); + + assert.equal(result.length, 2); + assert.deepEqual(result, [ + { insert: 'y', deleteLeft: 1 }, + { insert: 'k', deleteLeft: 0 } + ]); + }); + + it('handles word-breakable transforms (case 2)', () => { + const context = { + left: 'anappleadat', + right: '' + }; + + const editTransform = { + insert: 'y.', + deleteLeft: 1 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'any', 'day', '.']), + context, + editTransform + ); + + assert.equal(result.length, 2); + assert.deepEqual(result, [ + { insert: 'y', deleteLeft: 1 }, + { insert: '.', deleteLeft: 0 } + ]); + }); + + it('handles word-breakable transforms (case 2 alternate output)', () => { + const context = { + left: 'anappleadat', + right: '' + }; + + const editTransform = { + insert: 'y.', + deleteLeft: 1 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'any', 'day', '.', '']), + context, + editTransform + ); + + assert.equal(result.length, 3); + assert.deepEqual(result, [ + { insert: 'y', deleteLeft: 1 }, + { insert: '.', deleteLeft: 0 }, + { insert: '', deleteLeft: 0 } + ]); + }); + + it('handles complex breakable cases', () => { + const context = { + left: 'anappleadate', + right: '' + }; + + // 'an apple any' + const editTransform = { + insert: 'nyday', + deleteLeft: 5 + }; + + const result = tokenizeTransform( + () => mockedTokenization(['an', 'apple', 'any', 'day']), + context, + 
editTransform + ); + + assert.equal(result.length, 2); + assert.deepEqual(result, [ + { insert: 'ny', deleteLeft: 5 }, + { insert: 'day', deleteLeft: 0 } + ]); + }); + }); +}); \ No newline at end of file From 8f6c2d602d5a8134e7bbc48dc5429d38fb185ed6 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Fri, 19 Jul 2024 11:53:53 +0700 Subject: [PATCH 16/88] change(web): track whitespace-aware tokenization for context + correction-search caching --- .../src/main/correction/context-tracker.ts | 416 +++++++++--------- .../lm-worker/src/main/model-compositor.ts | 7 +- .../web/lm-worker/src/main/model-helpers.ts | 8 +- .../web/lm-worker/src/main/predict-helpers.ts | 19 +- .../cases/edit-distance/context-tracker.js | 131 ++++-- 5 files changed, 321 insertions(+), 260 deletions(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 9dde1d6f9cc..a8c42e53a18 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -31,6 +31,7 @@ export class TrackedContextSuggestion { export class TrackedContextToken { raw: string; replacementText: string; + isWhitespace?: boolean; transformDistributions: Distribution[] = []; replacements: TrackedContextSuggestion[]; @@ -54,6 +55,30 @@ export class TrackedContextToken { revert() { delete this.activeReplacementId; } + + /** + * Used for 14.0's backspace workaround, which flattens all previous Distribution + * entries because of limitations with direct use of backspace transforms. + * @param tokenText + * @param transformId + */ + updateWithBackspace(tokenText: USVString, transformId: number) { + // It's a backspace transform; time for special handling! + // + // For now, with 14.0, we simply compress all remaining Transforms for the token into + // multiple single-char transforms. 
Probabalistically modeling BKSP is quite complex, + // so we simplify by assuming everything remaining after a BKSP is 'true' and 'intended' text. + // + // Note that we cannot just use a single, monolithic transform at this point b/c + // of our current edit-distance optimization strategy; diagonalization is currently... + // not very compatible with that. + let backspacedTokenContext: Distribution[] = textToCharTransforms(tokenText, transformId).map(function(transform) { + return [{sample: transform, p: 1.0}]; + }); + + this.raw = tokenText; + this.transformDistributions = backspacedTokenContext; + } } export class TrackedContextState { @@ -126,7 +151,7 @@ export class TrackedContextState { } popHead() { - this.tokens.splice(0, 2); + this.tokens.splice(0, 1); this.indexOffset -= 1; } @@ -144,59 +169,57 @@ export class TrackedContextState { } } - pushWhitespaceToTail(transformDistribution: Distribution = null) { - let whitespaceToken = new TrackedContextToken(); - - // Track the Transform that resulted in the whitespace 'token'. - // Will be needed for phrase-level correction/prediction. - whitespaceToken.transformDistributions = transformDistribution ? [transformDistribution] : []; - - whitespaceToken.raw = null; - this.tokens.push(whitespaceToken); - } - - /** - * Used for 14.0's backspace workaround, which flattens all previous Distribution - * entries because of limitations with direct use of backspace transforms. - * @param tokenText - * @param transformId - */ - replaceTailForBackspace(tokenText: USVString, transformId: number) { - this.tokens.pop(); - - // It's a backspace transform; time for special handling! - // - // For now, with 14.0, we simply compress all remaining Transforms for the token into - // multiple single-char transforms. Probabalistically modeling BKSP is quite complex, - // so we simplify by assuming everything remaining after a BKSP is 'true' and 'intended' text. 
- // - // Note that we cannot just use a single, monolithic transform at this point b/c - // of our current edit-distance optimization strategy; diagonalization is currently... - // not very compatible with that. - let backspacedTokenContext: Distribution[] = textToCharTransforms(tokenText, transformId).map(function(transform) { - return [{sample: transform, p: 1.0}]; - }); - - let compactedToken = new TrackedContextToken(); - compactedToken.raw = tokenText; - compactedToken.transformDistributions = backspacedTokenContext; - this.pushTail(compactedToken); - } - - updateTail(transformDistribution: Distribution, tokenText?: USVString) { - let editedToken = this.tail; - + // pushWhitespaceToTail(transformDistribution: Distribution = null) { + // let whitespaceToken = new TrackedContextToken(); + + // // Track the Transform that resulted in the whitespace 'token'. + // // Will be needed for phrase-level correction/prediction. + // whitespaceToken.transformDistributions = transformDistribution ? [transformDistribution] : []; + + // whitespaceToken.raw = null; + // this.tokens.push(whitespaceToken); + // } + + // /** + // * Used for 14.0's backspace workaround, which flattens all previous Distribution + // * entries because of limitations with direct use of backspace transforms. + // * @param tokenText + // * @param transformId + // */ + // replaceTailForBackspace(tokenText: USVString, transformId: number) { + // this.tokens.pop(); + + // // It's a backspace transform; time for special handling! + // // + // // For now, with 14.0, we simply compress all remaining Transforms for the token into + // // multiple single-char transforms. Probabalistically modeling BKSP is quite complex, + // // so we simplify by assuming everything remaining after a BKSP is 'true' and 'intended' text. + // // + // // Note that we cannot just use a single, monolithic transform at this point b/c + // // of our current edit-distance optimization strategy; diagonalization is currently... 
+ // // not very compatible with that. + // let backspacedTokenContext: Distribution[] = textToCharTransforms(tokenText, transformId).map(function(transform) { + // return [{sample: transform, p: 1.0}]; + // }); + + // let compactedToken = new TrackedContextToken(); + // compactedToken.raw = tokenText; + // compactedToken.transformDistributions = backspacedTokenContext; + // this.pushTail(compactedToken); + // } + + updateToken(token: TrackedContextToken, transformDistribution: Distribution, tokenText?: USVString) { // Preserve existing text if new text isn't specified. - tokenText = tokenText || (tokenText === '' ? '' : editedToken.raw); + tokenText = tokenText || (tokenText === '' ? '' : token.raw); if(transformDistribution && transformDistribution.length > 0) { - editedToken.transformDistributions.push(transformDistribution); + token.transformDistributions.push(transformDistribution); if(this.searchSpace) { this.searchSpace.forEach(space => space.addInput(transformDistribution)); } } // Replace old token's raw-text with new token's raw-text. - editedToken.raw = tokenText; + token.raw = tokenText; } toRawTokenization() { @@ -295,7 +318,8 @@ class CircularArray { */ item(index: number) { if(index >= this.count) { - throw "Invalid array index"; + // JS arrays return `undefined` for invalid array indices. + return undefined; } let mappedIndex = (this.currentTail + index) % this.maxCount; @@ -304,85 +328,83 @@ class CircularArray { } export class ContextTracker extends CircularArray { - static attemptMatchContext(tokenizedContext: USVString[], - matchState: TrackedContextState, - transformDistribution?: Distribution): TrackedContextState { + static attemptMatchContext( + tokenizedContext: { text: USVString, isWhitespace?: boolean } [], + matchState: TrackedContextState, + transformDistribution?: Distribution + ): TrackedContextState { // Map the previous tokenized state to an edit-distance friendly version. 
let matchContext: USVString[] = matchState.toRawTokenization(); // Inverted order, since 'match' existed before our new context. - let mapping = ClassicalDistanceCalculation.computeDistance(matchContext.map(value => ({key: value})), - tokenizedContext.map(value => ({key: value})), - 1); + let mapping = ClassicalDistanceCalculation.computeDistance( + matchContext.map(value => ({key: value})), + tokenizedContext.map(value => ({key: value.text})), + // Must be at least 2, as adding a single whitespace after a token tends + // to add two tokens: one for whitespace, one for the empty token to + // follow it. + 3 + ); let editPath = mapping.editPath(); - let poppedHead = false; - let pushedTail = false; - - // Matters greatly when starting from a nil context. - if(editPath.length > 1) { - // First entry: may not be an 'insert' or a 'transpose' op. - // 'insert' allowed if the next token is 'substitute', as this may occur with an edit path of length 2. - if((editPath[0] == 'insert' && !(editPath[1] == 'substitute' && editPath.length == 2)) || editPath[0].indexOf('transpose') >= 0) { - return null; - } else if(editPath[0] == 'delete') { - poppedHead = true; // a token from the previous state has been wholly removed. - } + // When the context has but two tokens, the path algorithm tends to invert + // 'insert' and 'substitute' from our preferred ordering for them. + // Logically, either order makes sense... but logic for other cases is + // far simpler if we have 'substitute' before 'insert'. + if(editPath.length == 2 && editPath[0] == 'insert' && editPath[1] == 'substitute') { + editPath[0] = 'substitute'; + editPath[1] = 'insert'; } - // Last entry: may not be a 'delete' or a 'transpose' op. 
- let tailIndex = editPath.length -1; - let ignorePenultimateMatch = false; - if(editPath[tailIndex] == 'delete' || editPath[0].indexOf('transpose') >= 0) { - return null; - } else if(editPath[tailIndex] == 'insert') { - pushedTail = true; - } else if(tailIndex > 0 && editPath[tailIndex-1] == 'insert' && editPath[tailIndex] == 'substitute') { - // Tends to happen when accepting suggestions. - pushedTail = true; - ignorePenultimateMatch = true; - } + const firstMatch = editPath.indexOf('match'); + const lastMatch = editPath.lastIndexOf('match'); - // Can happen for the first text input after backspace deletes a wordbreaking character, - // thus the new input continues a previous word while dropping the empty word after - // that prior wordbreaking character. - // - // We can't handle it reliably from this match state, but a previous entry (without the empty token) - // should still be in the cache and will be reliable for this example case. - if(tailIndex > 0 && editPath[tailIndex-1] == 'delete' && editPath[tailIndex] == 'substitute') { - return null; + // Assertion: for a long context, the bulk of the edit path should be a + // continuous block of 'match' entries. If there's anything else in + // the middle, we have a context mismatch. + if(firstMatch) { + for(let i = firstMatch+1; i < lastMatch; i++) { + if(editPath[i] != 'match') { + return null; + } + } } - // Now to check everything in-between: should be exclusively 'match'es. - for(let index = 1; index < editPath.length - (ignorePenultimateMatch ? 2 : 1); index++) { - if(editPath[index] != 'match') { - return null; - } + // If we have a perfect match with a pre-existing context, no mutations have + // happened; just re-use the old context state. + if(firstMatch == 0 && lastMatch == editPath.length - 1) { + return matchState; } - // If we've made it here... success! We have a context match! 
- let state: TrackedContextState; - - if(pushedTail) { - // On suggestion acceptance, we should update the previous final token. - // We do it first so that the acceptance is replicated in the new TrackedContextState - // as well. - if(ignorePenultimateMatch) { - // For this case, we were likely called by ModelCompositor.acceptSuggestion(), which - // would have marked the accepted suggestion. - matchState.tail.replacementText = tokenizedContext[tokenizedContext.length-2]; - } + // If mutations HAVE happened, we have work to do. + let state = matchState; - state = new TrackedContextState(matchState); - } else { - // We're continuing a previously-cached context; create a deep-copy of it. - // We can't just re-use the old instance, unfortunately; predictions break - // with multitaps otherwise - we should avoid tracking keystrokes that were - // rewound. - // - // If there are no incoming transforms, though... yeah, re-use is safe then. - state = !!transformDistribution ? new TrackedContextState(matchState) : matchState; + let priorEdit: typeof editPath[0]; + let poppedTokenCount = 0; + for(let i = 0; i < firstMatch; i++) { + switch(editPath[i]) { + case 'delete': + if(priorEdit && priorEdit != 'delete') { + return null; + } + if(state == matchState) { + state = new TrackedContextState(state); + } + state.popHead(); + poppedTokenCount++; + break; + case 'substitute': + // There's no major need to drop parts of a token being 'slid' out of the context window. + // We'll leave it intact. + break; + default: + // No 'insert' should exist on the leading edge of context when the + // context window slides. + // + // No 'transform' edits should exist within this section, either. 
+ return null; + } } const hasDistribution = transformDistribution && Array.isArray(transformDistribution); @@ -391,100 +413,85 @@ export class ContextTracker extends CircularArray { primaryInput = null; } - const isWhitespace = primaryInput && TransformUtils.isWhitespace(primaryInput); + // TODO: "wordbreak" the `insert` section of the transform (if it exists). + // ... wait, might have to be done at a higher level... + // ... and will probably want its own unit test ... + const isBackspace = primaryInput && TransformUtils.isBackspace(primaryInput); - const finalToken = tokenizedContext[tokenizedContext.length-1]; - - /* Assumption: This is an adequate check for its two sub-branches. - * - * Basis: - * - Assumption: one keystroke may only cause a single token to rotate out of context. - * - That is, no "reasonable" keystroke would emit enough code points to 'bump' two words simultaneously. - * - ... This one may need to be loosened a bit... but it should be enough for initial correction testing as-is. - * - Assumption: one keystroke may only cause a single token to be appended to the context - * - That is, no "reasonable" keystroke would emit a Transform adding two separate word tokens - * - For languages using whitespace to word-break, said keystroke would have to include said whitespace to break the assumption. - */ - - function maintainLastToken() { - if(isWhitespace && editPath[tailIndex] == 'match') { - /* - We can land here if there are multiple whitespaces in a row. - There's already an implied whitespace to the left, so we conceptually - merge the new whitespace with that one. - */ - return; - } else if(isBackspace) { - // Consider backspace entry for this case? - state.replaceTailForBackspace(finalToken, primaryInput.id); - } else { - state.updateTail(primaryInput ? transformDistribution : null, finalToken); - } - } - // If there is/was more than one context token available... 
- if(editPath.length > 1) { - // We're removing a context token, but at least one remains. - if(poppedHead) { - state.popHead(); - } + // Reset priorEdit for the end-of-context updating loop. + priorEdit = undefined; - // We're adding an additional context token. - if(pushedTail) { - const tokenizedTail = tokenizedContext[tokenizedContext.length - 1]; - /* - * Common-case: most transforms that trigger this case are from pure-whitespace Transforms. MOST. - * - * Less-common, but noteworthy: some wordbreaks may occur without whitespace. Example: - * `"o` => ['"', 'o']. Make sure to double-check against `tokenizedContext`! - */ - let pushedToken = new TrackedContextToken(); - pushedToken.raw = tokenizedTail; - - if(isWhitespace || !primaryInput) { - state.pushWhitespaceToTail(transformDistribution ?? []); - // Continuing the earlier assumption, that 'pure-whitespace Transform' does not emit any initial characters - // for the new word (token), so the input keystrokes do not correspond to the new text token. - pushedToken.transformDistributions = []; - } else { - state.pushWhitespaceToTail(); - // Assumption: Since we only allow one-transform-at-a-time changes between states, we shouldn't be missing - // any metadata used to construct the new context state token. - pushedToken.transformDistributions = transformDistribution ? [transformDistribution] : []; - } + // Now to update the end of the context window. + for(let i = lastMatch+1; i < editPath.length; i++) { + const incomingToken = tokenizedContext[i - poppedTokenCount] + switch(editPath[i]) { + case 'substitute': + if(i == editPath.length - 1) { + state = new TrackedContextState(state); + } - state.pushTail(pushedToken); - } else { - // We're editing the final context token. - // TODO: Assumption: we didn't 'miss' any inputs somehow. - // As is, may be prone to fragility should the lm-layer's tracked context 'desync' from its host's. - maintainLastToken(); - } - // There is only one word in the context. 
- } else { - // TODO: Assumption: we didn't 'miss' any inputs somehow. - // As is, may be prone to fragility should the lm-layer's tracked context 'desync' from its host's. - - if(editPath[tailIndex] == 'insert') { - // Construct appropriate initial token. - let token = new TrackedContextToken(); - token.raw = tokenizedContext[0]; - token.transformDistributions = [transformDistribution]; - state.pushTail(token); - } else { - // Edit the lone context token. - maintainLastToken(); + if(isBackspace) { + state.tokens[i - poppedTokenCount].updateWithBackspace(incomingToken.text, primaryInput.id); + } else { + state.updateToken(state.tokens[i - poppedTokenCount], transformDistribution, incomingToken.text); + } + + if(state != matchState) { + if(isBackspace) { + matchState.tokens[i].updateWithBackspace(incomingToken.text, primaryInput.id); + } else { + matchState.updateToken(state.tokens[i], transformDistribution, incomingToken.text); + } + } + break; + case 'insert': + if(priorEdit && priorEdit != 'substitute' && priorEdit != 'match') { + return null; + } + + if(state == matchState) { + state = new TrackedContextState(state); + } + + let pushedToken = new TrackedContextToken(); + pushedToken.raw = incomingToken.text; + + // TODO: May need something more complicated if the keystroke's + // transform triggers a wordbreak _within_ its boundaries (rather than + // on an edge). (Probably some way to map the tokenization to the indices + // within `insert`.) + pushedToken.transformDistributions = transformDistribution ? [transformDistribution] : []; + pushedToken.isWhitespace = incomingToken.isWhitespace; + + state.pushTail(pushedToken); + break; + default: + // No 'delete' should exist on the trailing edge of context when the + // context window slides. While it can happen due to keystrokes with + // `deleteLeft`, we keep a cache of recent contexts - an older one will + // likely match sufficiently. + // - may see 'delete' followed by 'substitute' in such cases. 
+ // + // No 'transform' edits should exist within this section, either. + return null; } } + return state; } - private static modelContextState(tokenizedContext: USVString[], - transformDistribution: Distribution, - lexicalModel: LexicalModel): TrackedContextState { + private static modelContextState( + tokenizedContext: {text: USVString, isWhitespace?: boolean}[], + lexicalModel: LexicalModel + ): TrackedContextState { let baseTokens = tokenizedContext.map(function(entry) { let token = new TrackedContextToken(); - token.raw = entry; + token.raw = entry.text; + if(entry.isWhitespace) { + token.isWhitespace = true; + } + if(token.raw) { token.transformDistributions = textToCharTransforms(token.raw).map(function(transform) { return [{sample: transform, p: 1.0}]; @@ -504,7 +511,7 @@ export class ContextTracker extends CircularArray { } while(baseTokens.length > 0) { - state.pushWhitespaceToTail(); + // state.pushWhitespaceToTail(); state.pushTail(baseTokens.splice(0, 1)[0]); } @@ -527,21 +534,32 @@ export class ContextTracker extends CircularArray { * @param context * @param transformDistribution */ - analyzeState(model: LexicalModel, - context: Context, - transformDistribution?: Distribution): TrackedContextState { + analyzeState( + model: LexicalModel, + context: Context, + transformDistribution?: Distribution + ): TrackedContextState { if(!model.traverseFromRoot) { // Assumption: LexicalModel provides a valid traverseFromRoot function. (Is technically optional) // Without it, no 'corrections' may be made; the model can only be used to predict, not correct. 
throw "This lexical model does not provide adequate data for correction algorithms and context reuse"; } + const inputTransform = transformDistribution?.[0]; + if(inputTransform) { + context = applyTransform(inputTransform.sample, context); + } + let tokenize = determineModelTokenizer(model); let tokenizedContext = tokenize(context); if(tokenizedContext.left.length > 0) { for(let i = this.count - 1; i >= 0; i--) { const priorMatchState = this.item(i); + + // Skip intermediate multitap-produced contexts. + // When multitapping, we skip all contexts from prior taps within the same interaction, + // but not any contexts from before the multitap started. const priorTaggedContext = priorMatchState.taggedContext; if(priorTaggedContext && transformDistribution && transformDistribution.length > 0) { // Using the potential `matchState` + the incoming transform, do the results line up for @@ -584,7 +602,7 @@ export class ContextTracker extends CircularArray { // // Assumption: as a caret needs to move to context before any actual transform distributions occur, // this state is only reached on caret moves; thus, transformDistribution is actually just a single null transform. - let state = ContextTracker.modelContextState(tokenizedContext.left, transformDistribution, model); + let state = ContextTracker.modelContextState(tokenizedContext.left, model); state.taggedContext = context; this.enqueue(state); return state; diff --git a/common/web/lm-worker/src/main/model-compositor.ts b/common/web/lm-worker/src/main/model-compositor.ts index 444de3c73bc..73f973b3828 100644 --- a/common/web/lm-worker/src/main/model-compositor.ts +++ b/common/web/lm-worker/src/main/model-compositor.ts @@ -216,11 +216,8 @@ export class ModelCompositor { let postContextTokenization = this.tokenize(postContext); if(postContextTokenization) { // Handles display string for reversions triggered by accepting a suggestion mid-token. 
- if(postContextTokenization.left.length > 0) { - revertedPrefix = postContextTokenization.left[postContextTokenization.left.length-1]; - } else { - revertedPrefix = ''; - } + const preCaretToken = postContextTokenization.left[postContextTokenization.left.length - 1]; + revertedPrefix = (preCaretToken && !preCaretToken.isWhitespace) ? preCaretToken.text : ''; revertedPrefix += postContextTokenization.caretSplitsToken ? postContextTokenization.right[0] : ''; } else { revertedPrefix = this.wordbreak(postContext); diff --git a/common/web/lm-worker/src/main/model-helpers.ts b/common/web/lm-worker/src/main/model-helpers.ts index afca8ecdc1d..17ad212671c 100644 --- a/common/web/lm-worker/src/main/model-helpers.ts +++ b/common/web/lm-worker/src/main/model-helpers.ts @@ -62,13 +62,7 @@ export function determineModelWordbreaker(model: LexicalModel): (context: Contex export function determineModelTokenizer(model: LexicalModel) { return (context: Context) => { if(model.wordbreaker) { - const fullTokenization = models.tokenize(model.wordbreaker, context); - - return { - left: fullTokenization.left .filter((entry) => !entry.isWhitespace).map((entry) => entry.text), - right: fullTokenization.right.filter((entry) => !entry.isWhitespace).map((entry) => entry.text), - caretSplitsToken: fullTokenization.caretSplitsToken - } + return models.tokenize(model.wordbreaker, context); } else { return null; } diff --git a/common/web/lm-worker/src/main/predict-helpers.ts b/common/web/lm-worker/src/main/predict-helpers.ts index 31b12e898b1..e505245edc5 100644 --- a/common/web/lm-worker/src/main/predict-helpers.ts +++ b/common/web/lm-worker/src/main/predict-helpers.ts @@ -162,14 +162,19 @@ export async function correctAndEnumerate( // facilitates a more thorough correction-search pattern. // Token replacement benefits greatly from knowledge of the prior context state. 
- let contextState = contextTracker.analyzeState(lexicalModel, context, null); + let contextState = contextTracker.analyzeState( + lexicalModel, + context, + null + ); // Corrections and predictions are based upon the post-context state, though. - postContextState = contextTracker.analyzeState( lexicalModel, - postContext, - !TransformUtils.isEmpty(inputTransform) - ? transformDistribution - : null - ); + postContextState = contextTracker.analyzeState( + lexicalModel, + context, + !TransformUtils.isEmpty(inputTransform) + ? transformDistribution + : null + ); // TODO: Should we filter backspaces & whitespaces out of the transform distribution? // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. diff --git a/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js b/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js index 5c9d4812b68..1ad31b3e8f0 100644 --- a/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js +++ b/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js @@ -4,6 +4,9 @@ import { ContextTracker } from '#./correction/context-tracker.js'; import ModelCompositor from '#./model-compositor.js'; import * as models from '#./models/index.js'; +import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; +import { deepCopy } from '@keymanapp/web-utils'; + import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; describe('ContextTracker', function() { @@ -16,49 +19,76 @@ describe('ContextTracker', function() { describe('attemptMatchContext', function() { it("properly matches and aligns when lead token is removed", function() { - let existingContext = ["an", "apple", "a", "day", "keeps", "the", "doctor"]; + let existingContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor" + }); + let transform = { + insert: '', + deleteLeft: 0 + } + let newContext = 
deepCopy(existingContext); + newContext.left.splice(0, 1); + let rawTokens = [" ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; + + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); + assert.isNotNull(state); + assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + }); + + it("properly matches and aligns when lead token + following whitespace are removed", function() { + let existingContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor" + }); let transform = { insert: '', deleteLeft: 0 } - let newContext = existingContext.slice(0); - newContext.splice(0, 1); - let rawTokens = ["apple", null, "a", null, "day", null, "keeps", null, "the", null, "doctor"]; + let newContext = deepCopy(existingContext); + newContext.left.splice(0, 2); + let rawTokens = ["apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; - let existingState = ContextTracker.modelContextState(existingContext); - let state = ContextTracker.attemptMatchContext(newContext, existingState, null, toWrapperDistribution(transform)); + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); assert.isNotNull(state); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); }); it("properly matches and aligns when final token is edited", function() { - let existingContext = ["an", "apple", "a", "day", "keeps", "the", "docto"]; + let existingContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the docto" + }); let transform = { insert: 'r', deleteLeft: 0 } - let newContext = existingContext.slice(0); - newContext[newContext.length - 1] = 'doctor'; - let rawTokens = ["an", null, "apple", null, "a", null, "day", 
null, "keeps", null, "the", null, "doctor"]; + let newContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor" + }); + let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; - let existingState = ContextTracker.modelContextState(existingContext); - let state = ContextTracker.attemptMatchContext(newContext, existingState, null, toWrapperDistribution(transform)); + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); assert.isNotNull(state); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); }); + // Needs improved context-state management (due to 2x tokens) it("properly matches and aligns when a 'wordbreak' is added", function() { - let existingContext = ["an", "apple", "a", "day", "keeps", "the", "doctor"]; + let existingContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor" + }); let transform = { insert: ' ', deleteLeft: 0 } - let newContext = existingContext.slice(0); - newContext.push(''); - let rawTokens = ["an", null, "apple", null, "a", null, "day", null, "keeps", null, "the", null, "doctor", null, ""]; + let newContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor " + }); + let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; - let existingState = ContextTracker.modelContextState(existingContext); - let state = ContextTracker.attemptMatchContext(newContext, existingState, toWrapperDistribution(transform)); + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); assert.isNotNull(state); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); @@ -68,38 +98,44 @@ 
describe('ContextTracker', function() { }); it("properly matches and aligns when an implied 'wordbreak' occurs (as when following \"'\")", function() { - let existingContext = ["'"]; + let existingContext = models.tokenize(defaultBreaker, { + left: "'" + }); let transform = { insert: 'a', deleteLeft: 0 } - let newContext = existingContext.slice(0); - newContext.push('a'); // The incoming transform should produce a new token WITH TEXT. - let rawTokens = ["'", null, "a"]; + let newContext = models.tokenize(defaultBreaker, { + left: "'a" + }); + let rawTokens = ["'", "a"]; - let existingState = ContextTracker.modelContextState(existingContext); - let state = ContextTracker.attemptMatchContext(newContext, existingState, toWrapperDistribution(transform)); + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); assert.isNotNull(state); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); // The 'wordbreak' transform - assert.isEmpty(state.tokens[state.tokens.length - 2].transformDistributions); + assert.isNotEmpty(state.tokens[state.tokens.length - 2].transformDistributions); assert.isNotEmpty(state.tokens[state.tokens.length - 1].transformDistributions); - }); + }) + // Needs improved context-state management (due to 2x tokens) it("properly matches and aligns when lead token is removed AND a 'wordbreak' is added'", function() { - let existingContext = ["an", "apple", "a", "day", "keeps", "the", "doctor"]; + let existingContext = models.tokenize(defaultBreaker, { + left: "an apple a day keeps the doctor" + }); let transform = { insert: ' ', deleteLeft: 0 } - let newContext = existingContext.slice(0); - newContext.splice(0, 1); - newContext.push(''); - let rawTokens = ["apple", null, "a", null, "day", null, "keeps", null, "the", null, "doctor", null, ""]; + let newContext = models.tokenize(defaultBreaker, { + left: "apple 
a day keeps the doctor " + }); + let rawTokens = ["apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; - let existingState = ContextTracker.modelContextState(existingContext); - let state = ContextTracker.attemptMatchContext(newContext, existingState, toWrapperDistribution(transform)); + let existingState = ContextTracker.modelContextState(existingContext.left); + let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); assert.isNotNull(state); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); @@ -111,18 +147,28 @@ describe('ContextTracker', function() { describe('modelContextState', function() { it('models without final wordbreak', function() { - let context = ["an", "apple", "a", "day", "keeps", "the", "doctor"]; - let rawTokens = ["an", null, "apple", null, "a", null, "day", null, "keeps", null, "the", null, "doctor"]; - - let state = ContextTracker.modelContextState(context); + let tokenized = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"].map((entry) => { + return { + text: entry, + isWhitespace: entry == " " + }; + }); + let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; + + let state = ContextTracker.modelContextState(tokenized); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); }); it('models with final wordbreak', function() { - let context = ["an", "apple", "a", "day", "keeps", "the", "doctor", ""]; - let rawTokens = ["an", null, "apple", null, "a", null, "day", null, "keeps", null, "the", null, "doctor", null, ""]; - - let state = ContextTracker.modelContextState(context); + let tokenized = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""].map((entry) => { + return { + text: entry, + isWhitespace: entry == " " + }; + }); + let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " 
", "doctor", " ", ""]; + + let state = ContextTracker.modelContextState(tokenized); assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); }); }); @@ -133,6 +179,7 @@ describe('ContextTracker', function() { insertAfterWord: ' ' }; + // Needs improved context-state management (due to 2x tokens) it('tracks an accepted suggestion', function() { let baseSuggestion = { transform: { @@ -180,7 +227,7 @@ describe('ContextTracker', function() { let postContextState = compositor.contextTracker.analyzeState(model, postContext); // Penultimate token corresponds to whitespace, which does not have a 'raw' representation. - assert.isNull(postContextState.tokens[postContextState.tokens.length - 2].raw); + assert.equal(postContextState.tokens[postContextState.tokens.length - 2].raw, ' '); // Final token is empty (follows a wordbreak) assert.equal(postContextState.tail.raw, ''); From 965eeaf17b71b2dbee157f76e00c94bf62089c9b Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Wed, 24 Jul 2024 13:04:08 +0700 Subject: [PATCH 17/88] chore(common/models): renames added unit tests --- .../templates/test/test-tokenization.js | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/common/models/templates/test/test-tokenization.js b/common/models/templates/test/test-tokenization.js index 4433a2cf18e..2a5ec136f69 100644 --- a/common/models/templates/test/test-tokenization.js +++ b/common/models/templates/test/test-tokenization.js @@ -169,7 +169,7 @@ describe('Tokenization functions', function() { assert.deepEqual(tokenization, expectedResult); }); - it('empty context case', function() { + it('properly handles empty-context cases', function() { // Wordbreaking on a empty space => no word. 
let context = { left: '', startOfBuffer: true, @@ -187,7 +187,7 @@ describe('Tokenization functions', function() { assert.deepEqual(tokenization, expectedResult); }); - it('nil context case', function() { + it('properly handles null context cases', function() { // Wordbreaking on a empty space => no word. let tokenization = models.tokenize(wordBreakers.default, null); @@ -200,7 +200,7 @@ describe('Tokenization functions', function() { assert.deepEqual(tokenization, expectedResult); }); - it('near-empty context: one space before caret', function() { + it('properly handles a near-empty context: one space before caret', function() { // Wordbreaking on a empty space => no word. let context = { left: ' ', startOfBuffer: true, @@ -437,7 +437,7 @@ describe('Tokenization functions', function() { return wordBreakers.default(text, customization); } - it('treats caret as `eot` for pre-caret text', function() { + it('treats caret as `eot` for pre-caret text tokenization', function() { let context = { left: "don-", // We use a hyphen here b/c single-quote is hardcoded. 
right: " worry", @@ -493,7 +493,7 @@ describe('Tokenization functions', function() { }); describe('getLastPreCaretToken', function() { - it('with pre-whitespace caret', function() { + it('operates properly with pre-whitespace caret', function() { let context = { left: "The quick brown fox", right: " jumped over the lazy dog", @@ -506,7 +506,7 @@ describe('Tokenization functions', function() { assert.equal(tokenization, 'fox'); }); - it('with post-whitespace caret', function() { + it('operates properly with post-whitespace caret', function() { let context = { left: "The quick brown fox ", right: "jumped over the lazy dog", @@ -520,7 +520,7 @@ describe('Tokenization functions', function() { }); - it('with post-whitespace caret, ascii breaker', function() { + it('operates properly with post-whitespace caret, ascii breaker', function() { let context = { left: "The quick brown fox ", right: "jumped over the lazy dog", @@ -533,7 +533,7 @@ describe('Tokenization functions', function() { assert.equal(tokenization, ''); }); - it('within a token', function() { + it('operates properly within a token', function() { let context = { left: "The quick brown fox jum", right: "ped over the lazy dog", @@ -546,14 +546,14 @@ describe('Tokenization functions', function() { assert.equal(tokenization, 'jum'); }); - it('with no context', function() { + it('operates properly with no context', function() { let tokenization = models.getLastPreCaretToken(wordBreakers.default, null); assert.equal(tokenization, ''); }); }); describe('wordbreak', function() { - it('with pre-whitespace caret', function() { + it('operates properly with pre-whitespace caret', function() { let context = { left: "The quick brown fox", right: " jumped over the lazy dog", @@ -566,7 +566,7 @@ describe('Tokenization functions', function() { assert.equal(tokenization, 'fox'); }); - it('with post-whitespace caret', function() { + it('operates properly with post-whitespace caret', function() { let context = { left: "The 
quick brown fox ", right: "jumped over the lazy dog", @@ -581,7 +581,7 @@ describe('Tokenization functions', function() { // This version is subject to change. In the future, we may wish the wordbreak // operation to include "the rest of the word" - the post-caret part. - it('within a token', function() { + it('operates properly within a token', function() { let context = { left: "The quick brown fox jum", right: "ped over the lazy dog", From ef4089d6dcb8472917830400bf6c2272e64e0714 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 25 Jul 2024 08:10:34 +0700 Subject: [PATCH 18/88] chore(web): pulls forward some changes from child branch --- .../src/main/correction/context-tracker.ts | 66 +++++-------------- 1 file changed, 18 insertions(+), 48 deletions(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index a8c42e53a18..dd618efa20c 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -169,45 +169,6 @@ export class TrackedContextState { } } - // pushWhitespaceToTail(transformDistribution: Distribution = null) { - // let whitespaceToken = new TrackedContextToken(); - - // // Track the Transform that resulted in the whitespace 'token'. - // // Will be needed for phrase-level correction/prediction. - // whitespaceToken.transformDistributions = transformDistribution ? [transformDistribution] : []; - - // whitespaceToken.raw = null; - // this.tokens.push(whitespaceToken); - // } - - // /** - // * Used for 14.0's backspace workaround, which flattens all previous Distribution - // * entries because of limitations with direct use of backspace transforms. - // * @param tokenText - // * @param transformId - // */ - // replaceTailForBackspace(tokenText: USVString, transformId: number) { - // this.tokens.pop(); - - // // It's a backspace transform; time for special handling! 
- // // - // // For now, with 14.0, we simply compress all remaining Transforms for the token into - // // multiple single-char transforms. Probabalistically modeling BKSP is quite complex, - // // so we simplify by assuming everything remaining after a BKSP is 'true' and 'intended' text. - // // - // // Note that we cannot just use a single, monolithic transform at this point b/c - // // of our current edit-distance optimization strategy; diagonalization is currently... - // // not very compatible with that. - // let backspacedTokenContext: Distribution[] = textToCharTransforms(tokenText, transformId).map(function(transform) { - // return [{sample: transform, p: 1.0}]; - // }); - - // let compactedToken = new TrackedContextToken(); - // compactedToken.raw = tokenText; - // compactedToken.transformDistributions = backspacedTokenContext; - // this.pushTail(compactedToken); - // } - updateToken(token: TrackedContextToken, transformDistribution: Distribution, tokenText?: USVString) { // Preserve existing text if new text isn't specified. tokenText = tokenText || (tokenText === '' ? '' : token.raw); @@ -424,25 +385,34 @@ export class ContextTracker extends CircularArray { // Now to update the end of the context window. 
for(let i = lastMatch+1; i < editPath.length; i++) { + const isLastToken = i == editPath.length - 1; + const incomingToken = tokenizedContext[i - poppedTokenCount] switch(editPath[i]) { case 'substitute': - if(i == editPath.length - 1) { + if(isLastToken) { state = new TrackedContextState(state); } + const token = state.tokens[i - poppedTokenCount]; + const matchToken = matchState.tokens[i]; + if(isBackspace) { - state.tokens[i - poppedTokenCount].updateWithBackspace(incomingToken.text, primaryInput.id); + token.updateWithBackspace(incomingToken.text, primaryInput.id); } else { - state.updateToken(state.tokens[i - poppedTokenCount], transformDistribution, incomingToken.text); + state.updateToken(token, transformDistribution, incomingToken.text); } - if(state != matchState) { - if(isBackspace) { - matchState.tokens[i].updateWithBackspace(incomingToken.text, primaryInput.id); - } else { - matchState.updateToken(state.tokens[i], transformDistribution, incomingToken.text); - } + // For this case, we were _likely_ called by + // ModelCompositor.acceptSuggestion(), which would have marked the + // accepted suggestion. + // + // Upon inspection, this doesn't seem entirely ideal. It works for + // the common case, but not for specially crafted keystroke + // transforms. That said, it's also very low impact. Best as I can + // see, this is only really used for debugging info? + if(state != matchState && !isLastToken) { + matchToken.replacementText = incomingToken.text; } break; case 'insert': From 8d40bc33637471e4b68cb31ba64a32e6ea2ec8b7 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Horton" Date: Tue, 23 Jul 2024 13:31:37 +0700 Subject: [PATCH 19/88] change(web): leverage tokenization to preserve punctuation and whitespace when predicting --- .../src/main/correction/context-tracker.ts | 180 +++++++++++++----- .../lm-worker/src/main/model-compositor.ts | 2 +- .../web/lm-worker/src/main/predict-helpers.ts | 80 +++----- .../cases/edit-distance/context-tracker.js | 73 +++---- .../mocha/cases/worker-model-compositor.js | 4 +- 5 files changed, 205 insertions(+), 134 deletions(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index dd618efa20c..7e630ddc6a5 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -1,9 +1,10 @@ -import { applyTransform } from '@keymanapp/models-templates'; +import { applyTransform, buildMergedTransform } from '@keymanapp/models-templates'; import { ClassicalDistanceCalculation } from './classical-calculation.js'; import { SearchSpace } from './distance-modeler.js'; import TransformUtils from '../transformUtils.js'; import { determineModelTokenizer } from '../model-helpers.js'; +import { tokenizeTransform, tokenizeTransformDistribution } from './transform-tokenization.js'; function textToCharTransforms(text: string, transformId?: number) { let perCharTransforms: Transform[] = []; @@ -79,6 +80,18 @@ export class TrackedContextToken { this.raw = tokenText; this.transformDistributions = backspacedTokenContext; } + + update(transformDistribution: Distribution, tokenText?: USVString) { + // Preserve existing text if new text isn't specified. + tokenText = tokenText || (tokenText === '' ? '' : this.raw); + + if(transformDistribution && transformDistribution.length > 0) { + this.transformDistributions.push(transformDistribution); + } + + // Replace old token's raw-text with new token's raw-text. 
+ this.raw = tokenText; + } } export class TrackedContextState { @@ -169,20 +182,6 @@ export class TrackedContextState { } } - updateToken(token: TrackedContextToken, transformDistribution: Distribution, tokenText?: USVString) { - // Preserve existing text if new text isn't specified. - tokenText = tokenText || (tokenText === '' ? '' : token.raw); - - if(transformDistribution && transformDistribution.length > 0) { - token.transformDistributions.push(transformDistribution); - if(this.searchSpace) { - this.searchSpace.forEach(space => space.addInput(transformDistribution)); - } - } - // Replace old token's raw-text with new token's raw-text. - token.raw = tokenText; - } - toRawTokenization() { let sequence: USVString[] = []; @@ -292,8 +291,12 @@ export class ContextTracker extends CircularArray { static attemptMatchContext( tokenizedContext: { text: USVString, isWhitespace?: boolean } [], matchState: TrackedContextState, - transformDistribution?: Distribution - ): TrackedContextState { + transformSequenceDistribution?: Distribution + ): { + state: TrackedContextState, + baseState: TrackedContextState, + preservationTransform?: Transform + } { // Map the previous tokenized state to an edit-distance friendly version. let matchContext: USVString[] = matchState.toRawTokenization(); @@ -335,7 +338,7 @@ export class ContextTracker extends CircularArray { // If we have a perfect match with a pre-existing context, no mutations have // happened; just re-use the old context state. if(firstMatch == 0 && lastMatch == editPath.length - 1) { - return matchState; + return { state: matchState, baseState: matchState }; } // If mutations HAVE happened, we have work to do. @@ -368,25 +371,51 @@ export class ContextTracker extends CircularArray { } } - const hasDistribution = transformDistribution && Array.isArray(transformDistribution); - let primaryInput = hasDistribution ? 
transformDistribution[0].sample : null; - if(primaryInput && primaryInput.insert == "" && primaryInput.deleteLeft == 0 && !primaryInput.deleteRight) { - primaryInput = null; - } - - // TODO: "wordbreak" the `insert` section of the transform (if it exists). - // ... wait, might have to be done at a higher level... - // ... and will probably want its own unit test ... - - const isBackspace = primaryInput && TransformUtils.isBackspace(primaryInput); + const hasDistribution = transformSequenceDistribution && Array.isArray(transformSequenceDistribution); // Reset priorEdit for the end-of-context updating loop. priorEdit = undefined; + // TODO: I'm beginning to believe that searchSpace be tracked (eventually) + // on the tokens, rather than on the overall 'state' + // - Reason: phrase-level corrections / predictions would likely need a search-state + // across per potentially-affected token. + // - Shifting the paradigm should be a separate work unit than the + // context-tracker rework currently being done, though. + // Now to update the end of the context window. + + let preservationTransform: Transform; for(let i = lastMatch+1; i < editPath.length; i++) { const isLastToken = i == editPath.length - 1; + if(!hasDistribution) { + throw new Error("Unexpected context-tracking state."); + } + const transformDistIndex = i - (lastMatch + 1); + const tokenDistribution = transformSequenceDistribution.map((entry) => { + return { + sample: entry.sample[transformDistIndex], + p: entry.p + }; + }); + + let primaryInput = hasDistribution ? tokenDistribution[0]?.sample : null; + if(primaryInput && primaryInput.insert == "" && primaryInput.deleteLeft == 0 && !primaryInput.deleteRight) { + primaryInput = null; + } + + // If this token's transform component is not part of the final token, + // it's something we'll want to preserve even when applying suggestions + // for the final token. 
+ // + // Note: will need a either a different approach or more specialized + // handling if/when supporting phrase-level (multi-token) suggestions. + if(!isLastToken) { + preservationTransform = preservationTransform ? buildMergedTransform(preservationTransform, primaryInput) : primaryInput; + } + const isBackspace = primaryInput && TransformUtils.isBackspace(primaryInput); + const incomingToken = tokenizedContext[i - poppedTokenCount] switch(editPath[i]) { case 'substitute': @@ -399,8 +428,23 @@ export class ContextTracker extends CircularArray { if(isBackspace) { token.updateWithBackspace(incomingToken.text, primaryInput.id); + if(isLastToken) { + state.tokens.pop(); // pops `token` + // puts it back in, rebuilding a fresh search-space that uses the rebuilt + // keystroke distribution from updateWithBackspace. + state.pushTail(token); + } } else { - state.updateToken(token, transformDistribution, incomingToken.text); + token.update( + tokenDistribution, + incomingToken.text + ); + + if(isLastToken) { + // Search spaces may not exist during some unit tests; the state + // may not have an associated model during some. + state.searchSpace[0]?.addInput(tokenDistribution); + } } // For this case, we were _likely_ called by @@ -414,12 +458,21 @@ export class ContextTracker extends CircularArray { if(state != matchState && !isLastToken) { matchToken.replacementText = incomingToken.text; } + break; case 'insert': if(priorEdit && priorEdit != 'substitute' && priorEdit != 'match') { return null; } + if(!preservationTransform) { + // Allows for consistent handling of "insert" cases; even if there's no edit + // from a prior token, having a defined transform here indicates that + // a new token has been produced. This serves as a useful conditional flag + // for prediction logic. 
+ preservationTransform = { insert: '', deleteLeft: 0 }; + } + if(state == matchState) { state = new TrackedContextState(state); } @@ -427,11 +480,21 @@ export class ContextTracker extends CircularArray { let pushedToken = new TrackedContextToken(); pushedToken.raw = incomingToken.text; - // TODO: May need something more complicated if the keystroke's - // transform triggers a wordbreak _within_ its boundaries (rather than - // on an edge). (Probably some way to map the tokenization to the indices - // within `insert`.) - pushedToken.transformDistributions = transformDistribution ? [transformDistribution] : []; + // TODO: assumes that there was no shift in wordbreaking from the + // prior context to the current one. This may actually be a major + // issue for dictionary-based wordbreaking! + // + // If there was such a shift, then we may have extra transforms + // originally on a 'previous' token that got moved into this one! + // + // Suppose we're using a dictionary-based wordbreaker and have + // `butterfl` for our context, which could become butterfly. If the + // next keystroke results in `butterfli`, this would likely be + // tokenized `butter` `fli`. (e.g: `fli` leads to `flight`.) How do + // we know to properly relocate the `f` and `l` transforms? + if(primaryInput) { + pushedToken.transformDistributions = tokenDistribution ? [tokenDistribution] : []; + } pushedToken.isWhitespace = incomingToken.isWhitespace; state.pushTail(pushedToken); @@ -448,7 +511,7 @@ export class ContextTracker extends CircularArray { } } - return state; + return { state, baseState: matchState, preservationTransform }; } private static modelContextState( @@ -476,12 +539,12 @@ export class ContextTracker extends CircularArray { // And now build the final context state object, which includes whitespace 'tokens'. 
let state = new TrackedContextState(lexicalModel); - if(baseTokens.length > 0) { - state.pushTail(baseTokens.splice(0, 1)[0]); - } - while(baseTokens.length > 0) { - // state.pushWhitespaceToTail(); + // We don't have a pre-existing distribution for this token, so we'll build one as + // if we'd just produced the token from a backspace. + if(baseTokens.length == 1) { + baseTokens[0].updateWithBackspace(baseTokens[0].raw, null); + } state.pushTail(baseTokens.splice(0, 1)[0]); } @@ -508,19 +571,34 @@ export class ContextTracker extends CircularArray { model: LexicalModel, context: Context, transformDistribution?: Distribution - ): TrackedContextState { + ): { state: TrackedContextState, baseState: TrackedContextState, preservationTransform?: Transform } { if(!model.traverseFromRoot) { // Assumption: LexicalModel provides a valid traverseFromRoot function. (Is technically optional) // Without it, no 'corrections' may be made; the model can only be used to predict, not correct. throw "This lexical model does not provide adequate data for correction algorithms and context reuse"; } + let tokenize = determineModelTokenizer(model); + const inputTransform = transformDistribution?.[0]; + let transformTokenLength = 0; + let tokenizedDistribution: Distribution = null; if(inputTransform) { + // These two methods apply transforms internally; do not mutate context here. + // This particularly matters for the 'distribution' variant. + transformTokenLength = tokenizeTransform(tokenize, context, inputTransform.sample).length; + tokenizedDistribution = tokenizeTransformDistribution(tokenize, context, transformDistribution); + + // Now we update the context used for context-state management based upon our input. context = applyTransform(inputTransform.sample, context); + + // While we lack phrase-based / phrase-oriented prediction support, we'll just extract the + // set that matches the token length that results from our input. 
+ tokenizedDistribution = tokenizedDistribution.filter((entry) => entry.sample.length == transformTokenLength); } - let tokenize = determineModelTokenizer(model); + // As `context` may be updated by the prior if-block, we need to wait to this point in order + // to tokenize the context. let tokenizedContext = tokenize(context); if(tokenizedContext.left.length > 0) { @@ -547,22 +625,22 @@ export class ContextTracker extends CircularArray { continue; } - let resultState = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), transformDistribution); + let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution); - if(resultState) { + if(result?.state) { // Keep it reasonably current! And it's probably fine to have it more than once // in the history. However, if it's the most current already, there's no need // to refresh it. - if(this.newest != resultState && this.newest != priorMatchState) { + if(this.newest != result.state && this.newest != priorMatchState) { // Already has a taggedContext. 
this.enqueue(priorMatchState); } - resultState.taggedContext = context; - if(resultState != this.item(i)) { - this.enqueue(resultState); + result.state.taggedContext = context; + if(result.state != this.item(i)) { + this.enqueue(result.state); } - return resultState; + return result; } } } @@ -575,7 +653,7 @@ export class ContextTracker extends CircularArray { let state = ContextTracker.modelContextState(tokenizedContext.left, model); state.taggedContext = context; this.enqueue(state); - return state; + return { state, baseState: null }; } clearCache() { diff --git a/common/web/lm-worker/src/main/model-compositor.ts b/common/web/lm-worker/src/main/model-compositor.ts index 73f973b3828..b8a11426564 100644 --- a/common/web/lm-worker/src/main/model-compositor.ts +++ b/common/web/lm-worker/src/main/model-compositor.ts @@ -247,7 +247,7 @@ export class ModelCompositor { if(this.contextTracker) { let contextState = this.contextTracker.newest; if(!contextState) { - contextState = this.contextTracker.analyzeState(this.lexicalModel, context); + contextState = this.contextTracker.analyzeState(this.lexicalModel, context).state; } contextState.tail.activeReplacementId = suggestion.id; diff --git a/common/web/lm-worker/src/main/predict-helpers.ts b/common/web/lm-worker/src/main/predict-helpers.ts index e505245edc5..045cc85fde1 100644 --- a/common/web/lm-worker/src/main/predict-helpers.ts +++ b/common/web/lm-worker/src/main/predict-helpers.ts @@ -51,6 +51,7 @@ export type CorrectionPredictionTuple = { correction: ProbabilityMass, totalProb: number; matchLevel: SuggestionSimilarity; + preservationTransform?: Transform; }; export enum SuggestionSimilarity { @@ -105,8 +106,6 @@ export async function correctAndEnumerate( const inputTransform = transformDistribution[0].sample; const postContext = models.applyTransform(inputTransform, context); - let postContextState: TrackedContextState = null; - let rawPredictions: CorrectionPredictionTuple[] = []; // If `this.contextTracker` does 
not exist, we don't have the @@ -150,6 +149,9 @@ export async function correctAndEnumerate( // Running in bulk over all suggestions, duplicate entries may be possible. rawPredictions = predictFromCorrections(lexicalModel, predictionRoots, context); + if(allowSpace) { + rawPredictions.forEach((entry) => entry.preservationTransform = inputTransform); + } return { postContextState: null, @@ -162,19 +164,21 @@ export async function correctAndEnumerate( // facilitates a more thorough correction-search pattern. // Token replacement benefits greatly from knowledge of the prior context state. - let contextState = contextTracker.analyzeState( + let { state: contextState } = contextTracker.analyzeState( lexicalModel, context, null ); + // Corrections and predictions are based upon the post-context state, though. - postContextState = contextTracker.analyzeState( + const contextChangeAnalysis = contextTracker.analyzeState( lexicalModel, context, !TransformUtils.isEmpty(inputTransform) ? transformDistribution : null ); + const postContextState = contextChangeAnalysis.state; // TODO: Should we filter backspaces & whitespaces out of the transform distribution? // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. @@ -192,46 +196,19 @@ export async function correctAndEnumerate( // The amount of text to 'replace' depends upon whatever sort of context change occurs // from the received input. const postContextTokens = postContextState.tokens; - let postContextLength = postContextTokens.length; // Only use of `contextState`. let contextLengthDelta = postContextTokens.length - contextState.tokens.length; // If the context now has more tokens, the token we'll be 'predicting' didn't originally exist. - if(postContextLength == 0 || contextLengthDelta > 0) { + if(contextChangeAnalysis.preservationTransform) { // As the word/token being corrected/predicted didn't originally exist, there's no - // part of it to 'replace'. 
+ // part of it to 'replace'. (Suggestions are applied to the pre-transform state.) deleteLeft = 0; // If the new token is due to whitespace or due to a different input type that would - // likely imply a tokenization boundary... - if(TransformUtils.isWhitespace(inputTransform)) { - /* TODO: consider/implement: the second half of the comment above. - * For example: on input of a `'`, predict new words instead of replacing the `'`. - * (since after a letter, the `'` will be ignored, anyway) - * - * Idea: if the model's most likely prediction (with no root) would make a new - * token if appended to the current token, that's probably a good case. - * Keeps the check simple & quick. - * - * Might need a mixed mode, though: ';' is close enough that `l` is a reasonable - * fat-finger guess. So yeah, we're not addressing this idea right now. - * - so... consider multiple context behavior angles when building prediction roots? - * - * May need something similar to help handle contractions during their construction, - * but that'd be within `ContextTracker`. - * can' => [`can`, `'`] - * can't => [`can't`] (WB6, 7 of https://unicode.org/reports/tr29/#Word_Boundary_Rules) - * - * (Would also helps WB7b+c for Hebrew text) - */ - - // Infer 'new word' mode, even if we received new text when reaching - // this position. That new text didn't exist before, so still - nothing - // to 'replace'. - context = postContext; // As far as predictions are concerned, the post-context state - // should not be replaced. Predictions are to be rooted on - // text "up for correction" - so we want a null root for this - // branch. - } + // likely imply a tokenization boundary, infer 'new word' mode. + // Apply any part of the context change that is not considered + // to be up for correction. + context = models.applyTransform(contextChangeAnalysis.preservationTransform, context); // If the tokenized context length is shorter... sounds like a backspace (or similar). 
} else if (contextLengthDelta < 0) { /* Ooh, we've dropped context here. Almost certainly from a backspace. @@ -313,6 +290,7 @@ export async function correctAndEnumerate( }; let predictions = predictFromCorrections(lexicalModel, [predictionRoot], context); + predictions.forEach((entry) => entry.preservationTransform = contextChangeAnalysis.preservationTransform); // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions. if(predictions.length > 0 && bestCorrectionCost === undefined) { @@ -667,6 +645,22 @@ export function finalizeSuggestions( const suggestions = deduplicatedSuggestionTuples.map((tuple) => { const prediction = tuple.prediction; + // If this is a suggestion after any form of wordbreak input, make sure we preserve any components + // from prior tokens! + // + // Note: may need adjustment if/when supporting phrase-level correction. + if(tuple.preservationTransform) { + let mergedTransform = models.buildMergedTransform(tuple.preservationTransform, prediction.sample.transform); + mergedTransform.id = prediction.sample.transformId; + + // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform. + // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers + let mutableSuggestion = prediction.sample as {-readonly [transform in keyof Suggestion]: Suggestion[transform]}; + + // Assignment via by-reference behavior, as suggestion is an object + mutableSuggestion.transform = mergedTransform; + } + if(!verbose) { return { ...prediction.sample, @@ -713,18 +707,6 @@ export function finalizeSuggestions( } } - // If this is a suggestion after wordbreak input, make sure we preserve the wordbreak transform! 
- if(TransformUtils.isWhitespace(inputTransform)) { - let mergedTransform = models.buildMergedTransform(inputTransform, suggestion.transform); - mergedTransform.id = suggestion.transformId; - - // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform. - // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers - let mutableSuggestion = suggestion as {-readonly [transform in keyof Suggestion]: Suggestion[transform]}; - - // Assignment via by-reference behavior, as suggestion is an object - mutableSuggestion.transform = mergedTransform; - } }); return suggestions; diff --git a/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js b/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js index 1ad31b3e8f0..1067c3cf0df 100644 --- a/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js +++ b/common/web/lm-worker/src/test/mocha/cases/edit-distance/context-tracker.js @@ -10,9 +10,10 @@ import { deepCopy } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; describe('ContextTracker', function() { - function toWrapperDistribution(transform) { + function toWrapperDistribution(transforms) { + transforms = Array.isArray(transforms) ? 
transforms : [transforms]; return [{ - sample: transform, + sample: transforms, p: 1.0 }]; } @@ -30,10 +31,10 @@ describe('ContextTracker', function() { newContext.left.splice(0, 1); let rawTokens = [" ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); }); it("properly matches and aligns when lead token + following whitespace are removed", function() { @@ -48,10 +49,10 @@ describe('ContextTracker', function() { newContext.left.splice(0, 2); let rawTokens = ["apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); }); it("properly matches and aligns when final token is edited", function() { @@ -67,10 +68,10 @@ describe('ContextTracker', function() { }); let rawTokens = ["an", " ", 
"apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, null, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); }); // Needs improved context-state management (due to 2x tokens) @@ -87,12 +88,15 @@ describe('ContextTracker', function() { }); let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); + // We want to preserve the added whitespace when predicting a token that follows after it. 
+ assert.deepEqual(newContextMatch.preservationTransform, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform + let state = newContextMatch?.state; assert.isNotEmpty(state.tokens[state.tokens.length - 2].transformDistributions); assert.isEmpty(state.tokens[state.tokens.length - 1].transformDistributions); }); @@ -110,12 +114,14 @@ describe('ContextTracker', function() { }); let rawTokens = ["'", "a"]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); + assert.deepEqual(newContextMatch.preservationTransform, { insert: '', deleteLeft: 0 }); // The 'wordbreak' transform + let state = newContextMatch.state; assert.isNotEmpty(state.tokens[state.tokens.length - 2].transformDistributions); assert.isNotEmpty(state.tokens[state.tokens.length - 1].transformDistributions); }) @@ -134,12 +140,15 @@ describe('ContextTracker', function() { }); let rawTokens = ["apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; - let existingState = ContextTracker.modelContextState(existingContext.left); - let state = ContextTracker.attemptMatchContext(newContext.left, existingState, toWrapperDistribution(transform)); - assert.isNotNull(state); - assert.deepEqual(state.tokens.map(token => token.raw), rawTokens); + let baseContextMatch = ContextTracker.modelContextState(existingContext.left); + let newContextMatch = ContextTracker.attemptMatchContext(newContext.left, baseContextMatch, 
toWrapperDistribution(transform)); + assert.isNotNull(newContextMatch?.state); + assert.deepEqual(newContextMatch.state.tokens.map(token => token.raw), rawTokens); + // We want to preserve the added whitespace when predicting a token that follows after it. + assert.deepEqual(newContextMatch.preservationTransform, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform + let state = newContextMatch.state; assert.isNotEmpty(state.tokens[state.tokens.length - 2].transformDistributions); assert.isEmpty(state.tokens[state.tokens.length - 1].transformDistributions); }); @@ -209,9 +218,9 @@ describe('ContextTracker', function() { let model = new models.TrieModel(jsonFixture('models/tries/english-1000'), options); let compositor = new ModelCompositor(model); - let baseContextState = compositor.contextTracker.analyzeState(model, baseContext); + let baseContextMatch = compositor.contextTracker.analyzeState(model, baseContext); - baseContextState.tail.replacements = [{ + baseContextMatch.state.tail.replacements = [{ suggestion: baseSuggestion, tokenWidth: 1 }]; @@ -219,18 +228,18 @@ describe('ContextTracker', function() { let reversion = compositor.acceptSuggestion(baseSuggestion, baseContext, postTransform); // Actual test assertion - was the replacement tracked? - assert.equal(baseContextState.tail.activeReplacementId, baseSuggestion.id); + assert.equal(baseContextMatch.state.tail.activeReplacementId, baseSuggestion.id); assert.equal(reversion.id, -baseSuggestion.id); // Next step - on the followup context, is the replacement still active? let postContext = models.applyTransform(baseSuggestion.transform, baseContext); - let postContextState = compositor.contextTracker.analyzeState(model, postContext); + let postContextMatch = compositor.contextTracker.analyzeState(model, postContext); // Penultimate token corresponds to whitespace, which does not have a 'raw' representation. 
- assert.equal(postContextState.tokens[postContextState.tokens.length - 2].raw, ' '); + assert.equal(postContextMatch.state.tokens[postContextMatch.state.tokens.length - 2].raw, ' '); // Final token is empty (follows a wordbreak) - assert.equal(postContextState.tail.raw, ''); + assert.equal(postContextMatch.state.tail.raw, ''); }); }); }); \ No newline at end of file diff --git a/common/web/lm-worker/src/test/mocha/cases/worker-model-compositor.js b/common/web/lm-worker/src/test/mocha/cases/worker-model-compositor.js index ccbc78f50cc..5f4302c13b2 100644 --- a/common/web/lm-worker/src/test/mocha/cases/worker-model-compositor.js +++ b/common/web/lm-worker/src/test/mocha/cases/worker-model-compositor.js @@ -192,7 +192,9 @@ describe('ModelCompositor', function() { const terminatedSuggestions = await firstPredict; const finalSuggestions = await secondPredict; - assert.isOk(terminatedSuggestions.find((entry) => entry.displayAs == 'a')); + if(terminatedSuggestions.length > 0) { + assert.isOk(terminatedSuggestions.find((entry) => entry.displayAs == 'a')); + } assert.isOk(finalSuggestions.find((entry) => entry.displayAs == 'applied')); }); }); From 5322365bbabbf34f7232c6afe8c2d784c3490199 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 25 Jul 2024 08:54:29 +0700 Subject: [PATCH 20/88] chore(web): extra cleanup --- .../src/main/correction/context-tracker.ts | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 7e630ddc6a5..97224682759 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -287,16 +287,32 @@ class CircularArray { } } +interface ContextMatchResult { + /** + * Represents the current state of the context after applying incoming keystroke data. 
+ */ + state: TrackedContextState; + + /** + * Represents the previously-cached context state that best matches `state` if available. + * May be `null` if no such state could be found within the context-state cache. + */ + baseState: TrackedContextState; + + /** + * Indicates the portion of the incoming keystroke data, if any, that applies to + * tokens before the last pre-caret token and thus should not be replaced by predictions + * based upon `state`. + */ + preservationTransform?: Transform; +} + export class ContextTracker extends CircularArray { static attemptMatchContext( tokenizedContext: { text: USVString, isWhitespace?: boolean } [], matchState: TrackedContextState, transformSequenceDistribution?: Distribution - ): { - state: TrackedContextState, - baseState: TrackedContextState, - preservationTransform?: Transform - } { + ): ContextMatchResult { // Map the previous tokenized state to an edit-distance friendly version. let matchContext: USVString[] = matchState.toRawTokenization(); @@ -376,21 +392,20 @@ export class ContextTracker extends CircularArray { // Reset priorEdit for the end-of-context updating loop. priorEdit = undefined; - // TODO: I'm beginning to believe that searchSpace be tracked (eventually) - // on the tokens, rather than on the overall 'state' - // - Reason: phrase-level corrections / predictions would likely need a search-state - // across per potentially-affected token. - // - Shifting the paradigm should be a separate work unit than the - // context-tracker rework currently being done, though. + // Used to construct and represent the part of the incoming transform that + // does not land as part of the final token in the resulting context. This + // component should be preserved by any suggestions that get applied. + let preservationTransform: Transform; // Now to update the end of the context window. 
- - let preservationTransform: Transform; for(let i = lastMatch+1; i < editPath.length; i++) { const isLastToken = i == editPath.length - 1; + // If we didn't get any input, we really should perfectly match + // a previous context state. If such a state is out of our cache, + // it should simply be rebuilt. if(!hasDistribution) { - throw new Error("Unexpected context-tracking state."); + return null; } const transformDistIndex = i - (lastMatch + 1); const tokenDistribution = transformSequenceDistribution.map((entry) => { @@ -400,6 +415,10 @@ export class ContextTracker extends CircularArray { }; }); + // If the tokenized part of the input is a completely empty transform, + // replace it with null. This can happen with our default wordbreaker + // immediately after a whitespace. We don't want to include this + // transform as part of the input when doing correction-search. let primaryInput = hasDistribution ? tokenDistribution[0]?.sample : null; if(primaryInput && primaryInput.insert == "" && primaryInput.deleteLeft == 0 && !primaryInput.deleteRight) { primaryInput = null; @@ -426,6 +445,12 @@ export class ContextTracker extends CircularArray { const token = state.tokens[i - poppedTokenCount]; const matchToken = matchState.tokens[i]; + // TODO: I'm beginning to believe that searchSpace should (eventually) be tracked + // on the tokens, rather than on the overall 'state'. + // - Reason: phrase-level corrections / predictions would likely need a search-state + // across per potentially-affected token. + // - Shifting the paradigm should be a separate work unit than the + // context-tracker rework currently being done, though. if(isBackspace) { token.updateWithBackspace(incomingToken.text, primaryInput.id); if(isLastToken) { @@ -497,6 +522,7 @@ export class ContextTracker extends CircularArray { } pushedToken.isWhitespace = incomingToken.isWhitespace; + // Auto-replaces the search space to correspond with the new token. 
state.pushTail(pushedToken); break; default: @@ -571,7 +597,7 @@ export class ContextTracker extends CircularArray { model: LexicalModel, context: Context, transformDistribution?: Distribution - ): { state: TrackedContextState, baseState: TrackedContextState, preservationTransform?: Transform } { + ): ContextMatchResult { if(!model.traverseFromRoot) { // Assumption: LexicalModel provides a valid traverseFromRoot function. (Is technically optional) // Without it, no 'corrections' may be made; the model can only be used to predict, not correct. From b2c8aea11ae4cfb9c8744aef6f3b664bbe304472 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 25 Jul 2024 09:08:30 +0700 Subject: [PATCH 21/88] docs(web): minor doc tweak --- common/web/lm-worker/src/main/correction/context-tracker.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 97224682759..e707461626d 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -303,6 +303,8 @@ interface ContextMatchResult { * Indicates the portion of the incoming keystroke data, if any, that applies to * tokens before the last pre-caret token and thus should not be replaced by predictions * based upon `state`. + * + * Should always be non-null if the token before the caret did not previously exist. */ preservationTransform?: Transform; } From b7c7f5a41cc4af6b842bd5bd8af6459578bf29a9 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Horton" Date: Mon, 29 Jul 2024 12:13:36 +0700 Subject: [PATCH 22/88] chore(common/models): address PR review comments re needed comment, condition-clarity improvement --- common/models/templates/src/tokenization.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/common/models/templates/src/tokenization.ts b/common/models/templates/src/tokenization.ts index 5db7d872c0c..7e11b72b262 100644 --- a/common/models/templates/src/tokenization.ts +++ b/common/models/templates/src/tokenization.ts @@ -36,6 +36,12 @@ export function tokenize( rejoins?: string[] } ): Tokenization { + // The Unicode word-breaker algorithm looks for places where it's "safe" to + // split a word across lines, operating upon _completed_ words. There are + // some cases where, if placed mid-word, it would add a boundary that does not + // exist at the end of the word. The single-quote character is one such + // location - it's hard to tell if `can'` is the end of a quote or the prefix + // to `can't`. So, if `'` is immediately pre-caret, we "rejoin" it. const rejoins = options?.rejoins || ["'"]; context = context || { left: undefined, @@ -78,9 +84,9 @@ export function tokenize( // // Note: the default wordbreaker won't need this code, as it emits a `''` // after final whitespace. - if(currentIndex != (context.left?.length ?? 0)) { + if(context.left != null && currentIndex != context.left.length) { tokenization.left.push({ - text: context.left!.substring(currentIndex, context.left!.length), + text: context.left.substring(currentIndex, context.left!.length), isWhitespace: true }); currentIndex = context.left!.length; @@ -168,9 +174,9 @@ export function tokenize( // // Also note: is pretty much WET with the similar check after the // leftSpan loop. - if(currentIndex != (context.right?.length ?? 
0)) { + if(context.right && currentIndex != context.right.length) { tokenization.right.push({ - text: context.right!.substring(currentIndex, context.right!.length), + text: context.right.substring(currentIndex, context.right!.length), isWhitespace: true }); currentIndex = context.right!.length; From 969f997ae7d6fef62ebe7ad147374bacb47ca9de Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Mon, 29 Jul 2024 12:15:24 +0700 Subject: [PATCH 23/88] chore(web): missed a review improvement spot - let -> const --- common/web/lm-worker/src/main/correction/context-tracker.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 9dde1d6f9cc..97990c7d1f4 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -536,8 +536,8 @@ export class ContextTracker extends CircularArray { throw "This lexical model does not provide adequate data for correction algorithms and context reuse"; } - let tokenize = determineModelTokenizer(model); - let tokenizedContext = tokenize(context); + const tokenize = determineModelTokenizer(model); + const tokenizedContext = tokenize(context); if(tokenizedContext.left.length > 0) { for(let i = this.count - 1; i >= 0; i--) { From 092b6f7a4f6c3d0f20721b6828e2e143895204ac Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Horton" Date: Mon, 29 Jul 2024 14:04:40 +0700 Subject: [PATCH 24/88] fix(common/models): null-guard left-hand token reference for split-token check during tokenization --- common/models/templates/src/tokenization.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/common/models/templates/src/tokenization.ts b/common/models/templates/src/tokenization.ts index 7e11b72b262..0be102daf0d 100644 --- a/common/models/templates/src/tokenization.ts +++ b/common/models/templates/src/tokenization.ts @@ -143,13 +143,15 @@ export function tokenize( }); currentIndex = nextSpan.start; } else { - // If the first non-whitespace token to the right is non-whitespace, - // and the last token to the left is non-whitespace, the caret may - // be splitting a token. const leftTail = tokenization.left[leftTokenCount-1]; - if(firstRightToken && !leftTail.isWhitespace) { - if(wordBreaker(leftTail!.text + nextSpan.text).length == 1) { - tokenization.caretSplitsToken = true; + if(leftTail) { + // If the first non-whitespace token to the right is non-whitespace, + // and the last token to the left is non-whitespace, the caret may + // be splitting a token. + if(firstRightToken && !leftTail.isWhitespace) { + if(wordBreaker(leftTail!.text + nextSpan.text).length == 1) { + tokenization.caretSplitsToken = true; + } } } From 612adf6faedb959ac8b59e8a9eb5954714b92c94 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Horton" Date: Mon, 29 Jul 2024 15:38:27 +0700 Subject: [PATCH 25/88] fix(web): prevents correction from deleting tokens --- common/web/lm-worker/src/main/predict-helpers.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/web/lm-worker/src/main/predict-helpers.ts b/common/web/lm-worker/src/main/predict-helpers.ts index 045cc85fde1..ad7482be004 100644 --- a/common/web/lm-worker/src/main/predict-helpers.ts +++ b/common/web/lm-worker/src/main/predict-helpers.ts @@ -244,6 +244,11 @@ export async function correctAndEnumerate( // Corrections obtained: now to predict from them! const correction = match.matchString; + // If our 'match' results in fully deleting the new token, reject it and try again. + if(match.matchSequence.length == 0 && match.inputSequence.length != 0) { + continue; + } + // Worth considering: extend Traversal to allow direct prediction lookups? // let traversal = match.finalTraversal; From a966b7b8fcd1be185a99bdf25bd763cf5902fc6a Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Mon, 29 Jul 2024 15:46:25 +0700 Subject: [PATCH 26/88] chore(web): implement optional-chaining PR suggestion --- common/web/lm-worker/src/main/correction/context-tracker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index 0193e03f357..e1b93897b37 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -85,7 +85,7 @@ export class TrackedContextToken { // Preserve existing text if new text isn't specified. tokenText = tokenText || (tokenText === '' ? '' : this.raw); - if(transformDistribution && transformDistribution.length > 0) { + if(transformDistribution?.length > 0) { this.transformDistributions.push(transformDistribution); } From 9904a0c4258477691aff11418777513e65929a4b Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Horton" Date: Wed, 31 Jul 2024 13:00:32 +0700 Subject: [PATCH 27/88] fix(web): improve post-punctuation, post-whitespace handling --- common/models/templates/src/trie-model.ts | 8 ++----- .../src/main/correction/context-tracker.ts | 22 +++++++++++++++++-- .../lm-worker/src/main/model-compositor.ts | 9 +++++++- .../web/lm-worker/src/main/predict-helpers.ts | 12 ++++++++++ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/common/models/templates/src/trie-model.ts b/common/models/templates/src/trie-model.ts index 69f3a852897..cf0d61179fb 100644 --- a/common/models/templates/src/trie-model.ts +++ b/common/models/templates/src/trie-model.ts @@ -98,13 +98,9 @@ class Traversal implements LexiconTraversal { } child(char: USVString): LexiconTraversal | undefined { - /* - Note: would otherwise return the current instance if `char == ''`. If - such a call is happening, it's probably indicative of an implementation - issue elsewhere - let's signal now in order to catch such stuff early. - */ + // May result for blank tokens resulting immediately after whitespace. if(char == '') { - return undefined; + return this; } // Split into individual code units. diff --git a/common/web/lm-worker/src/main/correction/context-tracker.ts b/common/web/lm-worker/src/main/correction/context-tracker.ts index e1b93897b37..92f17c4210a 100644 --- a/common/web/lm-worker/src/main/correction/context-tracker.ts +++ b/common/web/lm-worker/src/main/correction/context-tracker.ts @@ -340,7 +340,14 @@ export class ContextTracker extends CircularArray { } const firstMatch = editPath.indexOf('match'); - const lastMatch = editPath.lastIndexOf('match'); + let lastMatch = editPath.lastIndexOf('match'); + + // Special handling: appending whitespace to whitespace with the default wordbreaker. + // The default wordbreaker currently adds an empty token after whitespace; this would + // show up with 'substitute', 'match' at the end of the edit path. 
+ if(editPath.length >= 2 && editPath[editPath.length - 2] == 'substitute' && editPath[editPath.length - 1] == 'match') { + lastMatch = editPath.lastIndexOf('match', editPath.length - 2); + } // Assertion: for a long context, the bulk of the edit path should be a // continuous block of 'match' entries. If there's anything else in @@ -488,7 +495,7 @@ export class ContextTracker extends CircularArray { break; case 'insert': - if(priorEdit && priorEdit != 'substitute' && priorEdit != 'match') { + if(priorEdit && priorEdit != 'substitute' && priorEdit != 'match' && priorEdit != 'insert') { return null; } @@ -527,6 +534,15 @@ export class ContextTracker extends CircularArray { // Auto-replaces the search space to correspond with the new token. state.pushTail(pushedToken); break; + case 'match': + // The default (Unicode) wordbreaker returns an empty token after whitespace blocks. + // Adding new whitespace extends the whitespace block but preserves the empty token + // following it. + if(priorEdit == 'substitute' && tokenizedContext[tokenizedContext.length-1].text == '') { + // Keep the blank token as-is; no edit needed! + continue; + } + // else 'fallthrough' / return null default: // No 'delete' should exist on the trailing edge of context when the // context window slides. While it can happen due to keystrokes with @@ -537,6 +553,8 @@ export class ContextTracker extends CircularArray { // No 'transform' edits should exist within this section, either. 
return null; } + + priorEdit = editPath[i]; } return { state, baseState: matchState, preservationTransform }; diff --git a/common/web/lm-worker/src/main/model-compositor.ts b/common/web/lm-worker/src/main/model-compositor.ts index b8a11426564..563c966b4c1 100644 --- a/common/web/lm-worker/src/main/model-compositor.ts +++ b/common/web/lm-worker/src/main/model-compositor.ts @@ -95,10 +95,17 @@ export class ModelCompositor { // const allowSpace = TransformUtils.isWhitespace(inputTransform); const inputTransform = transformDistribution[0].sample; const allowBksp = TransformUtils.isBackspace(inputTransform); + const allowWhitespace = TransformUtils.isWhitespace(inputTransform); const postContext = models.applyTransform(inputTransform, context); + + // TODO: It would be best for the correctAndEnumerate method to return the + // suggestion's prefix, as it already has lots of logic oriented to this. + // The context-tracker used there with v14+ models can determine this more + // robustly. const truePrefix = this.wordbreak(postContext); - const basePrefix = allowBksp ? truePrefix : this.wordbreak(context); + // Only use of `truePrefix`. + const basePrefix = (allowBksp || allowWhitespace) ? truePrefix : this.wordbreak(context); let currentCasing: CasingForm = null; if(lexicalModel.languageUsesCasing) { diff --git a/common/web/lm-worker/src/main/predict-helpers.ts b/common/web/lm-worker/src/main/predict-helpers.ts index ad7482be004..8b2b6ed5387 100644 --- a/common/web/lm-worker/src/main/predict-helpers.ts +++ b/common/web/lm-worker/src/main/predict-helpers.ts @@ -232,6 +232,13 @@ export async function correctAndEnumerate( // NOTE: we only want this applied word-initially, when any corrections 'correct' // 100% of the word. Things are generally fine once it's not "all or nothing." let tailToken = postContextTokens[postContextTokens.length - 1]; + + // Did the wordbreaker (or similar) append a blank token before the caret? 
If so, + // preserve that by preventing corrections from triggering left-deletion. + if(tailToken.raw == '') { + deleteLeft = 0; + } + const isTokenStart = tailToken.transformDistributions.length <= 1; // TODO: whitespace, backspace filtering. Do it here. @@ -249,6 +256,11 @@ export async function correctAndEnumerate( continue; } + // If our 'match' fully replaces the token, reject it and try again. + if(match.matchSequence.length != 0 && match.matchSequence.length == match.knownCost) { + continue; + } + // Worth considering: extend Traversal to allow direct prediction lookups? // let traversal = match.finalTraversal; From ab5df20eefe7148225954dae379b8ccaf16c2fa3 Mon Sep 17 00:00:00 2001 From: Eberhard Beilharz Date: Mon, 29 Jul 2024 18:06:32 +0200 Subject: [PATCH 28/88] =?UTF-8?q?chore(web):=20move=20`web/src/engine/path?= =?UTF-8?q?s/`=20=E2=86=92=20`web/src/engine/interfaces/`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: #12035 --- web/README.md | 103 +++++++++++------- web/build.sh | 8 +- web/package.json | 8 +- web/src/app/browser/src/utilApiEndpoint.ts | 2 +- .../engine/{paths => interfaces}/.c8rc.json | 2 +- web/src/engine/{paths => interfaces}/build.sh | 2 +- .../engine/{paths => interfaces}/src/index.ts | 0 .../src/optionSpec.interface.ts | 0 .../src/pathConfiguration.ts | 0 .../{paths => interfaces}/tsconfig.json | 4 +- web/src/engine/main/build.sh | 2 +- .../engine/main/src/engineConfiguration.ts | 2 +- web/src/engine/main/tsconfig.json | 2 +- web/src/engine/package-cache/build.sh | 2 +- .../package-cache/src/cloud/queryEngine.ts | 2 +- .../src/keyboardRequisitioner.ts | 2 +- web/src/engine/package-cache/tsconfig.json | 2 +- .../dom/cases/packages/cloudQueries.spec.ts | 2 +- .../packages/keyboardRequisitioner.spec.ts | 2 +- .../pathConfiguration.js | 2 +- .../engine/package-cache/cloudQueries.js | 2 +- .../package-cache/keyboardRequisitioner.js | 2 +- 22 files changed, 87 insertions(+), 66 
deletions(-) rename web/src/engine/{paths => interfaces}/.c8rc.json (69%) rename web/src/engine/{paths => interfaces}/build.sh (97%) rename web/src/engine/{paths => interfaces}/src/index.ts (100%) rename web/src/engine/{paths => interfaces}/src/optionSpec.interface.ts (100%) rename web/src/engine/{paths => interfaces}/src/pathConfiguration.ts (100%) rename web/src/engine/{paths => interfaces}/tsconfig.json (77%) rename web/src/test/auto/headless/engine/{paths => interfaces}/pathConfiguration.js (99%) diff --git a/web/README.md b/web/README.md index 7558c2a2c20..cb47602bf93 100644 --- a/web/README.md +++ b/web/README.md @@ -9,33 +9,38 @@ configure your build environment. The following folders contain the distribution for Keyman Engine for Web: - src Source code - build/app/resources OSK + UI resources for inclusion in all build types; - keymanweb-osk.ttf is maintained at https://github.com/silnrsi/font-keymanweb-osk - - build/app/browser/release Fully-compiled KeymanWeb modules for release - build/app/webview/release Fully-compiled KMEA/KMEI modules for inclusion in mobile app builds - build/app/browser/debug Fully-compiled but non-minified KeymanWeb modules - build/app/webview/debug Fully-compiled but non-minified KMEA/KMEI modules - - src/samples Sample pages demonstrating ways to link with KeymanWeb - src/test/manual Test-case web-pages for various aspects of KeymanWeb functionality - src/test/auto A Node-driven test suite for automated testing of KeymanWeb +```text +src Source code +build/app/resources OSK + UI resources for inclusion in all build types; + keymanweb-osk.ttf is maintained at https://github.com/silnrsi/font-keymanweb-osk + +build/app/browser/release Fully-compiled KeymanWeb modules for release +build/app/webview/release Fully-compiled KMEA/KMEI modules for inclusion in + mobile app builds +build/app/browser/debug Fully-compiled but non-minified KeymanWeb modules +build/app/webview/debug Fully-compiled but non-minified KMEA/KMEI modules + 
+src/samples Sample pages demonstrating ways to link with KeymanWeb +src/test/manual Test-case web-pages for various aspects of KeymanWeb functionality +src/test/auto A Node-driven test suite for automated testing of KeymanWeb +``` ********************************************************************** ## Usage + Open **index.html** or **samples/index.html** in your browser. Be sure to compile Keyman Engine for Web before viewing the pages. Refer to the samples for usage details. To view pages using compiled Keyman Engine for Web, + 1. cd to **keyman/web/** 2. Run `./build.sh` - Use `./build.sh --help` for the script's documentation. -### Unit Testing ### +### Unit Testing Before running unit tests on Keyman Engine for Web, first run `./build.sh` according to the instructions above. @@ -45,13 +50,14 @@ on your local machine in-browser. Alternatively, see `test.sh`, which the former command executes. ### Debugging Unit Tests + 1. During development, to run a specific unit test, change the `it` to `it.only`. You can also run all tests under a specific group with `describe.only`. 2. From this directory, run `./test.sh --debug`. Alternatively, from `web/` or any `web/` subdirectory, - ``` + ```bash npm run test -- --debug ``` @@ -68,69 +74,84 @@ the former command executes. ### Approximate Overall Design ```mermaid +--- +title: Dependency Graph +--- +%% For rendering, use e.g. https://mermaid.live +%%{init: {"flowchart": {"htmlLabels": false}} }%% graph TD; - OSK[web/src/engine/osk]; - KP["common/web/keyboard-processor"]; - IP["common/web/input-processor"]; + OSK["/web/src/engine/osk"]; + KP["@keymanapp/keyboard-processor
(/common/web/keyboard-processor)"]; + IP["@keymanapp/input-processor
(/common/web/input-processor)"]; OSK-->KP; IP-->KP; - Utils["common/web/utils"]; - KP---->Utils; - Wordbreakers["common/models/wordbreakers"]; - Models["common/models/templates"]; - Models-->Utils; - LMWorker["common/web/lm-worker"]; + WebUtils["@keymanapp/web-utils
(/common/web/utils)"]; + KP---->WebUtils; + Wordbreakers["@keymanapp/models-wordbreakers
(/common/models/wordbreakers)"]; + Models["@keymanapp/models-templates
(/common/models/templates)"]; + Models-->WebUtils; + LMWorker["@keymanapp/lm-worker
(/common/web/lm-worker)"]; LMWorker-->Models; LMWorker-->Wordbreakers; - LMLayer["common/predictive-text"]; + LMLayer["@keymanapp/lexical-model-layer
(/common/predictive-text)"]; LMLayer-->LMWorker; IP-->LMLayer; + Gestures["@keymanapp/gesture-recognizer
(/common/web/gesture-recognizer)"]; + Gestures-->WebUtils; - subgraph PredText["WebWorker + its interface"] + subgraph PredText["PredText: WebWorker + its interface"] LMLayer; LMWorker; Models; Wordbreakers; end - subgraph Headless["Fully headless components"] + subgraph Headless["`**Headless** + Fully headless components`"] direction LR KP; IP; - Utils; + WebUtils; PredText; + Gestures; end - subgraph ClassicWeb["Previously unmodularized components"] - Device[web/src/engine/device-detect]; - Device----->Utils; - Elements[web/src/engine/element-wrappers]; + subgraph ClassicWeb["`**ClassicWeb** + Previously unmodularized components`"] + Device["/web/src/engine/device-detect"]; + Device----->WebUtils; + Elements["/web/src/engine/element-wrappers"]; Elements-->KP; - KeyboardCache[web/src/engine/package-cache]; + KeyboardCache["/web/src/engine/package-cache"]; KeyboardCache-->IP; - DomUtils[web/src/engine/dom-utils]; - DomUtils-->Utils; + KeyboardCache-->Configuration; + DomUtils["/web/src/engine/dom-utils"]; + DomUtils-->WebUtils; + DomUtils-->KP; OSK-->DomUtils; OSK---->IP; - Configuration[web/src/engine/paths]; + OSK-->Gestures; + Configuration["/web/src/engine/interfaces"]; Configuration-->OSK; - CommonEngine[web/src/engine/main]; + CommonEngine["/web/src/engine/main"]; CommonEngine-->Configuration; CommonEngine-->Device; CommonEngine-->KeyboardCache; CommonEngine-->OSK; - Attachment[web/src/engine/attachment]; + CommonEngine-->IP; + Attachment["/web/src/engine/attachment"]; Attachment-->DomUtils; Attachment-->Elements; end - subgraph WebEngine["Keyman Engine for Web (top-level libraries)"] - Browser[web/src/app/browser]; - WebView[web/src/app/webview]; + subgraph WebEngine["`**WebEngine** + Keyman Engine for Web (top-level libraries)`"] + Browser["/web/src/app/browser"]; + WebView["/web/src/app/webview"]; WebView--->CommonEngine; Browser--->CommonEngine; Browser-->Attachment; end -``` \ No newline at end of file +``` diff --git a/web/build.sh b/web/build.sh index 
179ac250dc9..e22581b920f 100755 --- a/web/build.sh +++ b/web/build.sh @@ -30,7 +30,7 @@ builder_describe "Builds engine modules for Keyman Engine for Web (KMW)." \ ":engine/main Builds all common code used by KMW's app/-level targets" \ ":engine/osk Builds the Web OSK module" \ ":engine/package-cache Subset used to collate keyboards and request them from the cloud" \ - ":engine/paths Subset used to configure KMW" \ + ":engine/interfaces Subset used to configure KMW" \ ":samples Builds all needed resources for the KMW sample-page set" \ ":tools Builds engine-related development resources" \ ":test-pages=src/test/manual Builds resources needed for the KMW manual testing pages" \ @@ -60,7 +60,7 @@ builder_describe_outputs \ build:engine/main "/web/build/engine/main/lib/index.mjs" \ build:engine/osk "/web/build/engine/osk/lib/index.mjs" \ build:engine/package-cache "/web/build/engine/package-cache/lib/index.mjs" \ - build:engine/paths "/web/build/engine/paths/lib/index.mjs" \ + build:engine/interfaces "/web/build/engine/interfaces/lib/index.mjs" \ build:samples "/web/src/samples/simplest/keymanweb.js" \ build:tools "/web/build/tools/building/sourcemap-root/index.js" \ build:test-pages "/web/build/test-resources/sentry-manager.js" @@ -147,13 +147,13 @@ builder_run_child_actions build:engine/osk builder_run_child_actions build:engine/attachment # Uses engine/osk (due to resource-path config interface) -builder_run_child_actions build:engine/paths +builder_run_child_actions build:engine/interfaces # Uses engine/config (also due to resource-path config interface, but for the # more complete version of that interface) builder_run_child_actions build:engine/package-cache -# Uses engine/paths, engine/device-detect, engine/package-cache, & engine/osk +# Uses engine/interfaces, engine/device-detect, engine/package-cache, & engine/osk builder_run_child_actions build:engine/main # Uses all but engine/element-wrappers and engine/attachment diff --git a/web/package.json 
b/web/package.json index 79ded888a4a..22a00ce6efb 100644 --- a/web/package.json +++ b/web/package.json @@ -12,10 +12,10 @@ "types": "./build/engine/attachment/obj/index.d.ts", "import": "./build/engine/attachment/obj/index.js" }, - "./engine/paths": { - "es6-bundling": "./src/engine/paths/src/index.ts", - "types": "./build/engine/paths/obj/index.d.ts", - "import": "./build/engine/paths/obj/index.js" + "./engine/interfaces": { + "es6-bundling": "./src/engine/interfaces/src/index.ts", + "types": "./build/engine/interfaces/obj/index.d.ts", + "import": "./build/engine/interfaces/obj/index.js" }, "./engine/device-detect": { "es6-bundling": "./src/engine/device-detect/src/index.ts", diff --git a/web/src/app/browser/src/utilApiEndpoint.ts b/web/src/app/browser/src/utilApiEndpoint.ts index 3b3aa28f2b0..d91c67efb61 100644 --- a/web/src/app/browser/src/utilApiEndpoint.ts +++ b/web/src/app/browser/src/utilApiEndpoint.ts @@ -10,7 +10,7 @@ import { DomEventTracker } from "keyman/engine/events"; import { BrowserConfiguration, BrowserInitOptionSpec } from "./configuration.js"; import { getStyleValue } from "./utils/getStyleValue.js"; import { AlertHost } from "./utils/alertHost.js"; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; /** * Calls document.createElement for the specified node type and also applies diff --git a/web/src/engine/paths/.c8rc.json b/web/src/engine/interfaces/.c8rc.json similarity index 69% rename from web/src/engine/paths/.c8rc.json rename to web/src/engine/interfaces/.c8rc.json index c789716cd50..c42b096f68f 100644 --- a/web/src/engine/paths/.c8rc.json +++ b/web/src/engine/interfaces/.c8rc.json @@ -3,7 +3,7 @@ "clean": true, "exclude-after-remap": true, "reporter": ["text", "text-summary"], - "reports-dir": "build/coverage", + "reports-dir": "../../../build/engine/interfaces/coverage", "src": [ "src/" ] diff --git a/web/src/engine/paths/build.sh b/web/src/engine/interfaces/build.sh 
similarity index 97% rename from web/src/engine/paths/build.sh rename to web/src/engine/interfaces/build.sh index 05a669aa0bf..8df82b3a84f 100755 --- a/web/src/engine/paths/build.sh +++ b/web/src/engine/interfaces/build.sh @@ -6,7 +6,7 @@ THIS_SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")" . "${THIS_SCRIPT%/*}/../../../../resources/build/builder.inc.sh" ## END STANDARD BUILD SCRIPT INCLUDE -SUBPROJECT_NAME=engine/paths +SUBPROJECT_NAME=engine/interfaces . "$KEYMAN_ROOT/web/common.inc.sh" . "$KEYMAN_ROOT/resources/shellHelperFunctions.sh" diff --git a/web/src/engine/paths/src/index.ts b/web/src/engine/interfaces/src/index.ts similarity index 100% rename from web/src/engine/paths/src/index.ts rename to web/src/engine/interfaces/src/index.ts diff --git a/web/src/engine/paths/src/optionSpec.interface.ts b/web/src/engine/interfaces/src/optionSpec.interface.ts similarity index 100% rename from web/src/engine/paths/src/optionSpec.interface.ts rename to web/src/engine/interfaces/src/optionSpec.interface.ts diff --git a/web/src/engine/paths/src/pathConfiguration.ts b/web/src/engine/interfaces/src/pathConfiguration.ts similarity index 100% rename from web/src/engine/paths/src/pathConfiguration.ts rename to web/src/engine/interfaces/src/pathConfiguration.ts diff --git a/web/src/engine/paths/tsconfig.json b/web/src/engine/interfaces/tsconfig.json similarity index 77% rename from web/src/engine/paths/tsconfig.json rename to web/src/engine/interfaces/tsconfig.json index 67460a51051..b2c9909f63d 100644 --- a/web/src/engine/paths/tsconfig.json +++ b/web/src/engine/interfaces/tsconfig.json @@ -4,8 +4,8 @@ "compilerOptions": { "baseUrl": "./", - "outDir": "../../../build/engine/paths/obj/", - "tsBuildInfoFile": "../../../build/engine/paths/obj/tsconfig.tsbuildinfo", + "outDir": "../../../build/engine/interfaces/obj/", + "tsBuildInfoFile": "../../../build/engine/interfaces/obj/tsconfig.tsbuildinfo", "rootDir": "./src" }, diff --git a/web/src/engine/main/build.sh 
b/web/src/engine/main/build.sh index cb1882191fe..475be61c8ac 100755 --- a/web/src/engine/main/build.sh +++ b/web/src/engine/main/build.sh @@ -14,7 +14,7 @@ SUBPROJECT_NAME=engine/main builder_describe "Builds the Keyman Engine for Web's common top-level base classes." \ "@/common/web/input-processor build" \ - "@/web/src/engine/paths build" \ + "@/web/src/engine/interfaces build" \ "@/web/src/engine/device-detect build" \ "@/web/src/engine/package-cache build" \ "@/web/src/engine/osk build" \ diff --git a/web/src/engine/main/src/engineConfiguration.ts b/web/src/engine/main/src/engineConfiguration.ts index eb870c2daf1..c66d3bd805d 100644 --- a/web/src/engine/main/src/engineConfiguration.ts +++ b/web/src/engine/main/src/engineConfiguration.ts @@ -1,7 +1,7 @@ import { EventEmitter } from "eventemitter3"; import { DeviceSpec, KeyboardProperties, ManagedPromise, OutputTarget, physicalKeyDeviceAlias, RuleBehavior, SpacebarText } from "@keymanapp/keyboard-processor"; -import { PathConfiguration, PathOptionDefaults, PathOptionSpec } from "keyman/engine/paths"; +import { PathConfiguration, PathOptionDefaults, PathOptionSpec } from "keyman/engine/interfaces"; import { Device } from "keyman/engine/device-detect"; import { KeyboardStub } from "keyman/engine/package-cache"; diff --git a/web/src/engine/main/tsconfig.json b/web/src/engine/main/tsconfig.json index 8baf1248420..dca9f1b26f9 100644 --- a/web/src/engine/main/tsconfig.json +++ b/web/src/engine/main/tsconfig.json @@ -15,6 +15,6 @@ { "path": "../device-detect" }, { "path": "../osk" }, { "path": "../package-cache" }, - { "path": "../paths" }, + { "path": "../interfaces" }, ] } diff --git a/web/src/engine/package-cache/build.sh b/web/src/engine/package-cache/build.sh index 93f0678a002..40f2a36bb25 100755 --- a/web/src/engine/package-cache/build.sh +++ b/web/src/engine/package-cache/build.sh @@ -15,7 +15,7 @@ SUBPROJECT_NAME=engine/package-cache builder_describe "Builds Keyman Engine modules for keyboard cloud-querying & 
caching + model caching." \ "@/common/web/es-bundling" \ "@/common/web/input-processor build" \ - "@/web/src/engine/paths" \ + "@/web/src/engine/interfaces" \ "clean" \ "configure" \ "build" \ diff --git a/web/src/engine/package-cache/src/cloud/queryEngine.ts b/web/src/engine/package-cache/src/cloud/queryEngine.ts index 46f818dd1cf..eb989d2f00a 100644 --- a/web/src/engine/package-cache/src/cloud/queryEngine.ts +++ b/web/src/engine/package-cache/src/cloud/queryEngine.ts @@ -1,6 +1,6 @@ import { EventEmitter } from 'eventemitter3'; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; import { default as KeyboardStub, ErrorStub, KeyboardAPISpec, mergeAndResolveStubPromises } from '../keyboardStub.js'; import { LanguageAPIPropertySpec, ManagedPromise, Version } from '@keymanapp/keyboard-processor'; diff --git a/web/src/engine/package-cache/src/keyboardRequisitioner.ts b/web/src/engine/package-cache/src/keyboardRequisitioner.ts index 88c8ea036d2..13cc7ecef83 100644 --- a/web/src/engine/package-cache/src/keyboardRequisitioner.ts +++ b/web/src/engine/package-cache/src/keyboardRequisitioner.ts @@ -4,7 +4,7 @@ import { LanguageAPIPropertySpec, RawKeyboardMetadata } from "@keymanapp/keyboard-processor"; -import { PathConfiguration } from "keyman/engine/paths"; +import { PathConfiguration } from "keyman/engine/interfaces"; // TODO: is cleanup needed here, to use local paths instead? 
import { diff --git a/web/src/engine/package-cache/tsconfig.json b/web/src/engine/package-cache/tsconfig.json index ab595b57983..e2553e658a0 100644 --- a/web/src/engine/package-cache/tsconfig.json +++ b/web/src/engine/package-cache/tsconfig.json @@ -12,6 +12,6 @@ "references": [ { "path": "../../../../common/web/input-processor" }, - { "path": "../paths" } + { "path": "../interfaces" } ] } diff --git a/web/src/test/auto/dom/cases/packages/cloudQueries.spec.ts b/web/src/test/auto/dom/cases/packages/cloudQueries.spec.ts index c612ee9b77d..9a2fe93c45a 100644 --- a/web/src/test/auto/dom/cases/packages/cloudQueries.spec.ts +++ b/web/src/test/auto/dom/cases/packages/cloudQueries.spec.ts @@ -2,7 +2,7 @@ import { assert } from 'chai'; import sinon from 'sinon'; import { CloudQueryEngine, type KeyboardStub } from 'keyman/engine/package-cache'; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; import DOMCloudRequester from 'keyman/engine/package-cache/dom-requester'; import { ManagedPromise } from '@keymanapp/web-utils'; diff --git a/web/src/test/auto/dom/cases/packages/keyboardRequisitioner.spec.ts b/web/src/test/auto/dom/cases/packages/keyboardRequisitioner.spec.ts index dddcafd8ede..ab585bc9e07 100644 --- a/web/src/test/auto/dom/cases/packages/keyboardRequisitioner.spec.ts +++ b/web/src/test/auto/dom/cases/packages/keyboardRequisitioner.spec.ts @@ -3,7 +3,7 @@ import sinon from 'sinon'; import { KeyboardHarness, MinimalKeymanGlobal } from '@keymanapp/keyboard-processor'; import { DOMKeyboardLoader } from '@keymanapp/keyboard-processor/dom-keyboard-loader'; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; import { CloudQueryEngine, KeyboardRequisitioner, type KeyboardStub } from 'keyman/engine/package-cache'; import DOMCloudRequester from 'keyman/engine/package-cache/dom-requester'; diff --git 
a/web/src/test/auto/headless/engine/paths/pathConfiguration.js b/web/src/test/auto/headless/engine/interfaces/pathConfiguration.js similarity index 99% rename from web/src/test/auto/headless/engine/paths/pathConfiguration.js rename to web/src/test/auto/headless/engine/interfaces/pathConfiguration.js index fc3cfdfb5ad..9a212480839 100644 --- a/web/src/test/auto/headless/engine/paths/pathConfiguration.js +++ b/web/src/test/auto/headless/engine/interfaces/pathConfiguration.js @@ -1,7 +1,7 @@ import { assert } from 'chai'; import sinon from 'sinon'; -import { PathOptionDefaults, PathConfiguration } from 'keyman/engine/paths'; +import { PathOptionDefaults, PathConfiguration } from 'keyman/engine/interfaces'; // Tests the activation-state logic abstraction & implementations used to model and control OSK visibility. diff --git a/web/src/test/auto/headless/engine/package-cache/cloudQueries.js b/web/src/test/auto/headless/engine/package-cache/cloudQueries.js index 685affc126c..4f379ea5636 100644 --- a/web/src/test/auto/headless/engine/package-cache/cloudQueries.js +++ b/web/src/test/auto/headless/engine/package-cache/cloudQueries.js @@ -3,7 +3,7 @@ import sinon from 'sinon'; import { ManagedPromise } from '@keymanapp/web-utils'; import { CloudQueryEngine, StubAndKeyboardCache, toPrefixedKeyboardId as prefixed } from 'keyman/engine/package-cache'; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; import NodeCloudRequester from 'keyman/engine/package-cache/node-requester'; import path from 'path'; diff --git a/web/src/test/auto/headless/engine/package-cache/keyboardRequisitioner.js b/web/src/test/auto/headless/engine/package-cache/keyboardRequisitioner.js index 1fd2f19f2c9..b2d64c05538 100644 --- a/web/src/test/auto/headless/engine/package-cache/keyboardRequisitioner.js +++ b/web/src/test/auto/headless/engine/package-cache/keyboardRequisitioner.js @@ -8,7 +8,7 @@ import { KeyboardRequisitioner, 
toPrefixedKeyboardId as prefixed } from 'keyman/engine/package-cache'; -import { PathConfiguration } from 'keyman/engine/paths'; +import { PathConfiguration } from 'keyman/engine/interfaces'; import NodeCloudRequester from 'keyman/engine/package-cache/node-requester'; import path from 'path'; From d8986be12fe0fb2078b93fa4650e2a7fe06f16d1 Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 1 Aug 2024 07:07:07 +0700 Subject: [PATCH 29/88] refactor(common): merge master into branch --- .clang-format | 4 +- .github/workflows/deb-packaging.yml | 2 +- HISTORY.md | 248 +- VERSION.md | 2 +- android/.gitignore | 2 - .../src/main/res/values-b+el/strings.xml | 164 + .../src/main/res/values-pt-rPT/strings.xml | 10 +- .../KMEA/app/src/main/assets/android-host.js | 14 +- .../app/src/main/assets/keyboard.es5.html | 30 - .../com/keyman/engine/DisplayLanguages.java | 1 + .../java/com/keyman/engine/KMKeyboard.java | 6 + .../java/com/keyman/engine/KMManager.java | 32 +- .../app/src/main/res/values-b+el/strings.xml | 172 + .../src/main/res/values-pt-rPT/strings.xml | 20 +- android/KMEA/build.sh | 2 - common/models/templates/src/common.ts | 5 +- common/models/templates/src/index.ts | 1 - common/models/templates/src/trie-model.ts | 246 +- .../templates/test/test-trie-traversal.js | 153 +- common/models/types/index.d.ts | 49 +- .../models/wordbreakers/src/default/index.ts | 2 +- .../headless/worker-trie-integration.js | 5 +- .../cases/top-level-lmlayer.spec.ts | 4 +- .../cases/worker-dummy-integration.spec.ts | 16 +- .../cases/worker-trie-integration.spec.ts | 3 +- .../i_got_distracted_by_hazel.json | 2 +- common/test/resources/model-helpers.mjs | 13 +- common/test/resources/models/simple-dummy.js | 30 +- common/test/resources/test-timeouts.mjs | 1 + common/test/resources/timeout-adapter.js | 43 - .../sourcemap-path-remapper/tsconfig.json | 2 +- common/web/es-bundling/build.sh | 3 +- common/web/es-bundling/src/common-bundle.mts | 4 +- common/web/es-bundling/src/configuration.mts | 7 +- 
.../src/test/auto/browser/cases/canary.def.ts | 4 +- .../auto/browser/cases/ignoredInputs.def.ts | 4 +- common/web/input-processor/src/corrections.ts | 7 +- .../src/text/prediction/languageProcessor.ts | 34 +- .../src/text/prediction/predictionContext.ts | 82 +- .../tests/cases/predictionContext.js | 2 +- common/web/keyboard-processor/build.sh | 24 +- .../src/keyboards/activeLayout.ts | 306 +- .../src/keyboards/defaultLayouts.ts | 16 +- .../src/keyboards/keyboard.ts | 14 +- .../src/text/stringDivergence.ts | 5 +- .../tests/dom/cases/domKeyboardLoader.spec.ts | 32 +- .../tests/dom/web-test-runner.config.mjs | 4 +- .../keyboard-processor/tests/tsconfig.json | 11 + .../web/keyboard-processor/tsconfig.all.json | 5 +- common/web/keyboard-processor/tsconfig.json | 4 +- common/web/keyman-version/tsconfig.json | 2 +- common/web/lm-message-types/tsconfig.json | 2 +- common/web/lm-worker/build-polyfiller.js | 7 +- common/web/lm-worker/build.sh | 34 +- common/web/lm-worker/package.json | 6 +- .../src/main/correction/distance-modeler.ts | 366 +- .../src/main/correction/execution-timer.ts | 437 ++ .../lm-worker/src/main/correction/index.ts | 3 +- common/web/lm-worker/src/main/index.ts | 21 +- .../lm-worker/src/main/model-compositor.ts | 631 +- .../web/lm-worker/src/main/model-helpers.ts | 106 + .../lm-worker/src/main/models/dummy-model.ts | 39 +- .../web/lm-worker/src/main/predict-helpers.ts | 791 +++ .../web/lm-worker/src/polyfills/array.from.js | 44 + .../src/test/mocha/cases/auto-correct.js | 598 ++ .../src/test/mocha/cases/casing-detection.js | 386 ++ .../cases/early-correction-search-stopping.js | 61 + .../cases/edit-distance/distance-modeler.js | 127 +- .../cases/edit-distance/execution-timer.js | 383 ++ .../mocha/cases/predict-from-corrections.js | 259 + .../mocha/cases/suggestion-deduplication.js | 154 + .../mocha/cases/suggestion-finalization.js | 368 ++ .../test/mocha/cases/suggestion-similarity.js | 512 ++ .../mocha/cases/worker-custom-punctuation.js | 14 +- 
.../mocha/cases/worker-model-compositor.js | 153 +- .../src/test/mocha/cases/worker-predict.js | 15 +- .../src/test/test-runner/cases/worker.spec.ts | 3 +- common/web/sentry-manager/src/index.ts | 7 +- common/web/tslib/README.md | 15 - common/web/tslib/build.sh | 25 - common/web/tslib/package.json | 28 - common/web/tslib/src/index.ts | 3 - common/web/tslib/tsconfig.json | 16 - common/web/types/src/deps/xml2js/parser.js | 8 +- common/web/types/src/kmx/element-string.ts | 3 +- common/web/types/src/kmx/kmx.ts | 50 +- .../types/src/ldml-keyboard/pattern-parser.ts | 3 +- common/web/types/src/main.ts | 1 + common/web/types/src/util/consts.ts | 12 + common/web/types/src/util/file-types.ts | 10 + common/web/types/src/util/util.ts | 21 +- common/web/utils/src/index.ts | 4 +- .../utils}/src/priority-queue.ts | 8 +- common/web/utils/src/surrogates.ts | 27 - .../utils/src/test/priorityQueue.js} | 2 +- common/windows/build.sh | 2 +- common/windows/cpp/src/registry.cpp | 2 +- core/commands.inc.sh | 9 +- core/src/actions_normalize.cpp | 146 +- core/src/context.hpp | 24 + core/src/core_icu.cpp | 72 + core/src/core_icu.h | 64 +- core/src/km_core_context_api.cpp | 7 + core/src/kmx/kmx_processevent.cpp | 4 - core/src/kmx/kmx_xstring.cpp | 14 + core/src/kmx/kmx_xstring.h | 3 + core/src/ldml/ldml_markers.cpp | 35 +- core/src/ldml/ldml_markers.hpp | 4 - core/src/ldml/ldml_transforms.cpp | 174 +- core/src/ldml/ldml_transforms.hpp | 10 +- core/src/meson.build | 44 +- core/src/util_normalize.cpp | 199 +- core/src/util_normalize.hpp | 21 + core/src/util_normalize_table_generator.cpp | 108 + core/src/util_regex.cpp | 352 ++ core/src/util_regex.hpp | 48 + core/tests/unit/kmnkbd/meson.build | 2 +- core/tests/unit/ldml/core_ldml_min.cpp | 39 + core/tests/unit/ldml/ldml_test_source.cpp | 3 + core/tests/unit/ldml/meson.build | 13 +- core/tests/unit/ldml/test_kmx_plus.cpp | 1 + core/tests/unit/ldml/test_transforms.cpp | 127 +- core/tests/unit/ldml/test_unicode.cpp | 50 + crowdin.yml | 8 +- 
developer/docs/api/etc/kmc-analyze.api.md | 2 +- .../docs/api/etc/kmc-keyboard-info.api.md | 36 +- developer/docs/api/etc/kmc-kmn.api.md | 104 +- developer/docs/api/etc/kmc-ldml.api.md | 98 +- developer/docs/api/etc/kmc-package.api.md | 14 + developer/src/.gitignore | 1 + .../src/common/include/kmn_compiler_errors.h | 20 +- .../src/common/web/test-helpers/tsconfig.json | 6 - developer/src/common/web/utils/src/index.ts | 1 + .../common/web/utils/src/is-valid-email.ts | 18 + .../web/utils/test/test-is-valid-email.ts | 35 + .../src/common/web/utils/test/tsconfig.json | 8 +- developer/src/kmc-analyze/build.sh | 1 + .../src/{messages.ts => analyzer-messages.ts} | 3 + developer/src/kmc-analyze/src/index.ts | 2 +- .../src/osk-character-use/index.ts | 8 +- .../kmc-analyze/src/osk-rewrite-pua/index.ts | 8 +- developer/src/kmc-analyze/tsconfig.json | 10 - developer/src/kmc-keyboard-info/build.sh | 1 + .../src/keyboard-info-compiler.ts | 9 +- .../khmer_angkor.kps | 67 + .../multiple-email-addresses/LICENSE.md | 21 + .../build/.gitattributes | 1 + .../build/khmer_angkor.js | 5462 +++++++++++++++++ .../build/khmer_angkor.kmp | Bin 0 -> 4291221 bytes .../build/khmer_angkor.kmx | Bin 0 -> 27666 bytes .../multiple-email-addresses/khmer_angkor.kpj | 187 + .../source/khmer_angkor.kps | 49 + .../test-keyboard-info-compiler-messages.ts | 43 +- .../test/test-keyboard-info-compiler.ts | 47 +- .../src/kmc-keyboard-info/test/tsconfig.json | 6 - developer/src/kmc-keyboard-info/tsconfig.json | 10 - developer/src/kmc-kmn/build.sh | 3 +- developer/src/kmc-kmn/package.json | 4 +- .../src/kmc-kmn/src/compiler/compiler.ts | 30 +- .../src/compiler/kmn-compiler-messages.ts | 36 +- .../src/kmw-compiler/compiler-globals.ts | 99 +- .../src/kmw-compiler/javascript-strings.ts | 40 +- .../src/kmw-compiler/kmw-compiler-messages.ts | 11 +- .../kmc-kmn/src/kmw-compiler/kmw-compiler.ts | 33 +- .../src/kmw-compiler/validate-layout-file.ts | 20 +- developer/src/kmc-kmn/src/main.ts | 2 +- 
.../keyboards/hint_index_store_long.kmn | 11 + .../keyboards/hint_index_store_long_key.kmn | 11 + .../keyboards/warn_index_store_short.kmn | 11 + .../keyboards/warn_index_store_short_key.kmn | 11 + .../test/fixtures/kmw/version_9_caps_lock.kmn | 11 + .../kmw/version_9_chiral_modifiers.kmn | 11 + .../fixtures/kmw/version_auto_caps_lock.kmn | 10 + .../kmw/version_auto_chiral_modifiers.kmn | 10 + .../kmw/version_gestures.keyman-touch-layout | 51 + .../test/fixtures/kmw/version_gestures.kmn | 9 + .../test/fixtures/kmw/version_gestures_16.kmn | 11 + .../test/fixtures/kmw/version_notany.kmn | 12 + .../test/fixtures/kmw/version_notany_10.kmn | 13 + ...rsion_special_key_caps.keyman-touch-layout | 45 + .../fixtures/kmw/version_special_key_caps.kmn | 9 + .../kmw/version_special_key_caps_14.kmn | 12 + .../version_u_xxxx_yyyy.keyman-touch-layout | 53 + .../test/fixtures/kmw/version_u_xxxx_yyyy.kmn | 7 + .../fixtures/kmw/version_u_xxxx_yyyy_14.kmn | 8 + .../src/kmc-kmn/test/kmw/test-kmw-compiler.ts | 175 +- .../src/kmc-kmn/test/kmw/test-kmw-messages.ts | 15 + developer/src/kmc-kmn/test/test-features.ts | 12 +- developer/src/kmc-kmn/test/test-messages.ts | 24 +- developer/src/kmc-kmn/test/test-wasm-uset.ts | 14 +- developer/src/kmc-kmn/test/tsconfig.json | 4 - developer/src/kmc-kmn/tsconfig.json | 10 - .../src/kmc-ldml/src/compiler/compiler.ts | 56 +- developer/src/kmc-ldml/src/compiler/disp.ts | 8 +- .../kmc-ldml/src/compiler/empty-compiler.ts | 10 +- developer/src/kmc-ldml/src/compiler/keys.ts | 22 +- developer/src/kmc-ldml/src/compiler/layr.ts | 8 +- ...{messages.ts => ldml-compiler-messages.ts} | 36 +- .../kmc-ldml/src/compiler/linter-keycaps.ts | 51 + developer/src/kmc-ldml/src/compiler/linter.ts | 23 + developer/src/kmc-ldml/src/compiler/loca.ts | 8 +- developer/src/kmc-ldml/src/compiler/meta.ts | 8 +- developer/src/kmc-ldml/src/compiler/tran.ts | 28 +- developer/src/kmc-ldml/src/compiler/vars.ts | 16 +- .../src/compiler/visual-keyboard-compiler.ts | 6 +- 
developer/src/kmc-ldml/src/main.ts | 2 +- .../fixtures/sections/keys/warn-no-keycap.xml | 29 + .../sections/layr/error-bogus-modifiers.xml | 12 + .../src/kmc-ldml/test/test-compiler-e2e.ts | 60 +- developer/src/kmc-ldml/test/test-disp.ts | 14 +- developer/src/kmc-ldml/test/test-helpers.ts | 14 +- developer/src/kmc-ldml/test/test-keys.ts | 34 +- developer/src/kmc-ldml/test/test-layr.ts | 14 +- developer/src/kmc-ldml/test/test-linter.ts | 30 + developer/src/kmc-ldml/test/test-loca.ts | 12 +- developer/src/kmc-ldml/test/test-messages.ts | 8 +- developer/src/kmc-ldml/test/test-meta.ts | 8 +- developer/src/kmc-ldml/test/test-strs.ts | 74 + developer/src/kmc-ldml/test/test-tran.ts | 50 +- developer/src/kmc-ldml/test/test-vars.ts | 24 +- developer/src/kmc-ldml/test/tsconfig.json | 10 - developer/src/kmc-ldml/tsconfig.json | 14 - .../kmc-model-info/src/model-info-compiler.ts | 7 +- .../src/kmc-model-info/test/tsconfig.json | 6 - developer/src/kmc-model-info/tsconfig.json | 9 - developer/src/kmc-model/package.json | 2 +- developer/src/kmc-model/test/tsconfig.json | 4 - developer/src/kmc-model/tsconfig.json | 5 - .../kmc-package/src/compiler/kmp-compiler.ts | 16 +- .../src/compiler/package-compiler-messages.ts | 6 +- .../package-keyboard-target-validator.ts | 6 +- .../compiler/package-metadata-collector.ts | 12 +- .../src/compiler/package-validation.ts | 46 +- .../src/compiler/package-version-validator.ts | 10 +- .../windows-package-installer-compiler.ts | 8 +- developer/src/kmc-package/src/main.ts | 2 +- .../invalid/error_invalid_author_email.kps | 32 + .../error_invalid_author_email_multiple.kps | 32 + .../src/kmc-package/test/test-messages.ts | 76 +- .../kmc-package/test/test-package-compiler.ts | 8 +- developer/src/kmc-package/test/tsconfig.json | 6 - developer/src/kmc-package/tsconfig.json | 4 - developer/src/kmc/build.sh | 3 +- developer/src/kmc/package.json | 2 +- .../buildClasses/BuildKeyboardInfo.ts | 4 +- .../commands/buildClasses/BuildModelInfo.ts | 4 +- 
.../src/kmc/src/commands/messageCommand.ts | 48 +- .../src/messages/infrastructureMessages.ts | 9 +- .../src/kmc/src/messages/messageNamespaces.ts | 8 +- .../kmc/src/util/extendedCompilerOptions.ts | 50 +- .../src/kmc/src/util/getLastGitCommitDate.ts | 22 +- .../get-last-git-commit-date/README.md | 5 + .../src/kmc/test/test-getLastGitCommitDate.ts | 9 + developer/src/kmc/test/tsconfig.json | 6 - developer/src/kmc/tsconfig.json | 21 - developer/src/kmcmplib/include/kmcompx.h | 1 + developer/src/kmcmplib/src/CompMsg.cpp | 32 +- developer/src/kmcmplib/src/CompMsg.h | 2 +- .../kmcmplib/src/CompileKeyboardBuffer.cpp | 6 +- developer/src/kmcmplib/src/Compiler.cpp | 148 +- .../src/kmcmplib/src/NamedCodeConstants.cpp | 2 +- developer/src/kmcmplib/src/kmx_u16.cpp | 14 +- developer/src/kmcmplib/src/kmx_u16.h | 1 + developer/src/kmcmplib/src/meson.build | 4 +- developer/src/kmcmplib/src/versioning.cpp | 8 +- developer/src/kmcmplib/tests/api-test.cpp | 33 +- .../kmcmplib/tests/gtest-compiler-test.cpp | 917 ++- .../src/kmcmplib/tests/gtest-compmsg-test.cpp | 19 + .../src/kmcmplib/tests/gtest-kmx_u16-test.cpp | 168 + developer/src/kmcmplib/tests/kmcompxtest.cpp | 10 +- developer/src/kmcmplib/tests/meson.build | 22 + .../src/kmcmplib/tests/util_callbacks.cpp | 6 +- ...Developer.System.ImportWindowsKeyboard.pas | 46 +- ....VisualKeyboardToTouchLayoutConverter.pas} | 18 +- developer/src/kmconvert/kmconvert.dpr | 2 +- developer/src/kmconvert/kmconvert.dproj | 2 +- developer/src/server/package.json | 2 +- developer/src/server/tsconfig.json | 8 - .../oskbuilder/UframeTouchLayoutBuilder.pas | 6 +- ...n.Developer.System.Project.ProjectFile.pas | 13 +- ...eveloper.System.Project.kpsProjectFile.pas | 7 + developer/src/tike/tike.dpr | 2 +- developer/src/tike/tike.dproj | 2 +- developer/src/tike/xml/help/contexthelp.xml | 478 +- .../src/tike/xml/layoutbuilder/builder.xsl | 4 + .../xml/layoutbuilder/platform-controls.js | 24 +- docs/build/macos.md | 4 +- docs/linux/keyman-config.md | 3 +- 
.../KeymanEngine.xcodeproj/project.pbxproj | 7 + .../Classes/el.lproj/ResourceInfoView.strings | 2 + .../KeymanEngine/el.lproj/Localizable.strings | 257 + .../el.lproj/Localizable.stringsdict | 118 + .../pt-PT.lproj/Localizable.strings | 2 +- .../Contents/Resources/ios-host.js | 4 +- .../Keyman/Keyman.xcodeproj/project.pbxproj | 5 +- .../Keyman/el.lproj/Localizable.strings | 81 + linux/.pbuilderrc | 82 - linux/Makefile | 14 +- linux/debian/changelog | 7 + linux/ibus-keyman/.gitignore | 1 + linux/ibus-keyman/build.sh | 9 + linux/ibus-keyman/meson.build | 11 +- linux/ibus-keyman/src/engine.c | 30 +- linux/ibus-keyman/src/keymanutil.c | 218 +- linux/ibus-keyman/src/keymanutil.h | 108 +- linux/ibus-keyman/src/test/keymanutil_tests.c | 310 +- linux/ibus-keyman/tests/meson.build | 2 +- linux/ibus-keyman/tests/scripts/run-tests.sh | 2 +- .../tests/scripts/test-helper.inc.sh | 28 +- linux/ibus-keyman/tests/testfixture.cpp | 2 +- .../keyman-config/keyman_config/convertico.py | 20 +- linux/keyman-config/keyman_config/get_kmp.py | 50 +- .../keyman_config/install_kmp.py | 8 +- .../keyman_config/keyman_option.py | 42 + .../keyman_config/options_widget.py | 12 +- .../keyman_config/sentry_handling.py | 17 +- .../keyman_config/view_installed.py | 61 +- linux/keyman-config/km-kvk2ldml | 4 +- .../resources/com.keyman.gschema.xml | 6 + linux/keyman-system-service/.gitignore | 1 + linux/keyman-system-service/build.sh | 9 + .../resources/{meson.build => meson.build.in} | 1 + linux/scripts/cow.sh | 50 - linux/scripts/deb-packaging.sh | 2 +- linux/scripts/deb.sh | 51 +- linux/scripts/dist.sh | 2 +- linux/scripts/launchpad.sh | 2 +- linux/scripts/upload-to-debian.sh | 28 +- .../KMInfoWindowController.strings | 2 +- .../Keyman4MacIM/KMInputMethodAppDelegate.m | 33 +- .../Keyman4MacIM/KMInputMethodEventHandler.m | 4 + .../pt-PT.lproj/Localizable.strings | 21 +- oem/firstvoices/keyboards.csv | 52 +- package-lock.json | 1300 ++-- package.json | 8 +- resources/stats/stats.sh | 31 +- 
web/build.sh | 57 +- web/package.json | 1 - web/src/app/browser/build.sh | 12 - web/src/app/browser/src/contextManager.ts | 4 +- web/src/app/browser/src/keymanEngine.ts | 44 +- web/src/app/browser/src/viewsAnchorpoint.ts | 6 +- web/src/app/webview/build.sh | 10 - web/src/app/webview/src/keymanEngine.ts | 37 +- web/src/engine/attachment/src/index.ts | 2 +- web/src/engine/main/src/keymanEngine.ts | 6 +- .../engine/osk/src/banner/suggestionBanner.ts | 48 +- .../osk/src/config/viewConfiguration.ts | 9 - .../src/input/gestures/browser/oskSubKey.ts | 3 +- .../osk/src/keyboard-layout/oskLayerGroup.ts | 88 +- .../engine/osk/src/views/floatingOskView.ts | 24 +- web/src/engine/osk/src/views/oskView.ts | 22 +- web/src/engine/osk/src/visualKeyboard.ts | 29 +- .../attachment/outputTargetForElement.def.ts | 48 +- .../outputTargetForElement.spec.html | 2 +- .../attachment/pageContextAttachment.spec.ts | 341 +- .../dom/cases/browser/contextManager.spec.ts | 83 +- .../element_interfaces.spec.ts | 975 +-- .../element-wrappers/target_mocks.spec.ts | 124 +- .../auto/dom/cases/osk/activation.spec.ts | 8 +- .../test/auto/dom/cases/osk/events.spec.ts | 10 +- .../dom/cases/packages/cloudQueries.spec.ts | 20 +- .../cases/packages/domCloudRequester.spec.ts | 14 +- .../packages/keyboardRequisitioner.spec.ts | 12 +- web/src/test/auto/dom/kbdLoader.ts | 12 +- web/src/test/auto/dom/test_utils.ts | 34 +- .../test/auto/dom/web-test-runner.config.mjs | 16 +- .../test/auto/integrated/cases/basics.spec.ts | 17 +- .../test/auto/integrated/cases/engine.spec.ts | 70 +- .../integrated/cases/engine_chirality.spec.ts | 1 - .../test/auto/integrated/cases/events.spec.ts | 19 +- .../integrated/cases/text_selection.spec.ts | 83 +- web/src/test/auto/integrated/test_utils.ts | 55 +- .../integrated/web-test-runner.config.mjs | 2 +- web/src/test/auto/tsconfig.json | 6 +- web/src/test/manual/web/index.html | 1 + web/src/test/manual/web/issue11785/index.html | 109 + .../web/prediction-mtnt/nrc.en.mtnt.model.js | 43 
+- .../tools/testing/recorder/browserDriver.ts | 16 + web/tsconfig.base.json | 2 +- windows/src/.gitignore | 2 +- .../desktop/kmshell/locale/pt-PT/strings.xml | 24 + windows/src/engine/.gitignore | 5 - windows/src/engine/README.md | 8 +- windows/src/engine/build.sh | 3 +- windows/src/engine/engine.sln | 16 +- .../src/engine/keyman32/DebugEventTrace.cpp | 28 +- windows/src/engine/keyman32/K32_load.cpp | 45 +- windows/src/engine/keyman32/SharedBuffers.cpp | 4 +- windows/src/engine/keyman32/appcontext.cpp | 5 - windows/src/engine/keyman32/appint/aiTIP.cpp | 62 +- windows/src/engine/keyman32/appint/aiTIP.h | 10 +- .../keyman32/appint/aiWin2000Unicode.cpp | 12 +- windows/src/engine/keyman32/appint/appint.cpp | 4 +- windows/src/engine/keyman32/build.sh | 60 +- windows/src/engine/keyman32/calldll.cpp | 73 +- windows/src/engine/keyman32/globals.h | 5 +- windows/src/engine/keyman32/glossary.cpp | 14 +- windows/src/engine/keyman32/hookutils.cpp | 10 +- windows/src/engine/keyman32/hotkeys.cpp | 33 +- windows/src/engine/keyman32/k32_dbg.cpp | 159 +- windows/src/engine/keyman32/k32_globals.cpp | 39 +- .../keyman32/k32_lowlevelkeyboardhook.cpp | 57 +- windows/src/engine/keyman32/keybd_shift.cpp | 46 +- .../src/engine/keyman32/keyboardoptions.cpp | 16 +- windows/src/engine/keyman32/keyman32.cpp | 74 +- windows/src/engine/keyman32/keyman32.def | 3 + windows/src/engine/keyman32/keyman32.rc | 1 - windows/src/engine/keyman32/keyman32.vcxproj | 338 +- .../engine/keyman32/keyman32.vcxproj.filters | 6 + windows/src/engine/keyman32/keyman64.def | 45 + windows/src/engine/keyman32/keyman64.rc | 1 + windows/src/engine/keyman32/keymanengine.h | 57 +- .../engine/keyman32/kmhook_callwndproc.cpp | 20 +- .../src/engine/keyman32/kmhook_getmessage.cpp | 335 +- .../src/engine/keyman32/kmhook_keyboard.cpp | 11 +- windows/src/engine/keyman32/kmprocess.cpp | 40 +- .../src/engine/keyman32/kmprocessactions.cpp | 26 +- .../src/engine/keyman32/selectkeyboard.cpp | 46 +- 
.../engine/keyman32/serialkeyeventclient.cpp | 8 +- .../engine/keyman32/serialkeyeventserver.cpp | 7 +- windows/src/engine/keyman32/syskbd.cpp | 58 +- windows/src/engine/keyman32/syskbdnt.cpp | 37 +- windows/src/engine/keyman32/syskbdnt64.cpp | 44 +- .../version.rc => keyman32/version64.rc} | 0 windows/src/engine/keyman64/build.sh | 50 - windows/src/engine/keyman64/keyman64.rc | 3 - windows/src/engine/keyman64/keyman64.sln | 24 - windows/src/engine/keyman64/keyman64.vcxproj | 332 - .../engine/keyman64/keyman64.vcxproj.filters | 89 - windows/src/engine/kmtip/debug.cpp | 26 +- windows/src/engine/kmtip/globals.cpp | 14 - windows/src/engine/kmtip/globals.h | 50 +- windows/src/engine/kmtip/inserttext.cpp | 190 +- .../src/engine/kmtip/keyman32interface.cpp | 16 +- windows/src/engine/kmtip/keys.cpp | 97 +- windows/src/engine/kmtip/kmkey.cpp | 158 +- windows/src/engine/kmtip/kmtip.cpp | 35 +- windows/src/engine/kmtip/kmtip.h | 8 +- windows/src/engine/kmtip/registryw.cpp | 2 +- windows/src/engine/kmtip/tmgrsink.cpp | 29 +- windows/src/engine/testhost/testhost.cpp | 4 +- .../keymandebuglog/UfrmKeymanDebugLogMain.pas | 2 +- .../i3619tip/i3619tip/i3619tip/kmkey.cpp | 18 +- .../i3619tip/i3619tip/i3619tip/registryw.cpp | 2 +- 445 files changed, 21506 insertions(+), 6701 deletions(-) create mode 100644 android/KMAPro/kMAPro/src/main/res/values-b+el/strings.xml delete mode 100644 android/KMEA/app/src/main/assets/keyboard.es5.html create mode 100644 android/KMEA/app/src/main/res/values-b+el/strings.xml create mode 100644 common/test/resources/test-timeouts.mjs delete mode 100644 common/test/resources/timeout-adapter.js create mode 100644 common/web/keyboard-processor/tests/tsconfig.json create mode 100644 common/web/lm-worker/src/main/correction/execution-timer.ts create mode 100644 common/web/lm-worker/src/main/model-helpers.ts create mode 100644 common/web/lm-worker/src/main/predict-helpers.ts create mode 100644 common/web/lm-worker/src/polyfills/array.from.js create mode 100644 
common/web/lm-worker/src/test/mocha/cases/auto-correct.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/casing-detection.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/early-correction-search-stopping.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/edit-distance/execution-timer.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/predict-from-corrections.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/suggestion-deduplication.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/suggestion-finalization.js create mode 100644 common/web/lm-worker/src/test/mocha/cases/suggestion-similarity.js delete mode 100644 common/web/tslib/README.md delete mode 100755 common/web/tslib/build.sh delete mode 100644 common/web/tslib/package.json delete mode 100644 common/web/tslib/src/index.ts delete mode 100644 common/web/tslib/tsconfig.json create mode 100644 common/web/types/src/util/consts.ts rename common/{models/templates => web/utils}/src/priority-queue.ts (96%) delete mode 100644 common/web/utils/src/surrogates.ts rename common/{models/templates/test/test-priority-queue.js => web/utils/src/test/priorityQueue.js} (97%) create mode 100644 core/src/core_icu.cpp create mode 100644 core/src/util_normalize_table_generator.cpp create mode 100644 core/src/util_regex.cpp create mode 100644 core/src/util_regex.hpp create mode 100644 core/tests/unit/ldml/core_ldml_min.cpp create mode 100644 developer/src/common/web/utils/src/is-valid-email.ts create mode 100644 developer/src/common/web/utils/test/test-is-valid-email.ts rename developer/src/kmc-analyze/src/{messages.ts => analyzer-messages.ts} (88%) create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/missing-info-version-in-kps-11856/khmer_angkor.kps create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/LICENSE.md create mode 100644 
developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/build/.gitattributes create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/build/khmer_angkor.js create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/build/khmer_angkor.kmp create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/build/khmer_angkor.kmx create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/khmer_angkor.kpj create mode 100644 developer/src/kmc-keyboard-info/test/fixtures/multiple-email-addresses/source/khmer_angkor.kps create mode 100644 developer/src/kmc-kmn/test/fixtures/keyboards/hint_index_store_long.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/keyboards/hint_index_store_long_key.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/keyboards/warn_index_store_short.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/keyboards/warn_index_store_short_key.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_9_caps_lock.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_9_chiral_modifiers.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_auto_caps_lock.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_auto_chiral_modifiers.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_gestures.keyman-touch-layout create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_gestures.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_gestures_16.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_notany.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_notany_10.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_special_key_caps.keyman-touch-layout create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_special_key_caps.kmn create mode 
100644 developer/src/kmc-kmn/test/fixtures/kmw/version_special_key_caps_14.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_u_xxxx_yyyy.keyman-touch-layout create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_u_xxxx_yyyy.kmn create mode 100644 developer/src/kmc-kmn/test/fixtures/kmw/version_u_xxxx_yyyy_14.kmn rename developer/src/kmc-ldml/src/compiler/{messages.ts => ldml-compiler-messages.ts} (85%) create mode 100644 developer/src/kmc-ldml/src/compiler/linter-keycaps.ts create mode 100644 developer/src/kmc-ldml/src/compiler/linter.ts create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/keys/warn-no-keycap.xml create mode 100644 developer/src/kmc-ldml/test/fixtures/sections/layr/error-bogus-modifiers.xml create mode 100644 developer/src/kmc-ldml/test/test-linter.ts create mode 100644 developer/src/kmc-ldml/test/test-strs.ts create mode 100644 developer/src/kmc-package/test/fixtures/invalid/error_invalid_author_email.kps create mode 100644 developer/src/kmc-package/test/fixtures/invalid/error_invalid_author_email_multiple.kps create mode 100644 developer/src/kmc/test/fixtures/get-last-git-commit-date/README.md create mode 100644 developer/src/kmcmplib/tests/gtest-compmsg-test.cpp create mode 100644 developer/src/kmcmplib/tests/gtest-kmx_u16-test.cpp rename developer/src/kmconvert/{Keyman.Developer.System.TouchLayoutToVisualKeyboardConverter.pas => Keyman.Developer.System.VisualKeyboardToTouchLayoutConverter.pas} (94%) create mode 100644 ios/engine/KMEI/KeymanEngine/Classes/el.lproj/ResourceInfoView.strings create mode 100644 ios/engine/KMEI/KeymanEngine/el.lproj/Localizable.strings create mode 100644 ios/engine/KMEI/KeymanEngine/el.lproj/Localizable.stringsdict create mode 100644 ios/keyman/Keyman/Keyman/el.lproj/Localizable.strings delete mode 100644 linux/.pbuilderrc create mode 100644 linux/ibus-keyman/.gitignore create mode 100644 linux/keyman-config/keyman_config/keyman_option.py create mode 100644 
linux/keyman-system-service/.gitignore rename linux/keyman-system-service/resources/{meson.build => meson.build.in} (84%) delete mode 100755 linux/scripts/cow.sh create mode 100644 web/src/test/manual/web/issue11785/index.html create mode 100644 windows/src/engine/keyman32/keyman64.def create mode 100644 windows/src/engine/keyman32/keyman64.rc rename windows/src/engine/{keyman64/version.rc => keyman32/version64.rc} (100%) delete mode 100755 windows/src/engine/keyman64/build.sh delete mode 100644 windows/src/engine/keyman64/keyman64.rc delete mode 100644 windows/src/engine/keyman64/keyman64.sln delete mode 100644 windows/src/engine/keyman64/keyman64.vcxproj delete mode 100644 windows/src/engine/keyman64/keyman64.vcxproj.filters diff --git a/.clang-format b/.clang-format index 76e36e1d44d..81ca2c82a37 100644 --- a/.clang-format +++ b/.clang-format @@ -19,7 +19,7 @@ AllowShortLambdasOnASingleLine: All AllowShortIfStatementsOnASingleLine: Never AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: MultiLine BinPackArguments: true @@ -58,7 +58,7 @@ ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DeriveLineEnding: true -DerivePointerAlignment: true +DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true diff --git a/.github/workflows/deb-packaging.yml b/.github/workflows/deb-packaging.yml index 29f069efd50..9c374f5d050 100644 --- a/.github/workflows/deb-packaging.yml +++ b/.github/workflows/deb-packaging.yml @@ -116,7 +116,7 @@ jobs: strategy: fail-fast: true matrix: - dist: [focal, jammy, mantic, noble] + dist: [focal, jammy, noble] steps: - name: Checkout diff --git a/HISTORY.md b/HISTORY.md index 508218f04a5..94efb781a8e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,210 @@ # Keyman 
Version History +## 18.0.80 alpha 2024-07-31 + +* chore(developer): remove redundant references from tsconfig.json (#12037) +* fix(web): add nullish test in setOsk (#12039) +* fix(web): unrevert #11258, leaving OSK hidden before instructed to display (#12049) +* test(developer): check correct use of u16chr when second parameter could be null (#11894) +* change(web): remove support for es5 (#11881) + +## 18.0.79 alpha 2024-07-30 + +* change(mac): add custom tags in sentry to better identify errors (#11947) +* feat(web): add unit tests for case-detection & handling (#11950) +* refactor(web): spin off method for correction-search probability-thresholding check (#11952) + +## 18.0.78 alpha 2024-07-29 + +* chore(common): Update history from 17.0.327 and add missing descriptions (#12021) +* change(web): revert #11174, which loads keyboards before initializing the OSK (#12015) +* feat(web): add unit testing for finalization of generated suggestions (#11946) +* feat(web): add unit tests for prediction lookup component (#11949) + +## 18.0.77 alpha 2024-07-27 + +* refactor(windows): clean up logging (#11921) +* chore(developer): rename to analyzer-messages.ts (#12017) +* fix(developer): remove `paths` from tsconfig.json (#12028) +* chore(developer): api doc refresh (#12029) + +## 18.0.76 alpha 2024-07-26 + +* change(linux): improve changelog PRs after upload to debian (#12024) +* test(developer): kmcmplib compiler unit tests 2 (#11663) +* fix(linux): set local directory if not specified (#12032) + +## 18.0.75 alpha 2024-07-25 + +* chore(windows): remove the posting WM_KEYUP/DOWN events to IM (#12002) +* feat(web): check for low-probability exact + exact-key correction matches (#11876) +* refactor(web): extract suggestion-finalization block into its own function (#11899) +* chore(developer): remove `CompilerMessages` stub and use `KmnCompilerMessages` (#11986) +* chore(developer): rename kmc-ldml `CompilerMessages`, `LdmlKeyboardCompilerMessages` to `LdmlCompilerMessages` 
(#11988) +* chore(developer): rename kmc-package `CompilerMessages` to `PackageCompilerMessages` (#11989) +* fix(developer): handle errors parsing .kps file when loading project (#12008) +* chore(common): updated stats script to support end date/sprint (#12009) +* fix(windows): align engine.sln platforms and configurations (#12011) +* refactor(web): spin off deduplication, suggestion-similarity sections (#11900) +* refactor(web): extract the correct-and-raw-predict blocks into their own method (#11888) +* refactor(web): convert internal prediction methods to stateless format (#11940) +* feat(web): add unit tests for predict auto-selection method (#11941) +* feat(web): extend unit-test oriented dummy model (#11948) +* feat(web): add unit tests for suggestion-similarity detection (#11944) +* feat(web): add unit testing for suggestion deduplication (#11945) +* feat(developer): add hint when index() store is longer than any() store (#12000) +* chore(android,ios): Add ojibwa ifinal/rdot keyboards to FirstVoices (#11889) + +## 18.0.74 alpha 2024-07-24 + +* fix(developer): correct handling of trailing spaces by GetDelimitedString() in kmcmplib compiler (#11938) +* chore(linux): remove Ubuntu Mantic, add Oracular (#12003) + +## 18.0.73 alpha 2024-07-23 + +* fix(windows): add text selected bool emit backspace key when text selected in TSF (#11884) +* fix(developer): prevent buffer overrun in `u16tok` (#11910) +* fix(developer): prevent invalid values in targets store (#11918) +* feat(developer): automatically detect version for `U_xxxx_yyyy` ids (#11957) +* feat(developer): handle automatic versioning of chiral modifiers (#11965) +* test(developer): Add tests for automatic versioning of notany() with context() (#11980) +* feat(developer): handle automatic versioning of special key caps on normal keys (#11981) +* feat(developer): automatically upgrade version when gestures are found in the touch layout (#11982) +* refactor(developer): rename `verifyMinimumKeymanVersion` 
(#11983) +* feat(developer): add searching for message identifiers to `kmc message` (#11984) +* fix(developer): kmc-keyboard-info: use default version 1.0 if version information missing (#11985) +* fix(web): present a "keep" option when a context-altering suggestion is auto-selected (#11969) +* fix(web): prevents auto-accept immediately after reversion (#11970) + +## 18.0.72 alpha 2024-07-22 + +* fix(web): remedy unit-test stability issues (#11933) +* refactor(web): fix TypeScript errors and warnings (#11911) + +## 18.0.71 alpha 2024-07-18 + +* chore(windows): add comments for _WIN64 tests (#11929) +* chore(common): Update Crowdin strings for Portuguese (#11974) + +## 18.0.70 alpha 2024-07-08 + +* feat(web): provide lexicon probabilities directly on the search path (#11868) +* feat(common/models): support direct-child access for Trie node iteration (#11869) +* change(common/models/templates): rework Trie predict method to utilize traversals (#11870) +* change(web): track the base correction for generated predictions (#11875) +* feat(web): add and enable auto-correction (#11866) + +## 18.0.69 alpha 2024-07-05 + +* fix(core): allow to successfully build on Ubuntu 24.04 (#11926) +* chore(windows): correct output file for 64-bit build of keyman32 in build.sh (#11930) +* chore(android,ios): Add Crowdin localization for Polytonic Greek (#11877) + +## 18.0.68 alpha 2024-07-04 + +* refactor(windows): merge keyman64 build into keyman32 (#11906) +* refactor(windows): remove wm_keyman_keydown and wm_keyman_keyup (#11920) + +## 18.0.67 alpha 2024-07-03 + +* refactor(common/models): move TS priority-queue implementation to web-utils (#11867) + +## 18.0.66 alpha 2024-07-02 + +* fix(developer): handle second parameter of index correctly in kmcmplib compiler (#11815) + +## 18.0.65 alpha 2024-07-01 + +* fix(developer): prevent non-BMP characters in key part of rule (#11806) +* chore(linux): remove unused building with pbuilder (#11862) + +## 18.0.64 alpha 2024-06-28 + +* fix(web): 
use fat-finger data with simple keypresses (#11854) + +## 18.0.63 alpha 2024-06-26 + +* feat(linux): implement Linux side of SimulateAltGr option :checkered_flag: (#11852) + +## 18.0.62 alpha 2024-06-25 + +* chore(common): update C/C++ formatting options (#11836) +* chore(linux): use shared meson config (#11863) +* fix(linux): ignore exceptions trying to install cache (#11861) + +## 18.0.61 alpha 2024-06-24 + +* feat(web): optimization via lazy preprocessing of keyboard touch-layout info (#11265) +* fix(android): clear globe highlight when displaying keyboard picker (#11826) +* refactor(linux): add KeymanOption class for options :checkered_flag: (#11850) +* refactor(linux): rename methods that deal with keyboard options :checkered_flag: (#11851) + +## 18.0.60 alpha 2024-06-21 + +* chore(web): define common timeout variable for automated testing (#11839) +* feat(developer): warn on empty keycaps (#11810) +* change(web): optimization for keyboard-layout preprocessing (#11263) +* feat(web): optimization via lazy construction of OSK layers (#11264) +* fix(developer): layr: fix modifier err message on layer w/o id (#11843) + +## 18.0.59 alpha 2024-06-19 + +* chore(deps-dev): bump braces from 3.0.2 to 3.0.3 (#11756) +* chore(developer): clarify project upgrade messages about file locations (#11819) +* chore(deps): bump ws from 8.16.0 to 8.17.1 (#11822) +* chore(common): update base-package node-engine setting (#11798) +* change(web): change after-word whitespace check to be more selective (#11800) +* chore: Revert "chore(common): update base-package node-engine setting" (#11829) +* change(web): drop correction batching (#11768) +* chore(web): move correction-search execution timer to its own file (#11757) +* refactor(web): overhaul predictive-text engine's timer to better detect paused time (#11758) +* feat(web): improve predictive-text responsiveness when typing rapidly (#11784) +* feat(linux): re-create missing files at run-time (#11789) + +## 18.0.58 alpha 2024-06-18 
+ +* test(core): Add a minimal test that exercises the core API (#11781) +* fix(developer): check HISTORY.md to get last modified date for keyboard_info and model_info (#11805) +* docs(developer): extra context help for keyboard-editor (does not exist in Keyman Developer 17.0) (#11771) + +## 18.0.57 alpha 2024-06-17 + +* fix(web): fix id of longpress keys with modifier set in touch layout (#11783) +* fix(web): prevent desktop OSK crash when addKeyboards is called before engine init (#11786) +* fix(core): serialize tests for core/wasm on mac agents (#11795) +* fix(developer): refactor kmcmplib compiler messages to use map (#11738) +* fix(developer): make native compilation of kmcmplib under Linux possible (#11779) +* feat(core): devolve regex to javascript (#11777) +* feat(core): remove ICU from core under wasm (#11778) +* fix(linux): restart ibus after manual integration test run (#11775) + +## 18.0.56 alpha 2024-06-14 + +* feat(core): devolve normalization to js (#11541) +* fix(developer): show message if no more platforms to add to touch layout editor (#11759) +* docs(developer): context help in package-editor and put the existing context help in their own tab comments (#11760) +* docs(developer): context help in keyboard-editor section (#11754) +* docs(developer): context help in new-project section (#11767) +* docs(developer): context help for new-project-parameters in keyman developer (#11769) +* docs(developer): context help for Select BCP 47 tag in Keyman Developer (#11770) +* change(common): update esbuild to 0.18.9 (#11693) +* change(web): more prep for better async prediction handling (#10347) +* fix(web): set new-context rules' device to match that of the active OSK (#11743) +* chore(linux): Update debian changelog (#11671) +* fix(web): add limited Array.from polyfill for lm-worker use (#11732) + +## 18.0.55 alpha 2024-06-13 + +* fix(developer): handle missing OSK when importing a Windows keyboard into a touch-only project (#11720) +* fix(developer): 
verify email addresses in .kps and .keyboard_info (#11735) +* change(web): prep for better asynchronous prediction handling (#10343) + +## 18.0.54 alpha 2024-06-12 + +* fix(common): remove subpackage entries for older TS version (#11745) +* chore(common): end use of ts-node (#11746) +* feat(web): add bulk_render variant that loads and renders keyboards from local KMP (#10432) + ## 18.0.53 alpha 2024-06-10 * fix(android): check current orientation when redisplaying system keyboard (#11604) @@ -109,7 +314,7 @@ * fix(web): explicitly terminate banner gesture-handling when banner is swapped (#11599) * chore(web): removes unused locals, imports, and private fields (#11460) * fix(web): use correct parameter name in button UI OSK `hide` event (#11600) -* (#11444) +* chore(mac): rework of main build script (#11444) * fix(ios): do not write to shared storage from system keyboard (#11613) * fix(developer): handle invalid default project path in options (#11555) * fix(developer): handle missing data in .kps `` (#11563) @@ -194,7 +399,7 @@ ## 18.0.35 alpha 2024-05-14 -* (#11340) +* chore(core): update core to C++17 (#11340) ## 18.0.34 alpha 2024-05-13 @@ -347,6 +552,41 @@ * chore(common): move to 18.0 alpha (#10713) * chore: move to 18.0 alpha +## 17.0.327 stable 2024-07-25 + +* fix(android): include DOMRect polyfill for older ES6-supporting devices (#11654) +* fix(web): Don't apply suggestion unless fully configured (#11636) +* fix(mac): handle command keys without crashing (#11675) +* fix(web): get row-height for flick constraints after performing layout (#11692) +* fix(android): handle IllegalArgumentException when initializing CloudDownloadMgr, add logging to check for unhandled side-effects (#11628) +* fix(developer): handle editor initializing after debugger when setting execution point (#11588) +* fix(developer): treat js files with unrecognized encodings as non-keyboard files (#11699) +* fix(developer): disable example edit controls if no examples in Package Editor 
(#11703) +* chore(developer): add extra logging for assertion failure when pressing backspace in debugger (#11709) +* fix(developer): handle encoding errors when loading wordlists (#11712) +* fix(mac): change build configuration to prevent cycle error in Xcode 15 (#11731) +* fix(developer): handle missing OSK when importing a Windows keyboard into a touch-only project (#11721) +* fix(developer): prevent two touch layout editors opening for the same file (#11727) +* fix(android): check current orientation, fix keyboard size after system keyboard rotations and resumes (#11747) +* chore(linux): Update debian changelog (#11670) +* feat(developer): support language reference in context help (#11741) +* fix(developer): show message if no more platforms to add to touch layout editor (#11766) +* fix(web): add limited Array.from polyfill for lm-worker use (#11733) +* fix(web): set new-context rules' device to match that of the active OSK (#11744) +* fix(web): prevent desktop OSK crash when addKeyboards is called before engine init (#11787) +* fix(windows): add -k parameter for keyboards build.sh (#11811) +* fix(core): serialize tests for core/wasm on mac agents (#11809) +* chore(developer): clarify project upgrade messages about file locations (#11820) +* fix(developer): check HISTORY.md to get last modified date for keyboard_info and model_info (#11808) +* fix(web): fix id of longpress keys with modifier set in touch layout (#11797) +* change(web): change after-word whitespace check to be more selective (#11824) +* fix(android): clear globe highlight when displaying keyboard picker (#11827) +* fix(web): use fat-finger data with simple keypresses (#11871) +* fix(developer): prevent non-BMP characters in key part of rule (#11807) +* fix(linux): ignore exceptions trying to install cache (#11885) +* chore(common): Update Crowdin strings for Portuguese (#11976) +* chore(linux): remove Ubuntu 23.10 Mantic (#12004) + ## 17.0.326 stable 2024-06-02 * cherrypick(android/engine): 
Handle globe key on lock screen (#11468) @@ -423,8 +663,8 @@ ## 17.0.317 beta 2024-05-01 -* (#11322) -* (#11321) +* chore(web): remove old reference-doc from alpha that has completed its purpose (#11322) +* fix(web): gesture-model initial-state, callback failure handling (#11321) * fix(linux): Fix icon for .kmp files (#11295) ## 17.0.316 beta 2024-04-30 diff --git a/VERSION.md b/VERSION.md index fe04a53f8ce..956e25b0baf 100644 --- a/VERSION.md +++ b/VERSION.md @@ -1 +1 @@ -18.0.54 \ No newline at end of file +18.0.81 \ No newline at end of file diff --git a/android/.gitignore b/android/.gitignore index 4f0c3bb5018..a5720bccc73 100644 --- a/android/.gitignore +++ b/android/.gitignore @@ -37,8 +37,6 @@ KMEA/**/assets/keymanandroid.js KMEA/**/assets/keyman.js.map KMEA/**/assets/keymanweb-webview.js KMEA/**/assets/keymanweb-webview.js.map -KMEA/**/assets/keymanweb-webview.es5.js -KMEA/**/assets/keymanweb-webview.es5.js.map KMEA/**/assets/map-polyfill.js KMEA/**/assets/sentry.min.js KMEA/**/assets/keyman-sentry.js diff --git a/android/KMAPro/kMAPro/src/main/res/values-b+el/strings.xml b/android/KMAPro/kMAPro/src/main/res/values-b+el/strings.xml new file mode 100644 index 00000000000..43e43eb4ca9 --- /dev/null +++ b/android/KMAPro/kMAPro/src/main/res/values-b+el/strings.xml @@ -0,0 +1,164 @@ + + + + + Μοιρασθῆτε + + Φυλλομετρητής + + Μέγεθος κειμένου + + Περισσότερα + + Διαγραφὴ κειμένου + + Πληροφορίες + + Ρυθμίσεις + + Εγκατάσταση Ενημερώσεων + + Ἔκδοση %1$s + + Τὸ Keyman ἀπαιτεῖ ἔκδοση Chrome 57 ἢ νεώτερη. 
+ + Ἐνημερῶστε τὸ Chrome + + Ἀρχίστε νὰ γράφετε ἐδῶ… + + + + Μέγεθος κειμένου: %1$d + + Μεγαλῶστε τὸ κείμενο + + Μεγαλῶστε τὸ κείμενο + + Ρύθμιση μεγέθους κειμένου + + \nΤὸ κείμενο θὰ ἐκκαθαρισθεῖ πλήρως\n + + Μικρύνετε τὸ κείμενο + + Προσθέστε πληκτρολόγιο γιὰ τὴν γλῶσσα σας + + Ὁρίστε τὸ Κῆμαν ὡς παν-συστημικὸ πληκτρολόγιο + + Ὁρίστε τὸ Κῆμαν ὡς προεπιλεγμένο πληκτρολόγιο + + Περισσότερες πληροφορίες + + Κατὰ τὴν ἔναρξη νὰ προβάλλεται τὸ \"%1$s + + Γιὰ νὰ ἐγκαταστήσετε πακέτα πληκτρολογίων, ἐπιτρέψτε στὸ Κῆμαν νὰ διαβάζει ἐξωτερικὸ χῶρο ἀποθήκευσης. + + Ἀπερρίφθη αἴτημα χώρου ἀποθήκευσης. Πιθανὴ ἀποτυχία ἐγκατάστασης πακέτου πληκτρολογίου + Ἀπερρίφθη αἴτημα χώρου ἀποθηκεύσεως. Δοκιμάστε τὶς ρυθμίσεις Κῆμαν - Ἐγκατάσταση ἀπὸ τοπικὸ ἀρχεῖο + + Ρυθμίσεις + + + Ἐγκατεστημένες γλῶσσες (%1$d) + Ἐγκατεστημένες γλῶσσες (%1$d) + + + Ἐγκαταστῆστε πληκτρολόγιο ἢ λεξικό + + Γλῶσσα προβολῆς + + Μεταβολὴ ὕψους πληκτρολογίου + + Spacebar caption + + Πληκτρολόγιο + + Γλῶσσα + + Γλῶσσα + Πληκτρολόγιο + + Κενό + + \'Ονομα πληκτρολογίου στὸ πλῆκτρο διαστήματος + + \'Ονομα γλώσσας στὸ πλῆκτρο διαστήματος + + \'Ονομα πληκτρολογίου καὶ γλώσσας στὸ πλῆκτρο διαστήματος + + Καμμία λεζάντα στὸ πλῆκτρο διαστήματος + + Δὸνηση κατὰ τὴν πληκτρολόγηση + + Νὰ ἐμφανίζεται πάντα banner + + Πρὸς ὑλοποίησιν + + Ὅταν εἶναι off, ἐμφανίζεται μόνο ὅταν ἔχει ἐνεργοποιηθεῖ τὸ προγνωστικὸ κείμενο + + Ἐπιτρέψτε τὴν ἀποστολὴ ἀναφορῶν κατάρρευσης μέσῳ δικτύου + + Ὅταν εἶναι ΟΝ, θὰ ἀποστέλλονται ἀναφορὲς κατάρρευσης + + Ὅταν εἶναι off, δὲν θὰ ἀποστέλλονται ἀναφορὲς κατάρρευσης + + Ἐγκατάσταση ἀπὸ τὸ keyman.com + + Ἐγκατάσταση ἀπὸ τοπικὸ ἀρχεῖο + + Ἐγκατάσταση ἀπὸ ἄλλη συσκευή + + Προσθέστε γλῶσσες σὲ ἐγκατεστημένο πληκτρολόγιο + + (ἀπὸ πακέτο πληκτρολογίου) + + Ἐπιλέξτε Πακέτο Πληκτρολογίου + + Ἐπιλέξτε γλῶσσες γιὰ τὸ %1$s + + Προσετέθη ἡ γλῶσσα %1$s στὸ %2$s + + Ὅλες οἱ γλῶσσες ἔχουν ἤδη ἐγκατασταθεῖ + + Σύρετε τὸ πληκτρολόγιο γιὰ νὰ ἀλλάξετε τὸ ὕψος + + Περιστρέψτε τὴν συσκευὴ γιὰ λειτουργία πορτραίτου καὶ 
τοπίου + + Ἐπαναφέρετε τὶς προεπιλεγμένες ρυθμίσεις + + Ἀναζητῆστε ἢ πληκτρολογῆστε URL + + Σελιδοδεῖκτες + + Δὲν ὑπάρχουν σελιδοδεῖκτες + + Προσθέστε σελιδοδείκτη + + Τίτλος + + URL + + Τὸ πακέτο %1$s ἀπέτυχε νὰ ἐγκατασταθεῖ + + Λήψη πακέτου πληκτρολογίου\n%1$s… + + Ἀποτυχία ἐξαγωγῆς + + Ἐγκαταστῆστε πληκτρολόγιο + + Ἐγκαταστῆστε Λεξικό + + Τὸ %1$s δὲν εἶναι ἔγκυρο ἀρχεῖο πακέτου Κῆμαν.\n%2$s\" + + Τὸ πακέτο πληκτρολογίου δὲν ἔχει βελτιστοποιημένα πληκτρολόγια ἀφῆς πρὸς ἐγκατάστασιν + + Δὲν ὑπάρχει νέο προγνωστικό κείμενο πρὸς ἐγκατάστασιν + + Δὲν ὑπάρχουν πληκτρολόγια ἢ προγνωστικό κείμενο πρὸς ἐγκατάστασιν + + Τὸ πακέτο πληκτρολογίου δὲν ἔχει σχετικὲς μὲ αὐτὸ γλῶσσες πρὸς ἐγκατάστασιν + + Ἄκυρα/ἐλλιπῆ μεταδεδομένα στὸ πακέτο + + Τὸ πληκτρολόγιο ἀπαιτεῖ νεώτερη ἔκδοση τοῦ Κῆμαν + + Ἀδυναμία ἐκκινήσεως φυλλομετρητῆ + diff --git a/android/KMAPro/kMAPro/src/main/res/values-pt-rPT/strings.xml b/android/KMAPro/kMAPro/src/main/res/values-pt-rPT/strings.xml index 3e3580f9311..02f1d9291de 100644 --- a/android/KMAPro/kMAPro/src/main/res/values-pt-rPT/strings.xml +++ b/android/KMAPro/kMAPro/src/main/res/values-pt-rPT/strings.xml @@ -1,5 +1,6 @@ + Compartilhar @@ -32,6 +33,8 @@ Tamanho do texto para cima Tamanho do texto para baixo + + Tamanho do texto deslizante \nTodo o texto será limpo\n @@ -50,6 +53,7 @@ Para instalar pacotes de teclado, permita que o Keyman leia o armazenamento externo. A solicitação de permissão de armazenamento foi negada. Pode falhar na instalação do pacote de teclado + Falha no pedido de permissão de armazenamento. 
Experimente instalar de um ficheiro local, nas configurações do Keyman Configurações @@ -82,7 +86,7 @@ Não mostrar legenda na barra de espaço - Vibrate when typing + Vibrar ao tocar Sempre mostrar banner @@ -154,7 +158,7 @@ Metadados inválidos/ausentes no pacote - Keyboard requires a newer version of Keyman + O teclado requer uma nova versão do Keyman - Unable to launch web browser + Não foi possível carregar o navegador diff --git a/android/KMEA/app/src/main/assets/android-host.js b/android/KMEA/app/src/main/assets/android-host.js index 39ce3a5f307..9c786e17770 100644 --- a/android/KMEA/app/src/main/assets/android-host.js +++ b/android/KMEA/app/src/main/assets/android-host.js @@ -37,11 +37,7 @@ function init() { keyman.beepKeyboard = beepKeyboard; // Readies the keyboard stub for instant loading during the init process. - try { - KeymanWeb.registerStub(JSON.parse(jsInterface.initialKeyboard())); - } catch(error) { - console.error(error); - } + KeymanWeb.registerStub(JSON.parse(jsInterface.initialKeyboard())); keyman.init({ 'embeddingApp':device, @@ -337,6 +333,14 @@ function menuKeyUp() { window.location.hash = hash; } +// The keyboard-picker displayed via Android longpress disrupts Web-side +// gesture-handling; this function helps force-clear the globe key's highlighting. 
+function clearGlobeHighlight() { + if(keyman.osk && keyman.osk.vkbd && keyman.osk.vkbd.currentLayer.globeKey) { + keyman.osk.vkbd.currentLayer.globeKey.highlight(false) + } +} + function hideKeyboard() { fragmentToggle = (fragmentToggle + 1) % 100; window.location.hash = 'hideKeyboard' + fragmentToggle; diff --git a/android/KMEA/app/src/main/assets/keyboard.es5.html b/android/KMEA/app/src/main/assets/keyboard.es5.html deleted file mode 100644 index 6384235456b..00000000000 --- a/android/KMEA/app/src/main/assets/keyboard.es5.html +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - Keyman - - - - - - - - - - - - - diff --git a/android/KMEA/app/src/main/java/com/keyman/engine/DisplayLanguages.java b/android/KMEA/app/src/main/java/com/keyman/engine/DisplayLanguages.java index 27c462e95a0..6a159523a2e 100644 --- a/android/KMEA/app/src/main/java/com/keyman/engine/DisplayLanguages.java +++ b/android/KMEA/app/src/main/java/com/keyman/engine/DisplayLanguages.java @@ -62,6 +62,7 @@ public static final DisplayLanguageType[] getDisplayLanguages(Context context) { new DisplayLanguageType("nl-NL", "Nederlands (Dutch)"), new DisplayLanguageType("ann", "Obolo"), new DisplayLanguageType("pl-PL", "Polski (Polish)"), + new DisplayLanguageType("el", "Polytonic Greek"), new DisplayLanguageType("pt-PT", "Português do Portugal"), new DisplayLanguageType("ff-ZA", "Pulaar-Fulfulde"), // or Fulah new DisplayLanguageType("ru-RU", "Pyccĸий (Russian)"), diff --git a/android/KMEA/app/src/main/java/com/keyman/engine/KMKeyboard.java b/android/KMEA/app/src/main/java/com/keyman/engine/KMKeyboard.java index 6788c8f90e0..7d731f2b63d 100644 --- a/android/KMEA/app/src/main/java/com/keyman/engine/KMKeyboard.java +++ b/android/KMEA/app/src/main/java/com/keyman/engine/KMKeyboard.java @@ -313,6 +313,12 @@ public boolean onScroll(MotionEvent e1, MotionEvent e2, float distanceX, float d public void onLongPress(MotionEvent event) { if (KMManager.getGlobeKeyState() == KMManager.GlobeKeyState.GLOBE_KEY_STATE_DOWN) { 
KMManager.setGlobeKeyState(KMManager.GlobeKeyState.GLOBE_KEY_STATE_LONGPRESS); + + // When we activate the keyboard picker, this will disrupt the JS-side's control + // flow for gesture-handling; we should pre-emptively clear the globe key, + // as Web will not receive a "globe key up" event. + loadJavascript("clearGlobeHighlight()"); + KMManager.handleGlobeKeyAction(context, true, keyboardType); return; /* For future implementation diff --git a/android/KMEA/app/src/main/java/com/keyman/engine/KMManager.java b/android/KMEA/app/src/main/java/com/keyman/engine/KMManager.java index d1a40be69ae..aa1c43626a7 100644 --- a/android/KMEA/app/src/main/java/com/keyman/engine/KMManager.java +++ b/android/KMEA/app/src/main/java/com/keyman/engine/KMManager.java @@ -295,9 +295,7 @@ public String toString() { // Keyman files protected static final String KMFilename_KeyboardHtml = "keyboard.html"; - protected static final String KMFilename_KeyboardHtml_Legacy = "keyboard.es5.html"; protected static final String KMFilename_JSEngine = "keymanweb-webview.js"; - protected static final String KMFilename_JSLegacyEngine = "keymanweb-webview.es5.js"; protected static final String KMFilename_JSSentry = "sentry.min.js"; protected static final String KMFilename_JSSentryInit = "keyman-sentry.js"; protected static final String KMFilename_AndroidHost = "android-host.js"; @@ -858,25 +856,11 @@ public static boolean copyHTMLBannerAssets(Context context, String path) { private static void copyAssets(Context context) { AssetManager assetManager = context.getAssets(); - // Will build a temp WebView in order to check Chrome version internally. - boolean legacyMode = WebViewUtils.getEngineWebViewVersionStatus(context, null, null) != WebViewUtils.EngineWebViewVersionStatus.FULL; - try { // Copy KMW files - if(legacyMode) { - // Replaces the standard ES6-friendly version of the host page with a legacy one that - // includes polyfill requests and that links the legacy, ES5-compatible version of KMW. 
- copyAssetWithRename(context, KMFilename_KeyboardHtml_Legacy, KMFilename_KeyboardHtml, "", true); - - copyAsset(context, KMFilename_JSLegacyEngine, "", true); - } else { - copyAsset(context, KMFilename_KeyboardHtml, "", true); - - // For versions of Chrome with full ES6 support, we use the ES6 artifact. - copyAsset(context, KMFilename_JSEngine, "", true); - } + copyAsset(context, KMFilename_KeyboardHtml, "", true); - // Is still built targeting ES5. + copyAsset(context, KMFilename_JSEngine, "", true); copyAsset(context, KMFilename_JSSentry, "", true); copyAsset(context, KMFilename_JSSentryInit, "", true); copyAsset(context, KMFilename_AndroidHost, "", true); @@ -887,12 +871,6 @@ private static void copyAssets(Context context) { // Copy default keyboard font copyAsset(context, KMDefault_KeyboardFont, "", true); - if(legacyMode) { - copyAsset(context, KMFilename_JSPolyfill, "", true); - copyAsset(context, KMFilename_JSPolyfill2, "", true); - copyAsset(context, KMFilename_JSPolyfill3, "", true); - } - // Keyboard packages directory File packagesDir = new File(getPackagesDir()); if (!packagesDir.exists()) { @@ -1638,7 +1616,7 @@ public static boolean removeKeyboard(Context context, int position) { public static boolean isDefaultKey(String key) { return ( - key != null && + key != null && key.equals(KMString.format("%s_%s", KMDefault_LanguageID, KMDefault_KeyboardID))); } @@ -2084,11 +2062,11 @@ public static Point getWindowSize(Context context) { wm.getDefaultDisplay().getSize(size); return size; } - + WindowMetrics windowMetrics = wm.getCurrentWindowMetrics(); return new Point( windowMetrics.getBounds().width(), - windowMetrics.getBounds().height()); + windowMetrics.getBounds().height()); } public static float getWindowDensity(Context context) { diff --git a/android/KMEA/app/src/main/res/values-b+el/strings.xml b/android/KMEA/app/src/main/res/values-b+el/strings.xml new file mode 100644 index 00000000000..871171d63f7 --- /dev/null +++ 
b/android/KMEA/app/src/main/res/values-b+el/strings.xml @@ -0,0 +1,172 @@ + + + + + + + Πληκτρολόγιο + Πληκτρολόγια + + + + Ἄλλη μέθοδος εἰσαγωγῆς + Ἄλλες Μέθοδοι Εἰσαγωγῆς + + + Προσθέστε νέο Πληκτρολόγιο + + Ἐγκατεστημένες γλῶσσες + + Ρυθμίσεις %1$s + + Προσθέστε + + Πίσω + + Ἀκυρῶστε + + Κλεῖστε + + Κλεῖστε τὸ Κῆμαν + + Προχωρῆστε + + Ἑπόμενη Μέθοδος Εἰσαγωγῆς + + Λήψη + + Ἐγκαταστῆστε + + Ἀργότερα + + Ἑπόμενο + + OK + + Ἐνημερῶστε + + Δὲν ὑπάρχει σύνδεση μὲ τὸ Διαδίκτυο + + Ἀδυναμία συνδέσεως μὲ τὸν διακομιστὴ τοῦ Κῆμαν! + + Θέλετε νὰ διαγράψετε αὐτὸ τὸ πληκτρολόγιο; + + Θὰ θέλατε νὰ κατεβάσετε τὴν τελευταία ἔκδοση αὐτοῦ τοῦ πληκτρολογίου; + + Θά θέλατε νὰ ἐνημερώσετε τώρα πληκτρολόγια καὶ λεξικά; + + Ἐνημερώσεις Πόρων + + Διαθέσιμες Ἐνημερώσεις Πόρων + + %1$s (Διαθέσιμη Ἐνημέρωση) + + Διαθέσιμες ἐνημερώσεις γιὰ τὸ πληκτρολόγιο %1$s: %2$s + + Διαθέσιμες ἐνημερώσεις γιὰ τὸ λεξικὸ %1$s: %2$s + + Ἔκδοση πληκτρολογίου + + Σύνδεσμος βοηθείας + + Ἀπεγκαταστῆστε πληκτρολόγιο + + [νέο] %1$s + + Σαρῶστε αὐτὸν τὸν κωδικό γιὰ νὰ φορτώσετε\nαὐτὸ τὸ πληκτρολόγιο σὲ ἄλλη συσκευή + + Καλωσορίσατε στὸ %1$s + + Ἀπαιτεῖται βιβλιοθήκη FileProvider γιὰ νὰ δεῖτε ἀρχεῖο βοηθείας: %1$s + + Μοιραῖο σφάλμα πληκτρολογίου στὸ %1$s:%2$s γιὰ τὴν %3$s γλῶσσα. Φορτώνεται προεπιλεγμένο πληκτρολόγιο. + + Error in keyboard %1$s:%2$s for %3$s language. + + Ἔλεγχος συσχετισμένου λεξικοῦ πρὸς λῆψιν + Ἀδυναμία συνδέσεως μὲ τὸν διακομιστὴ Κῆμαν γιὰ τὸν ἔλεγχο συσχετισμένου λεξικοῦ πρὸς λῆψιν + + Θὰ θέλατε νὰ κατεβάσετε τὴν τελευταία ἔκδοση αὐτοῦ τοῦ λεξικοῦ; + + Δὲν ὑπάρχει λεξικὸ πρὸς λῆψιν + + Μὴ διαθέσιμος κατάλογος πόρων + + Ἔχει ξεκινήσει ἐνημέρωση καταλόγου στὸ παρασκήνιο + + Ἡ λήψη τοῦ καταλόγου συνεχίζεται· παρακαλοῦμε ξαναδοκιμάστε σὲ λίγο! + + Ἔλεγχος πόρου σὲ ἐξέλιξη + + Ἡ λήψη τοῦ πληκτρολογίου ἔχει ξεκινήσει στὸ παρασκήνιο + + Ἡ λήψη τοῦ ἐπιλεγέντος πληκτρολογίου βρίσκεται σὲ ἐξέλιξη· παρακαλοῦμε ξαναδοκιμάστε σὲ λίγο! + + Ἡ λήψη τοῦ πληκτρολογίου ὁλοκληρώθηκε! 
+ + Ἡ λήψη τοῦ λεξικοῦ ἔχει ξεκινήσει στὸ παρασκήνιο + + Ἡ λήψη τοῦ ἐπιλεγέντος λεξικοῦ βρίσκεται σὲ ἐξέλιξη· παρακαλοῦμε ξαναδοκιμάστε σὲ λίγο! + + Ἡ λήψη τοῦ λεξικοῦ ὁλοκληρώθηκε. + + Ἡ λήψη ἀπέτυχε + + Ἀποτυχία ἀνακτήσεως ληφθέντος ἀρχείου + + Ἀποτυχία προσβάσεως στὸν διακομιστή! + + "Ὅλοι οἱ πόροι ἔχουν ἐνημερωθεῖ!" + + Ἕνας ἢ περισσότεροι πόροι ἀπέτυχαν νὰ ἐνημερωθοῦν! + + Οἱ πόροι ἐνημερώθηκαν ἐπιτυχῶς! + + Ἔκδοση λεξικοῦ + + Ἀπεγκαταστῆτε λεξικό + + Θὰ θέλατε νὰ διαγράψετε αὐτὸ τὸ λεξικό; + + Τὸ λεξικὸ διεγράφη + + Τὸ πληκτρολόγιο %1$s ἐγκατεστάθη + + Τὸ πληκτρολόγιο διεγράφη + + Ἐνεργοποιῆστε τὶς διορθώσεις + + Ἐνεργοποιῆστε προβλέψεις + + Λεξικά + + Λεξικό + Λεξικά + + + Ἔλεγχος διαθεσίμου λεξικοῦ + Ἔλεγχος λεξικῶν ὀνλάϊν + + Λεξικό: %1$s + + %1$s λεξικά + + + Τὸ λεξικὸ ἐγκατεστάθη + + + (%1$d πληκτρολόγιο) + (%1$d πληκτρολόγια) + + + Προεπιλεγμένη Γλῶσσα + + + + Διαγράψτε + + + Κτυπῆστε ἐδῶ γιὰ νὰ ἀλλάξετε πληκτρολόγιο + + Ἀδυναμία ἐκκινήσεως φυλλομετρητῆ + diff --git a/android/KMEA/app/src/main/res/values-pt-rPT/strings.xml b/android/KMEA/app/src/main/res/values-pt-rPT/strings.xml index f66d5fa4443..f58f48dd2dc 100644 --- a/android/KMEA/app/src/main/res/values-pt-rPT/strings.xml +++ b/android/KMEA/app/src/main/res/values-pt-rPT/strings.xml @@ -45,7 +45,7 @@ Atualizar - No internet connection + Sem ligação à Internet Não foi possível conectar ao servidor Keyman! @@ -84,7 +84,7 @@ Erro no teclado %1$s:%2$s para %3$s idioma. Verificando o download do dicionário associado - Cannot connect to Keyman server to check for associated dictionary to download + A ligação ao servidor do Keyman para verificar dicionários para descarregar falhou Gostaria de baixar a versão mais recente deste dicionário? @@ -107,7 +107,7 @@ Download do dicionário iniciado em segundo plano O dicionário selecionado já está baixando; por favor, tente novamente mais tarde! - + O download do dicionário terminou. Download falhou @@ -119,7 +119,7 @@ "Todos os recursos estão atualizados!" 
Um ou mais recursos falharam ao atualizar! - + Recursos atualizados com sucesso! Versão do dicionário @@ -128,11 +128,11 @@ Você gostaria de excluir este dicionário? - Dictionary deleted + Dicionário eliminado Teclado %1$s instalado - Keyboard deleted + Teclado eliminado Habilitar correções @@ -140,12 +140,12 @@ Dicionário - Dictionary - Dictionaries + Dicionário + Dicionários Verificar dicionário disponível - Check for dictionaries online + Verifique dicionários online Dicionário: %1$s @@ -168,5 +168,5 @@ Toque aqui para alterar o teclado - Unable to launch web browser + Não foi possível carregar o navegador diff --git a/android/KMEA/build.sh b/android/KMEA/build.sh index ccf7c92887b..878b54797dc 100755 --- a/android/KMEA/build.sh +++ b/android/KMEA/build.sh @@ -77,8 +77,6 @@ if builder_start_action build:engine; then echo "Copying Keyman Web artifacts" cp "$KEYMAN_WEB_ROOT/build/app/webview/$CONFIG/keymanweb-webview.js" "$ENGINE_ASSETS/keymanweb-webview.js" cp "$KEYMAN_WEB_ROOT/build/app/webview/$CONFIG/keymanweb-webview.js.map" "$ENGINE_ASSETS/keymanweb-webview.js.map" - cp "$KEYMAN_WEB_ROOT/build/app/webview/$CONFIG/keymanweb-webview.es5.js" "$ENGINE_ASSETS/keymanweb-webview.es5.js" - cp "$KEYMAN_WEB_ROOT/build/app/webview/$CONFIG/keymanweb-webview.es5.js.map" "$ENGINE_ASSETS/keymanweb-webview.es5.js.map" cp "$KEYMAN_WEB_ROOT/build/app/webview/$CONFIG/map-polyfill.js" "$ENGINE_ASSETS/map-polyfill.js" cp "$KEYMAN_WEB_ROOT/build/app/resources/osk/ajax-loader.gif" "$ENGINE_ASSETS/ajax-loader.gif" cp "$KEYMAN_WEB_ROOT/build/app/resources/osk/kmwosk.css" "$ENGINE_ASSETS/kmwosk.css" diff --git a/common/models/templates/src/common.ts b/common/models/templates/src/common.ts index 9f3eae1dc1a..853bc7c18dd 100644 --- a/common/models/templates/src/common.ts +++ b/common/models/templates/src/common.ts @@ -115,10 +115,13 @@ export function transformToSuggestion(transform: Transform, p: number): WithOutc export function transformToSuggestion(transform: Transform, p?: 
number): Outcome { let suggestion: Outcome = { transform: transform, - transformId: transform.id, displayAs: transform.insert }; + if(transform.id !== undefined) { + suggestion.transformId = transform.id; + } + if(p === 0 || p) { suggestion.p = p; } diff --git a/common/models/templates/src/index.ts b/common/models/templates/src/index.ts index 52650ba9bb9..8563004fefa 100644 --- a/common/models/templates/src/index.ts +++ b/common/models/templates/src/index.ts @@ -2,7 +2,6 @@ export { SENTINEL_CODE_UNIT, applyTransform, buildMergedTransform, isHighSurrogate, isLowSurrogate, isSentinel, transformToSuggestion, defaultApplyCasing } from "./common.js"; -export { default as PriorityQueue, Comparator } from "./priority-queue.js"; export { default as QuoteBehavior } from "./quote-behavior.js"; export { Tokenization, tokenize, getLastPreCaretToken, wordbreak } from "./tokenization.js"; export { default as TrieModel, TrieModelOptions } from "./trie-model.js"; \ No newline at end of file diff --git a/common/models/templates/src/trie-model.ts b/common/models/templates/src/trie-model.ts index 0a571517c7f..9de2fb9050e 100644 --- a/common/models/templates/src/trie-model.ts +++ b/common/models/templates/src/trie-model.ts @@ -26,12 +26,11 @@ // Should probably make a 'lm-utils' submodule. // Allows the kmwstring bindings to resolve. -import { extendString } from "@keymanapp/web-utils"; +import { extendString, PriorityQueue } from "@keymanapp/web-utils"; import { default as defaultWordBreaker } from "@keymanapp/models-wordbreakers"; import { applyTransform, isHighSurrogate, isSentinel, SENTINEL_CODE_UNIT, transformToSuggestion } from "./common.js"; import { getLastPreCaretToken } from "./tokenization.js"; -import PriorityQueue from "./priority-queue.js"; extendString(); @@ -74,15 +73,6 @@ export interface TrieModelOptions { punctuation?: LexicalModelPunctuation; } -/** - * Used to determine the probability of an entry from the trie. 
- */ -type TextWithProbability = { - text: string; - // TODO: use negative-log scaling instead? - p: number; // real-number weight, from 0 to 1 -} - class Traversal implements LexiconTraversal { /** * The lexical prefix corresponding to the current traversal state. @@ -95,14 +85,75 @@ class Traversal implements LexiconTraversal { */ root: Node; - constructor(root: Node, prefix: string) { + /** + * The max weight for the Trie being 'traversed'. Needed for probability + * calculations. + */ + totalWeight: number; + + constructor(root: Node, prefix: string, totalWeight: number) { this.root = root; this.prefix = prefix; + this.totalWeight = totalWeight; + } + + child(char: USVString): LexiconTraversal | undefined { + /* + Note: would otherwise return the current instance if `char == ''`. If + such a call is happening, it's probably indicative of an implementation + issue elsewhere - let's signal now in order to catch such stuff early. + */ + if(char == '') { + return undefined; + } + + // Split into individual code units. + let steps = char.split(''); + let traversal: Traversal | undefined = this; + + while(steps.length > 0 && traversal) { + const step: string = steps.shift()!; + traversal = traversal._child(step); + } + + return traversal; + } + + // Handles one code unit at a time. 
+ private _child(char: USVString): Traversal | undefined { + const root = this.root; + const totalWeight = this.totalWeight; + const nextPrefix = this.prefix + char; + + if(root.type == 'internal') { + let childNode = root.children[char]; + if(!childNode) { + return undefined; + } + + return new Traversal(childNode, nextPrefix, totalWeight); + } else { + // root.type == 'leaf'; + const legalChildren = root.entries.filter(function(entry) { + return entry.key.indexOf(nextPrefix) == 0; + }); + + if(!legalChildren.length) { + return undefined; + } + + return new Traversal(root, nextPrefix, totalWeight); + } } - *children(): Generator<{char: string, traversal: () => LexiconTraversal}> { + *children(): Generator<{char: USVString, traversal: () => LexiconTraversal}> { let root = this.root; + // We refer to the field multiple times in this method, and it doesn't change. + // This also assists minification a bit, since we can't minify when re-accessing + // through `this.`. + const totalWeight = this.totalWeight; + if(root.type == 'internal') { for(let entry of root.values) { let entryNode = root.children[entry]; @@ -120,7 +171,7 @@ class Traversal implements LexiconTraversal { let prefix = this.prefix + entry + lowSurrogate; yield { char: entry + lowSurrogate, - traversal: function() { return new Traversal(internalNode.children[lowSurrogate], prefix) } + traversal: function() { return new Traversal(internalNode.children[lowSurrogate], prefix, totalWeight) } } } } else { @@ -131,7 +182,7 @@ class Traversal implements LexiconTraversal { yield { char: entry, - traversal: function () {return new Traversal(entryNode, prefix)} + traversal: function () {return new Traversal(entryNode, prefix, totalWeight)} } } } else if(isSentinel(entry)) { @@ -143,7 +194,7 @@ class Traversal implements LexiconTraversal { let prefix = this.prefix + entry; yield { char: entry, - traversal: function() { return new Traversal(entryNode, prefix)} + traversal: function() { return new 
Traversal(entryNode, prefix, totalWeight)} } } } @@ -165,30 +216,41 @@ class Traversal implements LexiconTraversal { } yield { char: nodeKey, - traversal: function() { return new Traversal(root, prefix + nodeKey)} + traversal: function() { return new Traversal(root, prefix + nodeKey, totalWeight)} } }; return; } } - get entries(): string[] { + get entries() { + const entryMapper = (value: Entry) => { + return { + text: value.content, + p: value.weight / this.totalWeight + } + } + if(this.root.type == 'leaf') { let prefix = this.prefix; let matches = this.root.entries.filter(function(entry) { return entry.key == prefix; }); - return matches.map(function(value) { return value.content }); + return matches.map(entryMapper); } else { let matchingLeaf = this.root.children[SENTINEL_CODE_UNIT]; if(matchingLeaf && matchingLeaf.type == 'leaf') { - return matchingLeaf.entries.map(function(value) { return value.content }); + return matchingLeaf.entries.map(entryMapper); } else { return []; } } } + + get p(): number { + return this.root.weight / this.totalWeight; + } } /** @@ -286,7 +348,7 @@ export default class TrieModel implements LexicalModel { } public traverseFromRoot(): LexiconTraversal { - return new Traversal(this._trie['root'], ''); + return this._trie.traverseFromRoot(); } }; @@ -307,11 +369,10 @@ export default class TrieModel implements LexicalModel { type SearchKey = string & { _: 'SearchKey'}; /** - * The priority queue will always pop the most weighted item. There can only - * be two kinds of items right now: nodes, and entries; both having a weight - * attribute. + * The priority queue will always pop the most probable item - be it a Traversal + * state or a lexical entry reached via Traversal. */ -type Weighted = Node | Entry; +type TraversableWithProb = TextWithProbability | LexiconTraversal; /** * A function that converts a string (word form or query) into a search key @@ -367,9 +428,9 @@ interface Entry { * Wrapper class for the trie and its nodes. 
*/ class Trie { - private root: Node; + public readonly root: Node; /** The total weight of the entire trie. */ - private totalWeight: number; + readonly totalWeight: number; /** * Converts arbitrary strings to a search key. The trie is built up of * search keys; not each entry's word form! @@ -382,6 +443,10 @@ class Trie { this.totalWeight = totalWeight; } + public traverseFromRoot(): LexiconTraversal { + return new Traversal(this.root, '', this.totalWeight); + } + /** * Lookups an arbitrary prefix (a query) in the trie. Returns the top 3 * results in sorted order. @@ -389,13 +454,26 @@ class Trie { * @param prefix */ lookup(prefix: string): TextWithProbability[] { - let searchKey = this.toKey(prefix); - let lowestCommonNode = findPrefix(this.root, searchKey); - if (lowestCommonNode === null) { + const searchKey = this.toKey(prefix); + const rootTraversal = this.traverseFromRoot().child(searchKey); + + if(!rootTraversal) { return []; } - return getSortedResults(lowestCommonNode, searchKey, this.totalWeight); + const directEntries = rootTraversal.entries; + // `Set` requires Chrome 38+, which is more recent than Chrome 35. + const directSet: Record = {}; + for(const entry of directEntries) { + directSet[entry.text] = entry.text; + } + + const bestEntries = getSortedResults(rootTraversal); + const deduplicated = bestEntries.filter((entry) => !directSet[entry.text]); + + // Any entries directly hosted on the current node should get full display + // priority over anything from its descendants. + return directEntries.concat(deduplicated); } /** @@ -403,36 +481,10 @@ class Trie { * @param n How many suggestions, maximum, to return. */ firstN(n: number): TextWithProbability[] { - return getSortedResults(this.root, '' as SearchKey, this.totalWeight, n); + return getSortedResults(this.traverseFromRoot(), n); } } -/** - * Finds the deepest descendent in the trie with the given prefix key. 
- * - * This means that a search in the trie for a given prefix has a best-case - * complexity of O(m) where m is the length of the prefix. - * - * @param key The prefix to search for. - * @param index The index in the prefix. Initially 0. - */ -function findPrefix(node: Node, key: SearchKey, index: number = 0): Node | null { - // An important note - the Trie itself is built on a per-JS-character basis, - // not on a UTF-8 character-code basis. - if (node.type === 'leaf' || index === key.length) { - return node; - } - - // So, for SMP models, we need to match each char of the supplementary pair - // in sequence. Each has its own node in the Trie. - let char = key[index]; - if (node.children[char]) { - return findPrefix(node.children[char], key, index + 1); - } - - return null; -} - /** * Returns all entries matching the given prefix, in descending order of * weight. @@ -441,72 +493,36 @@ function findPrefix(node: Node, key: SearchKey, index: number = 0): Node | null * @param results the current results * @param queue */ -function getSortedResults(node: Node, prefix: SearchKey, N: number, limit = MAX_SUGGESTIONS): TextWithProbability[] { - let queue = new PriorityQueue(function(a: Weighted, b: Weighted) { +function getSortedResults(traversal: LexiconTraversal, limit = MAX_SUGGESTIONS): TextWithProbability[] { + let queue = new PriorityQueue(function(a: TraversableWithProb, b: TraversableWithProb) { // In case of Trie compilation issues that emit `null` or `undefined` - return (b ? b.weight : 0) - (a ? a.weight : 0); + return (b ? b.p : 0) - (a ? a.p : 0); }); let results: TextWithProbability[] = []; - if (node.type === 'leaf') { - // Assuming the values are sorted, we can just add all of the values in the - // leaf, until we reach the limit. - for (let item of node.entries) { - // String.startsWith is not supported on certain Android (5.0) devices we wish to support. - // Requires a minimum of Chrome 36, as opposed to 5.0's default of 35. 
- if (item.key.indexOf(prefix) == 0) { - let { content, weight } = item; - results.push({ - text: content, - p: weight / N - }); - - if (results.length >= limit) { - return results; - } + queue.enqueue(traversal); + + while(queue.count > 0) { + const entry = queue.dequeue(); + + if((entry as TextWithProbability)!.text !== undefined) { + const lexicalEntry = entry as TextWithProbability; + results.push(lexicalEntry); + if(results.length >= limit) { + return results; } - } - } else { - queue.enqueue(node); - let next: Weighted | undefined; - - while (next = queue.dequeue()) { - if (isNode(next)) { - // When a node is next up in the queue, that means that next least - // likely suggestion is among its decsendants. - // So we search all of its descendants! - if (next.type === 'leaf') { - queue.enqueueAll(next.entries); - } else { - // XXX: alias `next` so that TypeScript can be SURE that internal is - // in fact an internal node. Because of the callback binding to the - // original definition of node (i.e., a Node | Entry), this will not - // type-check otherwise. - let internal = next; - queue.enqueueAll(next.values.map(char => { - return internal.children[char]; - })); - } - } else { - // When an entry is up next in the queue, we just add its contents to - // the results! - results.push({ - text: next.content, - p: next.weight / N - }); - if (results.length >= limit) { - return results; - } + } else { + const traversal = entry as LexiconTraversal; + queue.enqueueAll(traversal.entries); + let children: LexiconTraversal[] = [] + for(let child of traversal.children()) { + children.push(child.traversal()); } + queue.enqueueAll(children); } } - return results; -} - -/** TypeScript type guard that returns whether the thing is a Node. 
*/ -function isNode(x: Entry | Node): x is Node { - return 'type' in x; + return results; } /** diff --git a/common/models/templates/test/test-trie-traversal.js b/common/models/templates/test/test-trie-traversal.js index d3d04963165..09237ee9a0e 100644 --- a/common/models/templates/test/test-trie-traversal.js +++ b/common/models/templates/test/test-trie-traversal.js @@ -13,6 +13,12 @@ var smpForUnicode = function(code){ return String.fromCharCode(H, L); } +// Prob: entry weight / total weight +// "the" is the highest-weighted word in the fixture. +const PROB_OF_THE = 1000 / 500500; +const PROB_OF_TRUE = 607 / 500500; +const PROB_OF_TROUBLE = 267 / 500500; + describe('Trie traversal abstractions', function() { it('root-level iteration over child nodes', function() { var model = new TrieModel(jsonFixture('tries/english-1000')); @@ -21,7 +27,11 @@ describe('Trie traversal abstractions', function() { assert.isDefined(rootTraversal); let rootKeys = ['t', 'o', 'a', 'i', 'w', 'h', 'f', 'b', 'n', 'y', 's', 'm', - 'u', 'c', 'd', 'l', 'e', 'j', 'p', 'g', 'v', 'k', 'r', 'q'] + 'u', 'c', 'd', 'l', 'e', 'j', 'p', 'g', 'v', 'k', 'r', 'q']; + + rootKeys.forEach((entry) => assert.isOk(rootTraversal.child(entry))); + assert.isNotOk(rootTraversal.child('x')); + assert.isNotOk(rootTraversal.child('z')); for(let child of rootTraversal.children()) { let keyIndex = rootKeys.indexOf(child.char); @@ -50,6 +60,7 @@ describe('Trie traversal abstractions', function() { assert.isDefined(traversalInner1); assert.isArray(child.traversal().entries); assert.isEmpty(child.traversal().entries); + assert.equal(traversalInner1.p, PROB_OF_THE); for(let tChild of traversalInner1.children()) { if(tChild.char == 'h') { @@ -58,19 +69,28 @@ describe('Trie traversal abstractions', function() { assert.isDefined(traversalInner2); assert.isEmpty(tChild.traversal().entries); assert.isArray(tChild.traversal().entries); + assert.equal(traversalInner2.p, PROB_OF_THE); for(let hChild of traversalInner2.children()) 
{ if(hChild.char == 'e') { eSuccess = true; let traversalInner3 = hChild.traversal(); assert.isDefined(traversalInner3); - assert.isDefined(traversalInner3.entries); - assert.equal(traversalInner3.entries[0], "the"); + assert.deepEqual(traversalInner3.entries, [ + { + text: "the", + p: PROB_OF_THE + } + ]); + assert.equal(traversalInner3.p, PROB_OF_THE); for(let eChild of traversalInner3.children()) { let keyIndex = eKeys.indexOf(eChild.char); assert.notEqual(keyIndex, -1, "Did not find char '" + eChild.char + "' in array!"); + + // THE is not accessible if any of the sub-tries of our 'e' node (traversalInner3). + assert.isBelow(eChild.traversal().p, PROB_OF_THE); eKeys.splice(keyIndex, 1); } } @@ -87,6 +107,38 @@ describe('Trie traversal abstractions', function() { assert.isEmpty(eKeys); }); + it('direct traversal with simple internal nodes', function() { + var model = new TrieModel(jsonFixture('tries/english-1000')); + + let rootTraversal = model.traverseFromRoot(); + assert.isDefined(rootTraversal); + + let eKeys = ['y', 'r', 'i', 'm', 's', 'n', 'o']; + + const tNode = rootTraversal.child('t'); + assert.isOk(tNode); + assert.isDefined(tNode); + assert.isArray(tNode.entries); + assert.isEmpty(tNode.entries); + + const hNode = tNode.child('h'); + assert.isOk(hNode); + assert.isDefined(hNode); + assert.isArray(hNode.entries); + assert.isEmpty(hNode.entries); + + const eNode = hNode.child('e'); + assert.isOk(eNode); + assert.isDefined(eNode); + assert.isArray(eNode.entries); + assert.isNotEmpty(eNode.entries); + assert.equal(eNode.entries[0].text, "the"); + + for(let key of eKeys) { + assert.isOk(eNode.child(key)); + } + }); + it('traversal over compact leaf node', function() { var model = new TrieModel(jsonFixture('tries/english-1000')); @@ -102,6 +154,7 @@ describe('Trie traversal abstractions', function() { assert.isDefined(traversalInner1); assert.isArray(child.traversal().entries); assert.isEmpty(child.traversal().entries); + assert.equal(traversalInner1.p, 
PROB_OF_THE); for(let tChild of traversalInner1.children()) { if(tChild.char == 'r') { @@ -109,6 +162,7 @@ describe('Trie traversal abstractions', function() { assert.isDefined(traversalInner2); assert.isArray(tChild.traversal().entries); assert.isEmpty(tChild.traversal().entries); + assert.equal(traversalInner2.p, PROB_OF_TRUE); for(let rChild of traversalInner2.children()) { if(rChild.char == 'o') { @@ -137,10 +191,17 @@ describe('Trie traversal abstractions', function() { if(leafChildSequence.length > 0) { assert.isArray(curChild.traversal().entries); assert.isEmpty(curChild.traversal().entries); + assert.equal(curChild.traversal().p, PROB_OF_TROUBLE); } else { let finalTraversal = curChild.traversal(); + assert.equal(finalTraversal.p, PROB_OF_TROUBLE); assert.isDefined(finalTraversal.entries); - assert.equal(finalTraversal.entries[0], 'trouble'); + assert.deepEqual(finalTraversal.entries, [ + { + text: 'trouble', + p: PROB_OF_TROUBLE + } + ]); eSuccess = true; } } while (leafChildSequence.length > 0); @@ -154,7 +215,6 @@ describe('Trie traversal abstractions', function() { assert.isTrue(eSuccess); }); - it('traversal with SMP entries', function() { // Two entries, both of which read "apple" to native English speakers. // One solely uses SMP characters, the other of which uses a mix of SMP and standard. @@ -179,18 +239,20 @@ describe('Trie traversal abstractions', function() { for(let child of rootTraversal.children()) { if(child.char == smpA) { aSuccess = true; - let traversalInner1 = child.traversal(); + const traversalInner1 = child.traversal(); assert.isDefined(traversalInner1); - assert.isArray(child.traversal().entries); - assert.isEmpty(child.traversal().entries); + assert.isArray(traversalInner1.entries); + assert.isEmpty(traversalInner1.entries); + assert.equal(traversalInner1.p, 0.5); // The two entries are equally weighted. 
for(let aChild of traversalInner1.children()) { if(aChild.char == smpP) { pSuccess = true; - let traversalInner2 = aChild.traversal(); + const traversalInner2 = aChild.traversal(); assert.isDefined(traversalInner2); - assert.isArray(aChild.traversal().entries); - assert.isEmpty(aChild.traversal().entries); + assert.isArray(traversalInner2.entries); + assert.isEmpty(traversalInner2.entries); + assert.equal(traversalInner2.p, 0.5); for(let pChild of traversalInner2.children()) { let keyIndex = pKeys.indexOf(pChild.char); @@ -198,10 +260,11 @@ describe('Trie traversal abstractions', function() { pKeys.splice(keyIndex, 1); if(pChild.char == 'p') { // We'll test traversal with the 'mixed' entry from here. - let traversalInner3 = pChild.traversal(); + const traversalInner3 = pChild.traversal(); assert.isDefined(traversalInner3); - assert.isArray(pChild.traversal().entries); - assert.isEmpty(pChild.traversal().entries); + assert.isArray(traversalInner3.entries); + assert.isEmpty(traversalInner3.entries); + assert.equal(traversalInner3.p, 0.5); // Now to handle the rest, knowing it's backed by a leaf node. let curChild = pChild; @@ -227,12 +290,20 @@ describe('Trie traversal abstractions', function() { // Conditional test - if that was not the final character, entries should be undefined. 
if(leafChildSequence.length > 0) { - assert.isArray(curChild.traversal().entries); - assert.isEmpty(curChild.traversal().entries); + const nextTraversal = curChild.traversal() + assert.isArray(nextTraversal.entries); + assert.isEmpty(nextTraversal.entries); + assert.equal(nextTraversal.p, 0.5); } else { let finalTraversal = curChild.traversal(); assert.isDefined(finalTraversal.entries); - assert.equal(finalTraversal.entries[0], smpA + smpP + 'pl' + smpE); + assert.deepEqual(finalTraversal.entries, [ + { + text: smpA + smpP + 'pl' + smpE, + p: 1/2 + } + ]); + assert.equal(finalTraversal.p, 0.5); eSuccess = true; } } while (leafChildSequence.length > 0); @@ -249,4 +320,52 @@ describe('Trie traversal abstractions', function() { assert.isEmpty(pKeys); }); + + it('direct traversal with SMP entries', function() { + // Two entries, both of which read "apple" to native English speakers. + // One solely uses SMP characters, the other of which uses a mix of SMP and standard. + var model = new TrieModel(jsonFixture('tries/smp-apple')); + + let rootTraversal = model.traverseFromRoot(); + assert.isDefined(rootTraversal); + + let smpA = smpForUnicode(0x1d5ba); + let smpP = smpForUnicode(0x1d5c9); + let smpL = smpForUnicode(0x1d5c5); + let smpE = smpForUnicode(0x1d5be); + + // Just to be sure our utility function is working right. + assert.equal(smpA + smpP + 'pl' + smpE, "𝖺𝗉pl𝖾"); + + let pKeys = ['p', smpP]; + let leafChildSequence = ['l', smpE]; + + const aNode = rootTraversal.child(smpA); + assert.isOk(aNode); + assert.isNotOk(rootTraversal.child('a')); + + const pNode1 = aNode.child(smpP); + assert.isOk(pNode1); + assert.isNotOk(aNode.child('p')); + + const pNode2 = pNode1.child('p'); + assert.isOk(pNode2); + assert.isOk(pNode1.child(smpP)); // Both exist for this step. 
+ + const lNode = pNode2.child('l'); + assert.isOk(lNode); + assert.isNotOk(pNode2.child(smpL)); + + const eNode = lNode.child(smpE); + assert.isOk(eNode); + assert.isNotOk(lNode.child('e')); + + assert.deepEqual(eNode.entries, [ + { + text: smpA + smpP + 'pl' + smpE, + p: 1/2 + } + ]); + assert.equal(eNode.p, 0.5); + }); }); diff --git a/common/models/types/index.d.ts b/common/models/types/index.d.ts index 30aba3ba3d4..0d6223bc64a 100644 --- a/common/models/types/index.d.ts +++ b/common/models/types/index.d.ts @@ -19,13 +19,32 @@ declare type USVString = string; declare type CasingForm = 'lower' | 'initial' | 'upper'; +/** + * Represents one lexical entry and its probability.. + */ +type TextWithProbability = { + /** + * A lexical entry (word) offered by the model. + * + * Note: not the search-term keyed part. This will match the actual, unkeyed form. + */ + text: string; + + /** + * The probability of the lexical entry, directly based upon its frequency. + * + * A real-number weight, from 0 to 1. + */ + p: number; +} + /** * Used to facilitate edit-distance calculations by allowing the LMLayer to * efficiently search the model's lexicon in a Trie-like manner. */ declare interface LexiconTraversal { /** - * Provides an iterable pattern used to search for words with a prefix matching + * Provides an iterable pattern used to search for words with a 'keyed' prefix matching * the current traversal state's prefix when a new character is appended. Iterating * across `children` provides 'breadth' to a lexical search. * @@ -50,6 +69,20 @@ declare interface LexiconTraversal { */ children(): Generator<{char: USVString, traversal: () => LexiconTraversal}>; + /** + * Allows direct access to the traversal state that results when appending one + * or more codepoints encoded in UTF-16 to the current traversal state's prefix. + * This allows bypassing iteration among all legal child Traversals. + * + * If such a traversal state is not supported, returns `undefined`. 
+ * + * Note: traversals navigate and represent the lexicon in its "keyed" state, + * as produced by use of the search-term keying function defined for the model. + * That is, if a model "keys" `è` to `e`, there will be no `è` child. + * @param char + */ + child(char: USVString): LexiconTraversal | undefined; + /** * Any entries directly keyed by the currently-represented lookup prefix. Entries and * children may exist simultaneously, but `entries` must always exist when no children are @@ -70,7 +103,14 @@ declare interface LexiconTraversal { * - prefix of 'crepe': ['crêpe', 'crêpé'] * - other examples: https://www.thoughtco.com/french-accent-homographs-1371072 */ - entries: USVString[]; + entries: TextWithProbability[]; + + // Note: `p`, not `maxP` - we want to see the same name for `this.entries.p` and `this.p` + /** + * Gives the probability of the highest-frequency lexical entry that is either a member or + * descendent of the represented trie `Node`. + */ + p: number; } /** @@ -294,6 +334,11 @@ declare interface Suggestion { * to the input text. Ex: 'keep', 'emoji', 'correction', etc. */ tag?: SuggestionTag; + + /** + * Set to true if this suggestion is a valid auto-accept target. + */ + autoAccept?: boolean } interface Reversion extends Suggestion { diff --git a/common/models/wordbreakers/src/default/index.ts b/common/models/wordbreakers/src/default/index.ts index 20e159ed198..ef50ab4b31c 100644 --- a/common/models/wordbreakers/src/default/index.ts +++ b/common/models/wordbreakers/src/default/index.ts @@ -274,7 +274,7 @@ export class BreakerContext { * @param chunk a chunk of text. Starts and ends at word boundaries. 
*/ function isNonSpace(chunk: string, options?: DefaultWordBreakerOptions): boolean { - return !Array.from(chunk).map((char) => property(char, options)).every(wb => ( + return !chunk.split('').map((char) => property(char, options)).every(wb => ( wb === WordBreakProperty.CR || wb === WordBreakProperty.LF || wb === WordBreakProperty.Newline || diff --git a/common/predictive-text/unit_tests/headless/worker-trie-integration.js b/common/predictive-text/unit_tests/headless/worker-trie-integration.js index 8cd257878d1..70583a978ee 100644 --- a/common/predictive-text/unit_tests/headless/worker-trie-integration.js +++ b/common/predictive-text/unit_tests/headless/worker-trie-integration.js @@ -15,7 +15,7 @@ describe('LMLayer using the trie model', function () { beforeEach(function() { worker = Worker.constructInstance(); - lmLayer = new LMLayer(capabilities(), worker); + lmLayer = new LMLayer(capabilities(), worker, true); }); afterEach(function () { @@ -82,7 +82,8 @@ describe('LMLayer using the trie model', function () { var suggestions = rawSuggestions.filter(function skimKeepSuggestions(s) { return s.tag !== 'keep' }) - assert.isAtLeast(suggestions.length, 1) + assert.isAtLeast(rawSuggestions.length, 1); + assert.isAtLeast(suggestions.length, 1); // We SHOULD get 'naïve' suggested var topSuggestion = suggestions[0]; diff --git a/common/predictive-text/unit_tests/in_browser/cases/top-level-lmlayer.spec.ts b/common/predictive-text/unit_tests/in_browser/cases/top-level-lmlayer.spec.ts index bd55123bec6..2eb5c4d4275 100644 --- a/common/predictive-text/unit_tests/in_browser/cases/top-level-lmlayer.spec.ts +++ b/common/predictive-text/unit_tests/in_browser/cases/top-level-lmlayer.spec.ts @@ -1,10 +1,12 @@ import { assert } from 'chai'; import { LMLayer, Worker as WorkerBuilder } from "@keymanapp/lexical-model-layer/web"; + +import { DEFAULT_BROWSER_TIMEOUT } from '@keymanapp/common-test-resources/test-timeouts.mjs'; import { defaultCapabilities } from '../helpers.mjs'; 
describe('LMLayer', function () { - this.timeout(5000); + this.timeout(DEFAULT_BROWSER_TIMEOUT); describe('[[constructor]]', function () { it('should construct with a single argument', function () { diff --git a/common/predictive-text/unit_tests/in_browser/cases/worker-dummy-integration.spec.ts b/common/predictive-text/unit_tests/in_browser/cases/worker-dummy-integration.spec.ts index bc2ebb551ef..2ac5f62d004 100644 --- a/common/predictive-text/unit_tests/in_browser/cases/worker-dummy-integration.spec.ts +++ b/common/predictive-text/unit_tests/in_browser/cases/worker-dummy-integration.spec.ts @@ -1,6 +1,8 @@ import { assert } from 'chai'; import { LMLayer, Worker } from "@keymanapp/lexical-model-layer/web"; + +import { DEFAULT_BROWSER_TIMEOUT } from '@keymanapp/common-test-resources/test-timeouts.mjs'; import { defaultCapabilities } from '../helpers.mjs'; // Import assertions, even using 'with', aren't yet supported in Firefox's engine. @@ -20,7 +22,7 @@ let hazelModel; * of suggestions when loaded and return them sequentially. */ describe('LMLayer using dummy model', function () { - this.timeout(5000); + this.timeout(DEFAULT_BROWSER_TIMEOUT); before(async () => { let loc = document.location; @@ -34,6 +36,18 @@ describe('LMLayer using dummy model', function () { // Since Firefox can't do JSON imports quite yet. const hazelFixture = await fetch(new URL(`${domain}/resources/json/models/future_suggestions/i_got_distracted_by_hazel.json`)); hazelModel = await hazelFixture.json(); + hazelModel = hazelModel.map((set) => set.map((entry) => { + return { + ...entry, + // Dummy-model predictions all claim probability 1; there's no actual probability stuff + // used here. + 'lexical-p': 1, + // We're predicting from a single transform, not a distribution, so probability 1. + 'correction-p': 1, + // Multiply 'em together. 
+ p: 1, + } + })); }); describe('Prediction', function () { diff --git a/common/predictive-text/unit_tests/in_browser/cases/worker-trie-integration.spec.ts b/common/predictive-text/unit_tests/in_browser/cases/worker-trie-integration.spec.ts index 00a50434db8..d551814dd96 100644 --- a/common/predictive-text/unit_tests/in_browser/cases/worker-trie-integration.spec.ts +++ b/common/predictive-text/unit_tests/in_browser/cases/worker-trie-integration.spec.ts @@ -1,6 +1,7 @@ import { assert } from 'chai'; import { LMLayer, Worker } from "@keymanapp/lexical-model-layer/web"; +import { DEFAULT_BROWSER_TIMEOUT } from '@keymanapp/common-test-resources/test-timeouts.mjs'; import { defaultCapabilities } from '../helpers.mjs'; // Import assertions, even using 'with', aren't yet supported in Firefox's engine. @@ -12,7 +13,7 @@ let domain: string; * How to run the worlist */ describe('LMLayer using the trie model', function () { - this.timeout(5000); + this.timeout(DEFAULT_BROWSER_TIMEOUT); before(async () => { let loc = document.location; diff --git a/common/test/resources/json/models/future_suggestions/i_got_distracted_by_hazel.json b/common/test/resources/json/models/future_suggestions/i_got_distracted_by_hazel.json index a9aa49753b4..d01c7f5c5af 100644 --- a/common/test/resources/json/models/future_suggestions/i_got_distracted_by_hazel.json +++ b/common/test/resources/json/models/future_suggestions/i_got_distracted_by_hazel.json @@ -48,7 +48,7 @@ [ { "transform": { - "insert": "distracted ", + "insert": "distracted by ", "deleteLeft": 0 }, "displayAs": "distracted by" diff --git a/common/test/resources/model-helpers.mjs b/common/test/resources/model-helpers.mjs index 1de10bdb15e..b2ad083f96f 100644 --- a/common/test/resources/model-helpers.mjs +++ b/common/test/resources/model-helpers.mjs @@ -113,7 +113,18 @@ export function randomToken() { } export function iGotDistractedByHazel() { - return jsonFixture('models/future_suggestions/i_got_distracted_by_hazel'); + return 
jsonFixture('models/future_suggestions/i_got_distracted_by_hazel').map((set) => set.map((entry) => { + return { + ...entry, + // Dummy-model predictions all claim probability 1; there's no actual probability stuff + // used here. + 'lexical-p': 1, + // We're predicting from a single transform, not a distribution, so probability 1. + 'correction-p': 1, + // Multiply 'em together. + p: 1, + } + })); } export function jsonFixture(name, root, import_root) { diff --git a/common/test/resources/models/simple-dummy.js b/common/test/resources/models/simple-dummy.js index 97180729bd6..86da6494b0f 100644 --- a/common/test/resources/models/simple-dummy.js +++ b/common/test/resources/models/simple-dummy.js @@ -9,9 +9,7 @@ Model.punctuation = { quotesForKeepSuggestion: { open: '“', close: '”'}, - // Important! Set this, or else the model compositor will - // insert something for us! - insertAfterWord: "", + insertAfterWord: " ", }; // A direct import/copy from i_got_distracted_by_hazel.json. @@ -19,21 +17,21 @@ [ { "transform": { - "insert": "I ", + "insert": "I", "deleteLeft": 0 }, "displayAs": "I" }, { "transform": { - "insert": "I'm ", + "insert": "I'm", "deleteLeft": 0 }, "displayAs": "I'm" }, { "transform": { - "insert": "Oh ", + "insert": "Oh", "deleteLeft": 0 }, "displayAs": "Oh" @@ -42,21 +40,21 @@ [ { "transform": { - "insert": "love ", + "insert": "love", "deleteLeft": 0 }, "displayAs": "love" }, { "transform": { - "insert": "am ", + "insert": "am", "deleteLeft": 0 }, "displayAs": "am" }, { "transform": { - "insert": "got ", + "insert": "got", "deleteLeft": 0 }, "displayAs": "got" @@ -65,21 +63,21 @@ [ { "transform": { - "insert": "distracted ", + "insert": "distracted by", "deleteLeft": 0 }, "displayAs": "distracted by" }, { "transform": { - "insert": "distracted ", + "insert": "distracted", "deleteLeft": 0 }, "displayAs": "distracted" }, { "transform": { - "insert": "a ", + "insert": "a", "deleteLeft": 0 }, "displayAs": "a" @@ -88,27 +86,27 @@ [ { "transform": { - 
"insert": "Hazel ", + "insert": "Hazel", "deleteLeft": 0 }, "displayAs": "Hazel" }, { "transform": { - "insert": "the ", + "insert": "the", "deleteLeft": 0 }, "displayAs": "the" }, { "transform": { - "insert": "a ", + "insert": "a", "deleteLeft": 0 }, "displayAs": "a" } ] - ]; + ]; return Model; }()); diff --git a/common/test/resources/test-timeouts.mjs b/common/test/resources/test-timeouts.mjs new file mode 100644 index 00000000000..2c5bfd5f2cb --- /dev/null +++ b/common/test/resources/test-timeouts.mjs @@ -0,0 +1 @@ +export const DEFAULT_BROWSER_TIMEOUT = 5000; //ms \ No newline at end of file diff --git a/common/test/resources/timeout-adapter.js b/common/test/resources/timeout-adapter.js deleted file mode 100644 index 0e93ec94a95..00000000000 --- a/common/test/resources/timeout-adapter.js +++ /dev/null @@ -1,43 +0,0 @@ -// Preprocessing of the Karma configuration's client.args parameter. - -var com = com || {}; -com.keyman = com.keyman || {}; -com.keyman.karma = com.keyman.karma || {}; - -(function() { - const testconfig = window['testconfig'] = {}; - - // Default value. - let mobile = false; - - // If we've set things up to support Device dection without loading KMW... - if(com.keyman.Device) { - try { - let device = new com.keyman.Device(); - device.detect(); - - mobile = (device.formFactor != 'desktop'); - } finally { - // no-op; silent failure's fine here. - } - } - - let configArgs = window['__karma__'].config.args; // Where Karma gives us our custom args. 
- for(var i = 0; i < configArgs.length; configArgs++) { - switch(configArgs[i].type) { - case 'timeouts': - var timeouts = JSON.parse(JSON.stringify(configArgs[i])); - delete timeouts.type; - - if(mobile) { - for(var key in timeouts) { - if(key != 'mobileFactor') { - timeouts[key] = timeouts[key] * timeouts['mobileFactor']; - } - } - } - testconfig['timeouts'] = timeouts; - break; - } - } -})(); diff --git a/common/tools/sourcemap-path-remapper/tsconfig.json b/common/tools/sourcemap-path-remapper/tsconfig.json index e207097f614..35744ee9887 100644 --- a/common/tools/sourcemap-path-remapper/tsconfig.json +++ b/common/tools/sourcemap-path-remapper/tsconfig.json @@ -10,7 +10,7 @@ "inlineSources": true, "sourceRoot": "/common/tools/sourcemap-path-remapper/src", "lib": ["dom", "es6"], - "target": "es5", + "target": "es6", "types": ["node"], "downlevelIteration": true, "baseUrl": "./", diff --git a/common/web/es-bundling/build.sh b/common/web/es-bundling/build.sh index cd6b975255a..768f41fa2fd 100755 --- a/common/web/es-bundling/build.sh +++ b/common/web/es-bundling/build.sh @@ -11,7 +11,6 @@ THIS_SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")" ################################ Main script ################################ builder_describe "Builds KMW's esbuild-oriented common configuration & tooling" \ - "@/common/web/tslib" \ "clean" \ "configure" \ "build" @@ -24,4 +23,4 @@ builder_parse "$@" builder_run_action configure verify_npm_setup builder_run_action clean rm -rf build/ -builder_run_action build tsc -b tsconfig.json \ No newline at end of file +builder_run_action build tsc -b tsconfig.json diff --git a/common/web/es-bundling/src/common-bundle.mts b/common/web/es-bundling/src/common-bundle.mts index d99a7bfc644..fb3dfb95a44 100644 --- a/common/web/es-bundling/src/common-bundle.mts +++ b/common/web/es-bundling/src/common-bundle.mts @@ -12,7 +12,7 @@ let profilePath; let sourceRoot; let platform; -let jsVersionTarget='es5'; +let jsVersionTarget='es6'; function 
doHelp(errCode?: number) { console.log(` @@ -141,4 +141,4 @@ const results = await esbuild.build(config); if(results.metafile) { let filesizeProfile = await esbuild.analyzeMetafile(results.metafile, { verbose: true }); fs.writeFileSync(profilePath, filesizeProfile); -} \ No newline at end of file +} diff --git a/common/web/es-bundling/src/configuration.mts b/common/web/es-bundling/src/configuration.mts index 4978aa73c77..0c7a724a416 100644 --- a/common/web/es-bundling/src/configuration.mts +++ b/common/web/es-bundling/src/configuration.mts @@ -2,16 +2,13 @@ import type * as esbuild from 'esbuild'; import { pluginForDowncompiledClassTreeshaking } from './classTreeshaker.mjs'; export const esmConfiguration: esbuild.BuildOptions = { - alias: { - 'tslib': '@keymanapp/tslib' - }, bundle: true, format: "esm", outExtension: { '.js': '.mjs'}, plugins: [ pluginForDowncompiledClassTreeshaking ], sourcemap: true, sourcesContent: true, - target: "es5" + target: "es6" }; export const iifeConfiguration: esbuild.BuildOptions = { @@ -70,4 +67,4 @@ export function bundleObjEntryPoints(configFolder: 'lib' | 'debug' | 'release', entryPoints: path, outdir: mappedRoot }; -} \ No newline at end of file +} diff --git a/common/web/gesture-recognizer/src/test/auto/browser/cases/canary.def.ts b/common/web/gesture-recognizer/src/test/auto/browser/cases/canary.def.ts index a17311bec82..241977b3221 100644 --- a/common/web/gesture-recognizer/src/test/auto/browser/cases/canary.def.ts +++ b/common/web/gesture-recognizer/src/test/auto/browser/cases/canary.def.ts @@ -7,8 +7,10 @@ import { InputSequenceSimulator } from '#tools'; +import { DEFAULT_BROWSER_TIMEOUT } from '@keymanapp/common-test-resources/test-timeouts.mjs'; + describe("'Canary' checks", function() { - this.timeout(5000); + this.timeout(DEFAULT_BROWSER_TIMEOUT); let domain: string; diff --git a/common/web/gesture-recognizer/src/test/auto/browser/cases/ignoredInputs.def.ts 
b/common/web/gesture-recognizer/src/test/auto/browser/cases/ignoredInputs.def.ts index e6fb6ad4309..b107fdea5d2 100644 --- a/common/web/gesture-recognizer/src/test/auto/browser/cases/ignoredInputs.def.ts +++ b/common/web/gesture-recognizer/src/test/auto/browser/cases/ignoredInputs.def.ts @@ -8,8 +8,10 @@ import { SequenceRecorder } from '#tools'; +import { DEFAULT_BROWSER_TIMEOUT } from '@keymanapp/common-test-resources/test-timeouts.mjs'; + describe("Layer one - DOM -> InputSequence", function() { - this.timeout(5000); + this.timeout(DEFAULT_BROWSER_TIMEOUT); let controller: HostFixtureLayoutController; diff --git a/common/web/input-processor/src/corrections.ts b/common/web/input-processor/src/corrections.ts index fa9b77036fe..39c5ae2ef37 100644 --- a/common/web/input-processor/src/corrections.ts +++ b/common/web/input-processor/src/corrections.ts @@ -80,7 +80,12 @@ export function distributionFromDistanceMaps(squaredDistMaps: Map any; /** * Covers 'tryaccept' events. */ -export type TryUIHandler = (source: string) => boolean; +export type TryUIHandler = (source: string, returnObj: {shouldSwallow: boolean}) => boolean; export type InvalidateSourceEnum = 'new'|'context'; @@ -168,9 +168,9 @@ export default class LanguageProcessor extends EventEmitter { - let result = new ReadySuggestions(suggestions, transform.id); - this.emit("suggestionsready", result); - this.currentPromise = null; + let promise = this.currentPromise = this.lmEngine.revertSuggestion(reversion, new ContextWindow(original.preInput, this.configuration, null)) + // If the "current Promise" is as set above, clear it. + // If another one has been triggered since... don't. + promise.then(() => this.currentPromise = (this.currentPromise == promise) ? 
null : this.currentPromise); - return suggestions; - }); + return promise; } public predictFromTarget(outputTarget: OutputTarget, layerId: string): Promise { @@ -453,12 +450,13 @@ export default class LanguageProcessor extends EventEmitter; private swallowPrediction: boolean = false; @@ -85,9 +86,12 @@ export default class PredictionContext extends EventEmitter { + this.suggestionReverter = async (reversion) => { if(validSuggestionState()) { - langProcessor.applyReversion(reversion, this.currentTarget); + let suggestions = await langProcessor.applyReversion(reversion, this.currentTarget); + // We want to avoid altering flags that indicate our post-reversion state. + this.swallowPrediction = true; + this.updateSuggestions(new ReadySuggestions(suggestions, reversion.id ? -reversion.id : undefined)); } } @@ -107,10 +111,6 @@ export default class PredictionContext extends EventEmitter { - //let keyman = com.keyman.singleton; + private doTryAccept = (source: string, returnObj: {shouldSwallow: boolean}): void => { + const recentAcceptCause = this.recentAcceptCause; - if(!this.recentAccept && this.selected) { + if(!recentAcceptCause && this.selected) { this.accept(this.selected); - // returnObj.shouldSwallow = true; - } else if(this.recentAccept && source == 'space') { - this.recentAccept = false; - // // If the model doesn't insert wordbreaks, don't swallow the space. If it does, - // // we consider that insertion to be the results of the first post-accept space. - // returnObj.shouldSwallow = !!keyman.core.languageProcessor.wordbreaksAfterSuggestions; // can be handed outside + returnObj.shouldSwallow = true; + + // doTryAccept is the path for keystroke-based auto-acceptance. + // Overwrite the cause to reflect this. 
+ this.recentAcceptCause = 'key'; + } else if(recentAcceptCause && source == 'space') { + this.recentAcceptCause = null; + if(recentAcceptCause == 'key') { + // No need to swallow the keystroke's whitespace; we triggered the prior acceptance + // FROM a space, so we've already aliased the suggestion's built-in space. + returnObj.shouldSwallow = false; + return; + } + + // Standard whitespace applications from the banner, those we DO want to + // swallow the first time. + // + // If the model doesn't insert wordbreaks, there's no space to alias, so + // don't swallow the space. If it does, we consider that insertion to be + // the results of the first post-accept space. + returnObj.shouldSwallow = !!this.langProcessor.wordbreaksAfterSuggestions; // can be handed outside } else { - // returnObj.shouldSwallow = false; + returnObj.shouldSwallow = false; } } @@ -250,9 +272,9 @@ export default class PredictionContext extends EventEmitter { // By default, we assume that the context is the same until we notice otherwise. this.initNewContext = false; + this.selected = null; if(!this.swallowPrediction || source == 'context') { - this.recentAccept = false; + this.recentAcceptCause = null; this.doRevert = false; this.recentRevert = false; @@ -299,7 +322,7 @@ export default class PredictionContext extends EventEmitter