From e5ee26aaeaad599789ace2d1d58bfb22ed4825f3 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Fri, 3 May 2024 16:47:22 -0500 Subject: [PATCH] fix(core): add a test to verify ICU and Unicode version - load version data from node.js, Blocks.txt, and ICU4C - support wasm: copy package.json, nodeversions.json and Blocks.txt into the keyboard area so that they can be mounted under wasm also: - rename 'fallthrough' macro to KMN_FALLTHROUGH to not conflict with hedley in utfcodec.hpp - fix ambiguous path type in tests Fixes: #10183 --- core/src/utfcodec.hpp | 12 +- core/tests/unit/ldml/meson.build | 52 ++++- core/tests/unit/ldml/test_unicode.cpp | 219 ++++++++++++++++++++ core/tests/unit/ldml/write_node_versions.js | 8 + 4 files changed, 282 insertions(+), 9 deletions(-) create mode 100644 core/tests/unit/ldml/test_unicode.cpp create mode 100644 core/tests/unit/ldml/write_node_versions.js diff --git a/core/src/utfcodec.hpp b/core/src/utfcodec.hpp index ed40d55653b..3f1bf9ab21a 100644 --- a/core/src/utfcodec.hpp +++ b/core/src/utfcodec.hpp @@ -21,11 +21,11 @@ typedef uint32_t uchar_t; /* Intentional fallthrough */ #if defined(__clang__) -#define fallthrough [[clang::fallthrough]] +#define KMN_FALLTHROUGH [[clang::fallthrough]] #elif defined(__GNUC__) && __GNUC__ >= 7 -#define fallthrough [[gnu::fallthrough]] +#define KMN_FALLTHROUGH [[gnu::fallthrough]] #else -#define fallthrough ((void)0) +#define KMN_FALLTHROUGH ((void)0) #endif template @@ -151,9 +151,9 @@ struct _utf_codec<8> auto rl = 1; bool toolong = false; switch(seq_sz) { - case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong = (u < 0x10); fallthrough; - case 3: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong |= (u < 0x20); fallthrough; - case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong |= (u < 0x80); fallthrough; + case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong = (u < 0x10); KMN_FALLTHROUGH; + case 3: u <<= 6; u 
|= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong |= (u < 0x20); KMN_FALLTHROUGH; + case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++rl; toolong |= (u < 0x80); KMN_FALLTHROUGH; case 1: l = seq_sz; break; case 0: l = -1; return 0xFFFD; } diff --git a/core/tests/unit/ldml/meson.build b/core/tests/unit/ldml/meson.build index e0d3d0540de..8d660136202 100644 --- a/core/tests/unit/ldml/meson.build +++ b/core/tests/unit/ldml/meson.build @@ -35,20 +35,46 @@ if node.found() subdir('invalid-keyboards') endif - # Build ldml test executable +keyboard_build_path = join_paths(meson.current_build_dir(),'keyboards') + if cpp_compiler.get_id() == 'emscripten' tests_flags = ['--embed-file', join_paths(meson.current_build_dir(),'keyboards','@')] tests_flags += ['--embed-file', join_paths(meson.current_build_dir(),'invalid-keyboards','@')] + tests_flags += ['--embed-file', join_paths(meson.current_build_dir(),'nodeversions.json') + '@nodeversions.json'] + tests_flags += ['--embed-file', join_paths(meson.current_build_dir(),'package.json') + '@package.json'] + tests_flags += ['--embed-file', join_paths(meson.current_build_dir(),'Blocks.txt') + '@Blocks.txt'] test_path = '/' + test_unicode_path = '/' invalid_test_path = '/' + tests_flags += ['-lnodefs.js', + '-sNO_DISABLE_EXCEPTION_CATCHING', # for test exceptions + '-sEXPORTED_RUNTIME_METHODS=[\'UTF8ToString\']'] else tests_flags = [] test_path = join_paths(meson.current_build_dir(),'keyboards') + test_unicode_path = join_paths(meson.current_build_dir()) invalid_test_path = join_paths(meson.current_build_dir(),'invalid-keyboards') endif +# copy package.json into build dir for test use +configure_file( + copy: true, + input: '../../../../package.json', + output: 'package.json', +) +configure_file( + copy: true, + input: '../../../../resources/standards-data/unicode-character-database/Blocks.txt', + output: 'Blocks.txt', +) + +configure_file( + command: [node, join_paths(meson.current_source_dir(), 
'write_node_versions.js'),'@OUTPUT@'], + output: 'nodeversions.json', +) + ldml = executable('ldml', ['ldml.cpp', 'ldml_test_source.cpp', @@ -94,14 +120,34 @@ if cpp_compiler.get_id() == 'emscripten' normalization_tests_flags += ['-lnodefs.js', '-sEXPORTED_RUNTIME_METHODS=[\'UTF8ToString\']'] endif -t = executable('test_context_normalization', +tc = executable('test_context_normalization', ['test_context_normalization.cpp', common_test_files], cpp_args: defns + warns, include_directories: [inc, libsrc, '../../../../developer/src/ext/json'], link_args: links + normalization_tests_flags, dependencies: [icu_uc, icu_i18n], objects: lib.extract_all_objects(recursive: false)) -test('test_context_normalization', t, suite: 'ldml') +test('test_context_normalization', tc, suite: 'ldml') + +# Build and run additional test_unicode test + +u = executable('test_unicode', 'test_unicode.cpp', + ['test_unicode.cpp', '../emscripten_filesystem.cpp'], + cpp_args: defns + warns, + include_directories: [inc, libsrc, '../../../../developer/src/ext/json'], + link_args: links + tests_flags, + dependencies: [icu_uc, icu_i18n], + objects: lib.extract_all_objects(recursive: false), + +) + +test('test_unicode', u, suite: 'ldml', + args: [ + join_paths(test_unicode_path, 'nodeversions.json'), + join_paths(test_unicode_path, 'package.json'), + join_paths(test_unicode_path, 'Blocks.txt'), + ], +) # Run tests on all keyboards (`tests` defined in keyboards/meson.build) diff --git a/core/tests/unit/ldml/test_unicode.cpp b/core/tests/unit/ldml/test_unicode.cpp new file mode 100644 index 00000000000..77ae0202aca --- /dev/null +++ b/core/tests/unit/ldml/test_unicode.cpp @@ -0,0 +1,219 @@ +/* + Copyright: © 2024 SIL International. + Description: Tests for normalization in the context API. + Create Date: 3 May 2024 + Authors: Steven R. Loomis + History: 3 May 2024 - SRL - Initial implementation. 
+*/ + +#include +#include +#include + +#include "keyman_core.h" + +#include "path.hpp" +#include "action.hpp" + +#include +#include "../emscripten_filesystem.h" + +#include "core_icu.h" +#include +#include +#include "json.hpp" + +#include +#include + +#ifdef assert_basic_equal +#undef assert_basic_equal +#endif +#define assert_basic_equal(actual, expected) { \ + if ((actual) != (expected)) { \ + std::cerr \ + << "Test failed at " << __FILE__ << ":" << __LINE__ << ":" \ + << std::endl \ + << "expected: " << (expected) << std::endl \ + << "actual: " << (actual) << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ +} + +//------------------------------------------------------------------------------------- +// Unicode version tests +//------------------------------------------------------------------------------------- + +std::string arg_path; + +nlohmann::json load_json(const km::core::path &jsonpath) { + std::cout << "== " << __FUNCTION__ << " loading " << jsonpath << std::endl; + std::ifstream json_file(jsonpath.native()); + if (!json_file) { + std::cerr << "ERROR Could not load: " << jsonpath << std::endl; + assert (json_file); + } + nlohmann::json data = nlohmann::json::parse(json_file); + return data; +} + +/** @returns the major version of 'ver' (the text before the first '.'), skipping an initial '^'. Asserts that 'ver' is non-empty and contains a '.' */ +std::string get_major(const std::string& ver) { + assert(!ver.empty()); + auto start = 0; + // skip leading '^' + if (ver[start] == '^') { + start++; + } + // find first '.' 
+ auto end = ver.find('.', start); + assert(end != std::string::npos); + return ver.substr(start, end - start); +} + +/** @return the Unicode version from a Blocks.txt file: the remainder of its first line after the "# Blocks-" prefix (the prefix is assumed, not checked) */ +std::string get_block_unicode_ver(const char *blocks_path) { + std::cout << "= " << __FUNCTION__ << " load " << blocks_path << std::endl; + // fetch Blocks.txt + std::ifstream blocks_file( + km::core::path(blocks_path).native()); + assert(blocks_file.good()); + std::string block_line; + assert(std::getline(blocks_file, block_line)); // first line; NOTE(review): the read happens inside assert(), so it is skipped if NDEBUG is defined + const std::string prefix = "# Blocks-"; + assert(block_line.length() > prefix.length()); + return block_line.substr(prefix.length()); +} + +void test_unicode_versions(const nlohmann::json &versions, const nlohmann::json &package, +const std::string &block_unicode_ver) { + std::cout << "== test: " << __FUNCTION__ << std::endl; + +#define SHOW_VAR(x) (std::cout << #x << "\t" << (x) << std::endl) + + const std::string cxx_icu(U_ICU_VERSION); + SHOW_VAR(cxx_icu); + + const std::string cxx_icu_unicode(U_UNICODE_VERSION); + SHOW_VAR(cxx_icu_unicode); + + SHOW_VAR(versions); + + const std::string node_icu_unicode(versions["unicode"].template get()); + SHOW_VAR(node_icu_unicode); + SHOW_VAR(versions["node"]); + + const std::string node(versions["node"].template get()); + SHOW_VAR(node); + + const std::string node_icu(versions["icu"].template get()); + SHOW_VAR(node_icu); + + const std::string node_engine(package["engines"]["node"].template get()); + SHOW_VAR(node_engine); + + std::cout << "=== Loaded from JSON" << std::endl; + + SHOW_VAR(block_unicode_ver); + std::cout << "=== calculating major versions" << std::endl; + + // calculations + auto block_ver_major = get_major(block_unicode_ver); + auto node_engine_major = get_major(node_engine); + auto node_major = get_major(node); + auto cxx_icu_major = get_major(cxx_icu); + auto node_icu_major = get_major(node_icu); + auto cxx_icu_unicode_major = get_major(cxx_icu_unicode); + auto node_icu_unicode_major = 
get_major(node_icu_unicode); + +#undef SHOW_VAR + + // allow the Node.js version to be >= required + auto node_engine_num = std::atoi(node_engine_major.c_str()); + auto node_num = std::atoi(node_major.c_str()); + assert(node_num >= node_engine_num); + + // the cxx_icu can come from the Ubuntu environment, so do not depend on it + // for now. + //assert_basic_equal(node_icu_unicode_major, cxx_icu_unicode_major); + assert_basic_equal(node_icu_unicode_major, block_ver_major); + + // seems less important if the C++ ICU version matches the Node.js ICU version. + //assert_basic_equal(cxx_icu_major, node_icu_major); + + std::cout << "All OK!" << std::endl; + + std::cout << std::endl; +} + +int test_all(const char *jsonpath, const char *packagepath, const char *blockspath) { + std::cout << "= " << __FUNCTION__ << std::endl; + + // load the dump of node's process.versions which the meson.build file generated + auto versions = load_json(km::core::path(jsonpath)); + assert(!versions.empty()); + + // load our top level package.json + auto package = load_json(km::core::path(packagepath)); + assert(!package.empty()); + + const auto block_unicode_ver = get_block_unicode_ver(blockspath); + + test_unicode_versions(versions, package, block_unicode_ver); + + return EXIT_SUCCESS; +} + +//------------------------------------------------------------------------------------- +// Launcher +//------------------------------------------------------------------------------------- + +constexpr const auto help_str = "\ +test_unicode [--color] nodeversions.json package.json Blocks.txt\n\ +\n\ + --color Force color output\n"; + +int error_args() { + std::cerr << "test_unicode: Invalid arguments." 
<< std::endl; + std::cout << help_str; + return EXIT_FAILURE; +} + +int main(int argc, char *argv []) { + int first_arg = 1; + auto arg_color = argc > first_arg && std::string(argv[first_arg]) == "--color"; + if (arg_color) first_arg++; + console_color::enabled = console_color::isaterminal() || arg_color; + + // Get the path of the current executable + arg_path = argv[0]; + auto last = arg_path.find_last_of("/\\"); + if(last == std::string::npos) { + std::cerr << "could not parse argv[0]: " << argv[0] << std::endl; + return 1; + } + arg_path.resize(last+1); + +#ifdef __EMSCRIPTEN__ + arg_path = get_wasm_file_path(arg_path); +#endif + + if (argc <= first_arg) { + return error_args(); + } + auto jsonpath = argv[first_arg++]; + + if (argc <= first_arg) { + return error_args(); + } + auto packagepath = argv[first_arg++]; + + if (argc <= first_arg) { + return error_args(); + } + auto blockspath = argv[first_arg++]; + + int rc = test_all(jsonpath, packagepath, blockspath); + + return rc; +} diff --git a/core/tests/unit/ldml/write_node_versions.js b/core/tests/unit/ldml/write_node_versions.js new file mode 100644 index 00000000000..ae8cd880b32 --- /dev/null +++ b/core/tests/unit/ldml/write_node_versions.js @@ -0,0 +1,8 @@ +// Simple utility to dump process.versions to argument 1 + +const { argv, versions } = require('process'); +const { writeFileSync } = require('fs'); + +const [ f ] = argv.slice(2); +writeFileSync(f, JSON.stringify(versions, null, ' ')); +console.log('Wrote:', f);