diff --git a/.gitignore b/.gitignore index c2658d7..dd3d82b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules/ +yarn-error.log diff --git a/.npmignore b/.npmignore index 6ca4202..921f1c9 100644 --- a/.npmignore +++ b/.npmignore @@ -1,2 +1,3 @@ test.* suite.* +bench/ diff --git a/bench/.gitignore b/bench/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/bench/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..957b9cb --- /dev/null +++ b/bench/README.md @@ -0,0 +1,69 @@ +Benchmark code. +Usage: + +```bash +./compare.js FILE +# or +./compare.js LENGTH +``` + +If you don't provide a source file, or specify a length instead, this will generate actual random text in JavaScript. + +For a better test, use the suggested UTF-8 encoded source text from [Project Gutenberg](https://www.gutenberg.org/files/23841/23841-0.txt). +This has a ratio of "length-to-bytes" of 0.35; the lower the ratio, the better the test (ASCII has a ratio of 1). + +This is an odd number, but we're comparing the on-disk UTF-8 bytes (which optimize for ASCII and other low Unicode values) to the length of JavaScript's UCS-2 / UTF-16 internal representation. +All Unicode code points can be represented as either one or two "lengths" of a JavaScript string, but each code point can be between 1-4 bytes in UTF-8. +The possible ratios therefore range from about 0.33 (text made only of three-byte code points) through 1.0 (pure ASCII); four-byte code points take two "lengths", so they give 0.5. 
+ +# Results + +For the suggested text on my test rig (macOS 3.6GHz i9), output looks like (snipped): + +``` +compare (file): length=971478, bytes=2740678 (ratio=0.35) + + 10.2209ms .native 971477 + 10.8853ms .native 971477 + 10.9297ms .native 971477 + 11.1351ms .native 971477 + 11.3154ms .native 971477 + 11.3741ms .native 971477 + 11.4921ms .native 971477 + 12.1611ms .native 971477 + 25.9949ms fast-text-encoding + 26.3912ms fast-text-encoding + 26.7037ms fast-text-encoding + 32.1910ms fast-text-encoding + 36.6454ms fast-text-encoding + 44.6358ms fast-text-encoding + 47.1846ms fast-text-encoding + 51.7178ms fast-text-encoding + 125.2835ms fastestsmallesttextencoderdecoder + 126.0772ms fastestsmallesttextencoderdecoder + 129.5148ms fastestsmallesttextencoderdecoder + 129.9449ms fastestsmallesttextencoderdecoder + 135.1421ms fastestsmallesttextencoderdecoder + 137.6716ms fastestsmallesttextencoderdecoder + 152.4639ms fastestsmallesttextencoderdecoder + 155.1741ms fastestsmallesttextencoderdecoder + 467.4895ms text-encoding-polyfill 971477 + 469.5857ms text-encoding-polyfill 971477 + 470.4829ms text-encoding-polyfill 971477 + 472.6093ms text-encoding-polyfill 971477 + 472.6358ms text-encoding-polyfill 971477 + 474.5790ms text-encoding-polyfill 971477 + 476.7881ms text-encoding-polyfill 971477 + 477.0778ms text-encoding 971477 + 478.0450ms text-encoding-polyfill 971477 + 478.2031ms text-encoding 971477 + 480.0009ms text-encoding 971477 + 480.2125ms text-encoding 971477 + 485.2014ms text-encoding 971477 + 485.9727ms text-encoding 971477 + 486.2783ms text-encoding 971477 + 490.5393ms text-encoding 971477 +``` + +As you'd expect, the native implementation is the speediest. +There's a bit of noise in the test; it's not perfect. 
diff --git a/bench/compare.js b/bench/compare.js
new file mode 100755
index 0000000..5d90b33
--- /dev/null
+++ b/bench/compare.js
@@ -0,0 +1,156 @@
+#!/usr/bin/env -S node --max-old-space-size=8192
+
+// Benchmarks a UTF-8 encode + decode round trip across several TextEncoder /
+// TextDecoder polyfills and, when available, Node's native implementation.
+//
+// Usage: ./compare.js [file | length]
+//   file    path to a UTF-8 text file to use as the input
+//   length  number of random code points to generate (default 65536)
+
+const {performance} = require('perf_hooks');
+const chalk = require('chalk');
+const fs = require('fs');
+
+const packages = ['fast-text-encoding', 'text-encoding', 'text-encoding-polyfill', 'text-encoding-utf-8', 'fastestsmallesttextencoderdecoder'];
+const runs = 8;
+
+// String.fromCodePoint(...args) throws a RangeError once the argument list
+// outgrows the engine's call limit, so code points are converted in slices.
+const codePointChunkSize = 8192;
+
+/**
+ * Builds a string of `length` random Unicode code points, biased towards low
+ * values.
+ *
+ * @param {number} length number of code points to generate
+ * @returns {string} the generated text; its `.length` can exceed `length`
+ *     because code points above U+FFFF occupy two UTF-16 units
+ */
+function buildRandomString(length) {
+  const chunks = [];
+  for (let start = 0; start < length; start += codePointChunkSize) {
+    const size = Math.min(codePointChunkSize, length - start);
+    const parts = [];
+    for (let i = 0; i < size; ++i) {
+      const v = 1.0 - Math.pow(Math.random(), 0.25); // bias towards start
+      parts.push(Math.floor(v * 0x10FFFF));
+    }
+    chunks.push(String.fromCodePoint(...parts));
+  }
+  return chunks.join('');
+}
+
+let string;
+
+if (+process.argv[2] || process.argv.length < 3) {
+  // possibly a number
+  string = buildRandomString(+process.argv[2] || (256 * 256));
+  console.info(`compare (random): length=${chalk.yellow(string.length)}`);
+} else {
+  const stat = fs.statSync(process.argv[2]);
+  string = fs.readFileSync(process.argv[2], 'utf-8');
+  const ratio = (string.length / stat.size);
+  console.info(`compare (file): length=${chalk.yellow(string.length)}, bytes=${chalk.yellow(stat.size)} (ratio=${chalk.yellow(ratio.toFixed(2))})`);
+}
+
+// remove 'text-encoding-utf-8' after a certain size as it's just pathologically bad
+if (string.length >= 32768) {
+  const index = packages.indexOf('text-encoding-utf-8');
+  if (index !== -1) {
+    packages.splice(index, 1);
+  }
+}
+
+console.info('');
+
+/**
+ * Encodes `s` to UTF-8 and decodes it again with the given implementation.
+ *
+ * @param {{TextEncoder: Function, TextDecoder: Function}} use constructors to test
+ * @param {string} s text to round-trip
+ * @returns {number} length of the decoded string
+ */
+function run(use, s) {
+  const te = new use.TextEncoder('utf-8');
+  const data = te.encode(s);
+
+  const td = new use.TextDecoder('utf-8');
+  const outs = td.decode(data);
+
+  return outs.length;
+}
+
+/**
+ * Randomly reorders `arr` in place.
+ *
+ * @param {Array} arr array to shuffle
+ */
+function shuffle(arr) {
+  const out = [];
+  while (arr.length) {
+    const choice = Math.floor(Math.random() * arr.length);
+    out.push(arr.splice(choice, 1)[0]);
+  }
+  arr.push(...out);
+}
+
+const results = [];
+const impl = {};
+const hasNative = (global.TextEncoder && global.TextDecoder);
+const nativeImpl = hasNative ? {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder} : null;
+
+// Load every polyfill with the native globals removed, so a package cannot
+// quietly fall back to (or re-export) the native classes.
+for (const name of packages) {
+  delete global.TextDecoder;
+  delete global.TextEncoder;
+  const exports = require(name);
+  const use = {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder, ...exports};
+
+  if (hasNative && ((use.TextDecoder === nativeImpl.TextDecoder || use.TextEncoder === nativeImpl.TextEncoder))) {
+    throw new Error(`package ${name} used native code`);
+  }
+
+  impl[name] = use;
+}
+
+if (hasNative) {
+  packages.push('.native');
+  impl['.native'] = nativeImpl;
+}
+
+(async function() {
+
+  for (let i = 0; i < runs; ++i) {
+    shuffle(packages);
+    console.info('run', (i + 1));
+
+    for (const name of packages) {
+      delete global.TextDecoder;
+      delete global.TextEncoder;
+
+      console.debug(chalk.gray(name));
+      const use = impl[name];
+
+      const start = performance.now();
+      const length = run(use, string);
+      const duration = performance.now() - start;
+      results.push({name, duration, length});
+
+      // take a breather
+      await new Promise((r) => setTimeout(r, 100));
+    }
+  }
+
+  results.sort(({duration: a}, {duration: b}) => a - b);
+
+  // the length is only printed (in red) when it differs from the input's
+  for (const {name, duration, length} of results) {
+    console.info((duration.toFixed(4) + 'ms').padStart(11), chalk.green(name), length !== string.length ? chalk.red(length) : '');
+  }
+
+})().catch((err) => {
+  // report the failure and exit non-zero instead of leaving the rejection unhandled
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/bench/package.json b/bench/package.json
new file mode 100644
index 0000000..71d199e
--- /dev/null
+++ b/bench/package.json
@@ -0,0 +1,10 @@
+{
+  "dependencies": {
+    "chalk": "^4.0.0",
+    "fast-text-encoding": "^1.0.2",
+    "fastestsmallesttextencoderdecoder": "^1.0.21",
+    "text-encoding": "^0.7.0",
+    "text-encoding-polyfill": "^0.6.7",
+    "text-encoding-utf-8": "^1.0.2"
+  }
+}
diff --git a/bench/yarn.lock b/bench/yarn.lock
new file mode 100644
index 0000000..ee3d086
--- /dev/null
+++ b/bench/yarn.lock
@@ -0,0 +1,73 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1 + + +"@types/color-name@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@types/color-name/-/color-name-1.1.1.tgz#1c1261bbeaa10a8055bbc5d8ab84b7b2afc846a0" + integrity sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ== + +ansi-styles@^4.1.0: + version "4.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.2.1.tgz#90ae75c424d008d2624c5bf29ead3177ebfcf359" + integrity sha512-9VGjrMsG1vePxcSweQsN20KY/c4zN0h9fLjqAbwbPfahM3t+NL+M9HC8xeXG2I8pX5NoamTGNuomEUFI7fcUjA== + dependencies: + "@types/color-name" "^1.1.1" + color-convert "^2.0.1" + +chalk@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.0.0.tgz#6e98081ed2d17faab615eb52ac66ec1fe6209e72" + integrity sha512-N9oWFcegS0sFr9oh1oz2d7Npos6vNoWW9HvtCg5N1KRFpUhaAhvTv5Y58g880fZaEYSNm3qDz8SU1UrGvp+n7A== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +fast-text-encoding@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/fast-text-encoding/-/fast-text-encoding-1.0.2.tgz#ff1ad5677bde049e0f8656aa6083a7ef2c5836e2" + integrity sha512-5rQdinSsycpzvAoHga2EDn+LRX1d5xLFsuNG0Kg61JrAT/tASXcLL0nf/33v+sAxlQcfYmWbTURa1mmAf55jGw== + +fastestsmallesttextencoderdecoder@^1.0.21: + version "1.0.21" + resolved 
"https://registry.yarnpkg.com/fastestsmallesttextencoderdecoder/-/fastestsmallesttextencoderdecoder-1.0.21.tgz#b67599f879417229bad311c9e1f2918dfd155c63" + integrity sha512-43gkbs+ruBXej0jhqcOKZ/DfKJmdWXrHZ5ZeHfYdnJ53aqU+p4JfhZrcNHBLNswieV7DOlj6f4q/+Fw9YMy/5Q== + +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + +supports-color@^7.1.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.1.0.tgz#68e32591df73e25ad1c4b49108a2ec507962bfd1" + integrity sha512-oRSIpR8pxT1Wr2FquTNnGet79b3BWljqOuoW/h4oBhxJ/HUbX5nX6JSruTkvXDCFMwDPvsaTTbvMLKZWSy0R5g== + dependencies: + has-flag "^4.0.0" + +text-encoding-polyfill@^0.6.7: + version "0.6.7" + resolved "https://registry.yarnpkg.com/text-encoding-polyfill/-/text-encoding-polyfill-0.6.7.tgz#4d27de0153e4c86eb2631ffd74c2f3f57969a9ec" + integrity sha512-/DZ1XJqhbqRkCop6s9ZFu8JrFRwmVuHg4quIRm+ziFkR3N3ec6ck6yBvJ1GYeEQZhLVwRW0rZE+C3SSJpy0RTg== + +text-encoding-utf-8@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz#585b62197b0ae437e3c7b5d0af27ac1021e10d13" + integrity sha512-8bw4MY9WjdsD2aMtO0OzOCY3pXGYNx2d2FfHRVUKkiCPDWjKuOlhLVASS+pD7VkLTVjW268LYJHwsnPFlBpbAg== + +text-encoding@^0.7.0: + version "0.7.0" + resolved "https://registry.yarnpkg.com/text-encoding/-/text-encoding-0.7.0.tgz#f895e836e45990624086601798ea98e8f36ee643" + integrity sha512-oJQ3f1hrOnbRLOcwKz0Liq2IcrvDeZRHXhd9RgLrsT+DjWY/nty1Hi7v3dtkaEYbPYe0mUoOfzRrMwfXXwgPUA==