forked from samthor/fast-text-encoding
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
266 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
node_modules/ | ||
yarn-error.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
test.* | ||
suite.* | ||
bench/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
Benchmark code. | ||
Usage: | ||
|
||
```bash | ||
./compare.js <filename> | ||
# or | ||
./compare.js <length> | ||
``` | ||
|
||
If you don't provide a source file, or specify a length instead, this will generate actual random text in JavaScript. | ||
|
||
For a better test, use suggested UTF-8 encoded source text from [Project Gutenberg](https://www.gutenberg.org/files/23841/23841-0.txt). | ||
This has a "length-to-bytes" ratio of 0.35 (JavaScript string length divided by on-disk UTF-8 bytes); the lower the ratio, the better the test (ASCII has a ratio of 1). | ||
|
||
This is an odd number, but we're comparing the on-disk UTF-8 bytes (which optimize for ASCII and other low Unicode values) to the length of JavaScript's UCS-2 / UTF-16 internal representation. | ||
All Unicode code points can be represented as either one or two "lengths" of a JavaScript string: those that take one "length" need 1-3 bytes in UTF-8, while those that take two "lengths" always need 4 bytes. | ||
The possible ratios therefore range from 0.33 (three bytes per "length") through 1.0. | ||
|
||
# Results | ||
|
||
For the suggested text on my test rig (macOS 3.6GHz i9), output looks like (snipped): | ||
|
||
``` | ||
compare (file): length=971478, bytes=2740678 (ratio=0.35) | ||
10.2209ms .native 971477 | ||
10.8853ms .native 971477 | ||
10.9297ms .native 971477 | ||
11.1351ms .native 971477 | ||
11.3154ms .native 971477 | ||
11.3741ms .native 971477 | ||
11.4921ms .native 971477 | ||
12.1611ms .native 971477 | ||
25.9949ms fast-text-encoding | ||
26.3912ms fast-text-encoding | ||
26.7037ms fast-text-encoding | ||
32.1910ms fast-text-encoding | ||
36.6454ms fast-text-encoding | ||
44.6358ms fast-text-encoding | ||
47.1846ms fast-text-encoding | ||
51.7178ms fast-text-encoding | ||
125.2835ms fastestsmallesttextencoderdecoder | ||
126.0772ms fastestsmallesttextencoderdecoder | ||
129.5148ms fastestsmallesttextencoderdecoder | ||
129.9449ms fastestsmallesttextencoderdecoder | ||
135.1421ms fastestsmallesttextencoderdecoder | ||
137.6716ms fastestsmallesttextencoderdecoder | ||
152.4639ms fastestsmallesttextencoderdecoder | ||
155.1741ms fastestsmallesttextencoderdecoder | ||
467.4895ms text-encoding-polyfill 971477 | ||
469.5857ms text-encoding-polyfill 971477 | ||
470.4829ms text-encoding-polyfill 971477 | ||
472.6093ms text-encoding-polyfill 971477 | ||
472.6358ms text-encoding-polyfill 971477 | ||
474.5790ms text-encoding-polyfill 971477 | ||
476.7881ms text-encoding-polyfill 971477 | ||
477.0778ms text-encoding 971477 | ||
478.0450ms text-encoding-polyfill 971477 | ||
478.2031ms text-encoding 971477 | ||
480.0009ms text-encoding 971477 | ||
480.2125ms text-encoding 971477 | ||
485.2014ms text-encoding 971477 | ||
485.9727ms text-encoding 971477 | ||
486.2783ms text-encoding 971477 | ||
490.5393ms text-encoding 971477 | ||
``` | ||
|
||
As you'd expect, the native implementation is the speediest. | ||
There's a bit of noise in the test; it's not perfect. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env node --max-old-space-size=8192 | ||
|
||
const {performance} = require('perf_hooks'); | ||
const chalk = require('chalk'); | ||
const fs = require('fs'); | ||
|
||
const packages = ['fast-text-encoding', 'text-encoding', 'text-encoding-polyfill', 'text-encoding-utf-8', 'fastestsmallesttextencoderdecoder']; | ||
const runs = 8; | ||
|
||
/**
 * Builds a string of `length` random Unicode code points, biased towards the
 * low end of the code point range.
 *
 * Surrogate code points (U+D800 to U+DFFF) are re-drawn so the result is
 * always well-formed text, and the string is assembled in bounded chunks so
 * that large lengths do not exceed the argument limit of a single
 * `String.fromCodePoint(...)` call.
 *
 * @param {number} length number of code points to generate
 * @returns {string} the random text; its `.length` can exceed `length`
 *     because code points above U+FFFF occupy two UTF-16 code units
 */
function buildRandomString(length) {
  // Keep each fromCodePoint call well below the engine's argument limit.
  const chunkSize = 8192;
  const pieces = [];
  let chunk = [];

  for (let i = 0; i < length; ++i) {
    let codePoint;
    do {
      const v = 1.0 - Math.pow(Math.random(), 0.25); // bias towards start
      codePoint = Math.floor(v * 0x10FFFF);
      // Lone surrogates are not valid text; draw again.
    } while (codePoint >= 0xD800 && codePoint <= 0xDFFF);

    chunk.push(codePoint);
    if (chunk.length === chunkSize) {
      pieces.push(String.fromCodePoint(...chunk));
      chunk = [];
    }
  }

  if (chunk.length > 0) {
    pieces.push(String.fromCodePoint(...chunk));
  }
  return pieces.join('');
}
|
||
// The text every implementation is asked to round-trip. It is either random
// text (numeric or missing first argument) or the contents of the file named
// by the first argument.
let string;

if (+process.argv[2] || process.argv.length < 3) {
  // possibly a number; fall back to 64k code points when none is given
  string = buildRandomString(+process.argv[2] || (256 * 256));
  console.info(`compare (random): length=${chalk.yellow(string.length)}`);
} else {
  const stat = fs.statSync(process.argv[2]);
  string = fs.readFileSync(process.argv[2], 'utf-8');
  // JavaScript string length divided by the file's size in bytes.
  const ratio = (string.length / stat.size);
  console.info(`compare (file): length=${chalk.yellow(string.length)}, bytes=${chalk.yellow(stat.size)} (ratio=${chalk.yellow(ratio.toFixed(2))})`);
}

// remove 'text-encoding-utf-8' after a certain size as it's just pathologically bad
if (string.length >= 32768) {
  const index = packages.indexOf('text-encoding-utf-8');
  // Guard against -1: splice(-1, 1) would silently drop the last package.
  if (index !== -1) {
    packages.splice(index, 1);
  }
}

console.info('');
|
||
/**
 * Round-trips a string through one encoder/decoder implementation.
 *
 * @param {{TextEncoder: Function, TextDecoder: Function}} use constructors of
 *     the implementation under test
 * @param {string} s text to encode to UTF-8 and decode again
 * @returns {number} length of the decoded string
 */
function run(use, s) {
  const encoder = new use.TextEncoder('utf-8');
  const bytes = encoder.encode(s);

  const decoder = new use.TextDecoder('utf-8');
  return decoder.decode(bytes).length;
}
|
||
/**
 * Randomly reorders `arr` in place using a Fisher-Yates shuffle.
 *
 * Runs in linear time and allocates no temporary array, unlike a shuffle that
 * repeatedly splices a random element out of the input.
 *
 * @param {Array} arr array to shuffle; mutated in place
 * @returns {void}
 */
function shuffle(arr) {
  for (let i = arr.length - 1; i > 0; --i) {
    // Pick a partner from the not-yet-fixed prefix [0, i].
    const j = Math.floor(Math.random() * (i + 1));
    const tmp = arr[i];
    arr[i] = arr[j];
    arr[j] = tmp;
  }
}
|
||
// Timing samples, one entry per (run, implementation) pair.
const results = [];
// Maps an implementation name to its {TextEncoder, TextDecoder} pair.
const impl = {};
// Capture the built-in implementation before the globals get deleted below.
const hasNative = (global.TextEncoder && global.TextDecoder);
const nativeImpl = hasNative ? {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder} : null;

packages.forEach((name) => {
  // Clear the globals so that whatever the package installs or exports is
  // what gets picked up for this package.
  delete global.TextDecoder;
  delete global.TextEncoder;
  const exported = require(name);
  // Start from whatever is on the globals now; the package's exports win.
  const use = {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder, ...exported};

  if (hasNative) {
    const reusesDecoder = use.TextDecoder === nativeImpl.TextDecoder;
    const reusesEncoder = use.TextEncoder === nativeImpl.TextEncoder;
    if (reusesDecoder || reusesEncoder) {
      throw new Error(`package ${name} used native code`);
    }
  }

  impl[name] = use;
});

// Benchmark the built-in implementation alongside the packages.
if (hasNative) {
  packages.push('.native');
  impl['.native'] = nativeImpl;
}
|
||
// Benchmark driver: times every implementation `runs` times in a freshly
// shuffled order, then prints all samples sorted from fastest to slowest.
// A failure is reported and turned into a non-zero exit code instead of being
// left as an unhandled promise rejection.
(async function() {

  for (let i = 0; i < runs; ++i) {
    shuffle(packages);
    console.info('run', (i + 1));

    for (const name of packages) {
      delete global.TextDecoder;
      delete global.TextEncoder;

      console.debug(chalk.gray(name));
      const use = impl[name];

      const start = performance.now();
      const length = run(use, string);
      const duration = performance.now() - start;
      results.push({name, duration, length});

      // take a breather
      await new Promise((r) => setTimeout(r, 100));
    }
  }

  results.sort(({duration: a}, {duration: b}) => a - b);

  for (const {name, duration, length} of results) {
    // The decoded length is only printed (in red) when it differs from the input's.
    console.info((duration.toFixed(4) + 'ms').padStart(11), chalk.green(name), length !== string.length ? chalk.red(length) : '');
  }

})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"dependencies": { | ||
"chalk": "^4.0.0", | ||
"fast-text-encoding": "^1.0.2", | ||
"fastestsmallesttextencoderdecoder": "^1.0.21", | ||
"text-encoding": "^0.7.0", | ||
"text-encoding-polyfill": "^0.6.7", | ||
"text-encoding-utf-8": "^1.0.2" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. | ||
# yarn lockfile v1 | ||
|
||
|
||
"@types/color-name@^1.1.1": | ||
version "1.1.1" | ||
resolved "https://registry.yarnpkg.com/@types/color-name/-/color-name-1.1.1.tgz#1c1261bbeaa10a8055bbc5d8ab84b7b2afc846a0" | ||
integrity sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ== | ||
|
||
ansi-styles@^4.1.0: | ||
version "4.2.1" | ||
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.2.1.tgz#90ae75c424d008d2624c5bf29ead3177ebfcf359" | ||
integrity sha512-9VGjrMsG1vePxcSweQsN20KY/c4zN0h9fLjqAbwbPfahM3t+NL+M9HC8xeXG2I8pX5NoamTGNuomEUFI7fcUjA== | ||
dependencies: | ||
"@types/color-name" "^1.1.1" | ||
color-convert "^2.0.1" | ||
|
||
chalk@^4.0.0: | ||
version "4.0.0" | ||
resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.0.0.tgz#6e98081ed2d17faab615eb52ac66ec1fe6209e72" | ||
integrity sha512-N9oWFcegS0sFr9oh1oz2d7Npos6vNoWW9HvtCg5N1KRFpUhaAhvTv5Y58g880fZaEYSNm3qDz8SU1UrGvp+n7A== | ||
dependencies: | ||
ansi-styles "^4.1.0" | ||
supports-color "^7.1.0" | ||
|
||
color-convert@^2.0.1: | ||
version "2.0.1" | ||
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" | ||
integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== | ||
dependencies: | ||
color-name "~1.1.4" | ||
|
||
color-name@~1.1.4: | ||
version "1.1.4" | ||
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" | ||
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== | ||
|
||
fast-text-encoding@^1.0.2: | ||
version "1.0.2" | ||
resolved "https://registry.yarnpkg.com/fast-text-encoding/-/fast-text-encoding-1.0.2.tgz#ff1ad5677bde049e0f8656aa6083a7ef2c5836e2" | ||
integrity sha512-5rQdinSsycpzvAoHga2EDn+LRX1d5xLFsuNG0Kg61JrAT/tASXcLL0nf/33v+sAxlQcfYmWbTURa1mmAf55jGw== | ||
|
||
fastestsmallesttextencoderdecoder@^1.0.21: | ||
version "1.0.21" | ||
resolved "https://registry.yarnpkg.com/fastestsmallesttextencoderdecoder/-/fastestsmallesttextencoderdecoder-1.0.21.tgz#b67599f879417229bad311c9e1f2918dfd155c63" | ||
integrity sha512-43gkbs+ruBXej0jhqcOKZ/DfKJmdWXrHZ5ZeHfYdnJ53aqU+p4JfhZrcNHBLNswieV7DOlj6f4q/+Fw9YMy/5Q== | ||
|
||
has-flag@^4.0.0: | ||
version "4.0.0" | ||
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" | ||
integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== | ||
|
||
supports-color@^7.1.0: | ||
version "7.1.0" | ||
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.1.0.tgz#68e32591df73e25ad1c4b49108a2ec507962bfd1" | ||
integrity sha512-oRSIpR8pxT1Wr2FquTNnGet79b3BWljqOuoW/h4oBhxJ/HUbX5nX6JSruTkvXDCFMwDPvsaTTbvMLKZWSy0R5g== | ||
dependencies: | ||
has-flag "^4.0.0" | ||
|
||
text-encoding-polyfill@^0.6.7: | ||
version "0.6.7" | ||
resolved "https://registry.yarnpkg.com/text-encoding-polyfill/-/text-encoding-polyfill-0.6.7.tgz#4d27de0153e4c86eb2631ffd74c2f3f57969a9ec" | ||
integrity sha512-/DZ1XJqhbqRkCop6s9ZFu8JrFRwmVuHg4quIRm+ziFkR3N3ec6ck6yBvJ1GYeEQZhLVwRW0rZE+C3SSJpy0RTg== | ||
|
||
text-encoding-utf-8@^1.0.2: | ||
version "1.0.2" | ||
resolved "https://registry.yarnpkg.com/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz#585b62197b0ae437e3c7b5d0af27ac1021e10d13" | ||
integrity sha512-8bw4MY9WjdsD2aMtO0OzOCY3pXGYNx2d2FfHRVUKkiCPDWjKuOlhLVASS+pD7VkLTVjW268LYJHwsnPFlBpbAg== | ||
|
||
text-encoding@^0.7.0: | ||
version "0.7.0" | ||
resolved "https://registry.yarnpkg.com/text-encoding/-/text-encoding-0.7.0.tgz#f895e836e45990624086601798ea98e8f36ee643" | ||
integrity sha512-oJQ3f1hrOnbRLOcwKz0Liq2IcrvDeZRHXhd9RgLrsT+DjWY/nty1Hi7v3dtkaEYbPYe0mUoOfzRrMwfXXwgPUA== |