From dc4a675997bb439f36b39b1a20af6e20f74d7c5d Mon Sep 17 00:00:00 2001
From: valadaptive
Date: Fri, 23 Feb 2024 19:38:10 -0500
Subject: [PATCH] Redo writeString without Buffer.byteLength

This took some fiddling but it's now *faster* than the previous implementation.
---
 lib/utils.js | 172 ++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 129 insertions(+), 43 deletions(-)

diff --git a/lib/utils.js b/lib/utils.js
index 098bfdcc..a142787f 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -464,43 +464,52 @@ if (typeof Buffer === 'function' && Buffer.prototype.utf8Slice) {
   };
 }
 
-function encodeSliceManual(arr, str, start, end) {
-  let pos = start;
-  for (let i = 0, l = end - start; i < l; i++) {
-    let c1 = str.charCodeAt(i);
-    let c2;
-    if (c1 < 0x80) {
-      arr[pos++] = c1;
-    } else if (c1 < 0x800) {
-      arr[pos++] = c1 >> 6 | 0xc0;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    } else if (
-      (c1 & 0xfc00) === 0xd800 &&
-      ((c2 = str.charCodeAt(i + 1)) & 0xfc00) === 0xdc00
-    ) {
-      c1 = 0x10000 + ((c1 & 0x03ff) << 10) + (c2 & 0x03ff);
-      i++;
-      arr[pos++] = c1 >> 18 | 0xf0;
-      arr[pos++] = c1 >> 12 & 0x3f | 0x80;
-      arr[pos++] = c1 >> 6 & 0x3f | 0x80;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    } else {
-      arr[pos++] = c1 >> 12 | 0xe0;
-      arr[pos++] = c1 >> 6 & 0x3f | 0x80;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    }
+const ENCODER = new TextEncoder();
+const encodeBuf = new Uint8Array(4096);
+const encodeBufs = [];
+// Believe it or not, `subarray` is actually quite expensive. To avoid the cost,
+// we call `subarray` once for each possible slice length and reuse those cached
+// views.
+for (let i = 0; i <= encodeBuf.length; i++) {
+  encodeBufs.push(encodeBuf.subarray(0, i));
+}
+
+/**
+ * Encode a string as UTF-8.
+ *
+ * When the string fits in the shared scratch buffer, the result is a cached
+ * view into that buffer: it is overwritten by any later encode, so callers
+ * must copy the bytes out before encoding anything else.
+ *
+ * @param {string} str String to encode.
+ * @returns {Uint8Array} UTF-8 bytes of `str`.
+ */
+function encodeSlice(str) {
+  const {read, written} = ENCODER.encodeInto(str, encodeBuf);
+  if (read === str.length) {
+    return encodeBufs[written];
   }
+
+  return ENCODER.encode(str);
 }
 
-let encodeSlice;
-if (typeof Buffer === 'function' && Buffer.prototype.utf8Write) {
-  encodeSlice = function(arr, str, start, end) {
-    Buffer.prototype.utf8Write.call(arr, str, start, end - start);
-  };
+let utf8Length;
+if (typeof Buffer === 'function') {
+  utf8Length = Buffer.byteLength;
 } else {
-  const ENCODER = new TextEncoder();
-  encodeSlice = function(arr, str, start, end) {
-    ENCODER.encodeInto(str, arr.subarray(start, end));
+  utf8Length = function(str) {
+    let len = 0;
+    for (;;) {
+      // encodeInto is faster than any manual implementation (or even
+      // Buffer.byteLength), provided the string fits entirely within the
+      // buffer. Past that, it slows down but is still faster than other
+      // options.
+      const {read, written} = ENCODER.encodeInto(str, encodeBuf);
+      len += written;
+      if (read === str.length) break;
+      str = str.slice(read);
+    }
+    return len;
   };
 }
 
@@ -803,18 +812,95 @@ class Tap {
   }
 
+  /**
+   * Write a string as its UTF-8 byte length (via `writeLong`) followed by the
+   * UTF-8 bytes themselves. Bytes that do not fit in the buffer are dropped,
+   * but `this.pos` still advances by the full encoded size.
+   *
+   * @param {string} s String to write.
+   */
   writeString (s) {
-    let len = Buffer.byteLength(s);
     let buf = this.arr;
-    this.writeLong(len);
-    let pos = this.pos;
-    this.pos += len;
-    if (this.pos > buf.length) {
-      return;
-    }
-    if (len > 64) {
-      encodeSlice(buf, s, pos, pos + len);
+    const stringLen = s.length;
+    // The maximum number that a signed varint can store in a single byte is 63.
+    // The maximum size of a UTF-8 representation of a UTF-16 string is 3 times
+    // its length, as one UTF-16 character can be represented by up to 3 bytes
+    // in UTF-8. Therefore, if the string is 21 characters or less, we know that
+    // its length can be stored in a single byte, which is why we choose 21 as
+    // the small-string threshold specifically.
+    if (stringLen > 21) {
+      let encodedLength, encoded;
+
+      // If we're already over the buffer size, we don't need to encode the
+      // string. While encodeInto is actually faster than Buffer.byteLength, we
+      // could still overflow the preallocated encoding buffer and have to fall
+      // back to allocating, which is really really slow.
+      if (this.isValid()) {
+        encoded = encodeSlice(s);
+        encodedLength = encoded.length;
+      } else {
+        encodedLength = utf8Length(s);
+      }
+      this.writeLong(encodedLength);
+      let pos = this.pos;
+      this.pos += encodedLength;
+
+      // `set` throws a RangeError when the source overruns the target, so only
+      // copy if the whole string fits; `this.pos` has advanced either way.
+      if (encoded !== undefined && this.pos <= buf.length) {
+        buf.set(encoded, pos);
+      }
     } else {
-      encodeSliceManual(buf, s, pos, pos + len);
+      // For small strings, this manual implementation is faster.
+
+      // Set aside 1 byte to write the string length.
+      let pos = this.pos + 1;
+      let startPos = pos;
+      let bufLen = buf.length;
+
+      // This is not a micro-optimization: caching the string length for the
+      // loop predicate really does make a difference!
+      for (let i = 0; i < stringLen; i++) {
+        let c1 = s.charCodeAt(i);
+        let c2;
+        if (c1 < 0x80) {
+          if (pos < bufLen) buf[pos] = c1;
+          pos++;
+        } else if (c1 < 0x800) {
+          if (pos + 1 < bufLen) {
+            buf[pos] = c1 >> 6 | 0xc0;
+            buf[pos + 1] = c1 & 0x3f | 0x80;
+          }
+          pos += 2;
+        } else if (
+          (c1 & 0xfc00) === 0xd800 &&
+          ((c2 = s.charCodeAt(i + 1)) & 0xfc00) === 0xdc00
+        ) {
+          c1 = 0x10000 + ((c1 & 0x03ff) << 10) + (c2 & 0x03ff);
+          i++;
+          if (pos + 3 < bufLen) {
+            buf[pos] = c1 >> 18 | 0xf0;
+            buf[pos + 1] = c1 >> 12 & 0x3f | 0x80;
+            buf[pos + 2] = c1 >> 6 & 0x3f | 0x80;
+            buf[pos + 3] = c1 & 0x3f | 0x80;
+          }
+          pos += 4;
+        } else {
+          if (pos + 2 < bufLen) {
+            buf[pos] = c1 >> 12 | 0xe0;
+            buf[pos + 1] = c1 >> 6 & 0x3f | 0x80;
+            buf[pos + 2] = c1 & 0x3f | 0x80;
+          }
+          pos += 3;
+        }
+      }
+
+      // Note that we've not yet updated this.pos, so it's currently pointing to
+      // the place where we want to write the string length.
+      if (this.pos <= bufLen) {
+        this.writeLong(pos - startPos);
+      }
+
+      this.pos = pos;
     }
   }
 