From dc4a675997bb439f36b39b1a20af6e20f74d7c5d Mon Sep 17 00:00:00 2001
From: valadaptive <valadaptive@protonmail.com>
Date: Fri, 23 Feb 2024 19:38:10 -0500
Subject: [PATCH] Redo writeString without Buffer.byteLength

This took some fiddling but it's now *faster* than the previous
implementation.
---
 lib/utils.js | 154 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 111 insertions(+), 43 deletions(-)

diff --git a/lib/utils.js b/lib/utils.js
index 098bfdcc..a142787f 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -464,43 +464,42 @@ if (typeof Buffer === 'function' && Buffer.prototype.utf8Slice) {
   };
 }
 
-function encodeSliceManual(arr, str, start, end) {
-  let pos = start;
-  for (let i = 0, l = end - start; i < l; i++) {
-    let c1 = str.charCodeAt(i);
-    let c2;
-    if (c1 < 0x80) {
-      arr[pos++] = c1;
-    } else if (c1 < 0x800) {
-      arr[pos++] = c1 >> 6 | 0xc0;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    } else if (
-      (c1 & 0xfc00) === 0xd800 &&
-      ((c2 = str.charCodeAt(i + 1)) & 0xfc00) === 0xdc00
-    ) {
-      c1 = 0x10000 + ((c1 & 0x03ff) << 10) + (c2 & 0x03ff);
-      i++;
-      arr[pos++] = c1 >> 18 | 0xf0;
-      arr[pos++] = c1 >> 12 & 0x3f | 0x80;
-      arr[pos++] = c1 >> 6 & 0x3f | 0x80;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    } else {
-      arr[pos++] = c1 >> 12 | 0xe0;
-      arr[pos++] = c1 >> 6 & 0x3f | 0x80;
-      arr[pos++] = c1 & 0x3f | 0x80;
-    }
+const ENCODER = new TextEncoder();
+const encodeBuf = new Uint8Array(4096);
+const encodeBufs = [];
+// `subarray` is surprisingly expensive, so precompute one view of `encodeBuf`
+// per possible length (0 to 4096) and reuse them. NOTE: every view aliases
+// `encodeBuf`, so a returned view is only valid until `encodeBuf` is rewritten.
+for (let i = 0; i <= encodeBuf.length; i++) {
+  encodeBufs.push(encodeBuf.subarray(0, i));
+}
+
+function encodeSlice(str) {
+  const {read, written} = ENCODER.encodeInto(str, encodeBuf);
+  if (read === str.length) {
+    return encodeBufs[written];
   }
+
+  return ENCODER.encode(str);
 }
 
-let encodeSlice;
-if (typeof Buffer === 'function' && Buffer.prototype.utf8Write) {
-  encodeSlice = function(arr, str, start, end) {
-    Buffer.prototype.utf8Write.call(arr, str, start, end - start);
-  };
+let utf8Length;
+if (typeof Buffer === 'function') {
+  utf8Length = Buffer.byteLength;
 } else {
-  const ENCODER = new TextEncoder();
-  encodeSlice = function(arr, str, start, end) {
-    ENCODER.encodeInto(str, arr.subarray(start, end));
+  utf8Length = function(str) {
+    let len = 0;
+    for (;;) {
+      // encodeInto beats any manual implementation (or even Buffer.byteLength)
+      // when the string fits entirely in the buffer; past that it slows down
+      // but is still faster than other options. `read` is the count of UTF-16
+      // code units consumed, so we re-encode the remainder until none is left.
+      const {read, written} = ENCODER.encodeInto(str, encodeBuf);
+      len += written;
+      if (read === str.length) break;
+      str = str.slice(read);
+    }
+    return len;
   };
 }
 
@@ -803,18 +802,87 @@ class Tap {
   }
 
   writeString (s) {
-    let len = Buffer.byteLength(s);
     let buf = this.arr;
-    this.writeLong(len);
-    let pos = this.pos;
-    this.pos += len;
-    if (this.pos > buf.length) {
-      return;
-    }
-    if (len > 64) {
-      encodeSlice(buf, s, pos, pos + len);
+    const stringLen = s.length;
+    // The maximum number that a signed varint can store in a single byte is 63.
+    // The maximum size of a UTF-8 representation of a UTF-16 string is 3 times
+    // its length, as one UTF-16 code unit can be represented by up to 3 bytes
+    // in UTF-8. Therefore, if the string is 21 characters or less, we know that
+    // its length can be stored in a single byte, which is why we choose 21 as
+    // the small-string threshold specifically.
+    if (stringLen > 21) {
+      let encodedLength, encoded;
+
+      // If we're already over the buffer size, we don't need to encode the
+      // string. While encodeInto is actually faster than Buffer.byteLength, we
+      // could still overflow the preallocated encoding buffer and have to fall
+      // back to allocating, which is really really slow.
+      if (this.isValid()) {
+        encoded = encodeSlice(s);
+        encodedLength = encoded.length;
+      } else {
+        encodedLength = utf8Length(s);
+      }
+      this.writeLong(encodedLength);
+      let pos = this.pos;
+      // Advance first: `set` throws a RangeError if the payload doesn't fit.
+      this.pos += encodedLength;
+
+      if (this.isValid() && typeof encoded !== 'undefined') {
+        buf.set(encoded, pos);
+      }
     } else {
-      encodeSliceManual(buf, s, pos, pos + len);
+      // For small strings, this manual implementation is faster.
+
+      // Set aside 1 byte to write the string length.
+      let pos = this.pos + 1;
+      let startPos = pos;
+      let bufLen = buf.length;
+
+      // This is not a micro-optimization: caching the string length for the
+      // loop predicate really does make a difference!
+      for (let i = 0; i < stringLen; i++) {
+        let c1 = s.charCodeAt(i);
+        let c2;
+        if (c1 < 0x80) {
+          if (pos < bufLen) buf[pos] = c1;
+          pos++;
+        } else if (c1 < 0x800) {
+          if (pos + 1 < bufLen) {
+            buf[pos] = c1 >> 6 | 0xc0;
+            buf[pos + 1] = c1 & 0x3f | 0x80;
+          }
+          pos += 2;
+        } else if (
+          (c1 & 0xfc00) === 0xd800 &&
+          ((c2 = s.charCodeAt(i + 1)) & 0xfc00) === 0xdc00
+        ) {
+          c1 = 0x10000 + ((c1 & 0x03ff) << 10) + (c2 & 0x03ff);
+          i++;
+          if (pos + 3 < bufLen) {
+            buf[pos] = c1 >> 18 | 0xf0;
+            buf[pos + 1] = c1 >> 12 & 0x3f | 0x80;
+            buf[pos + 2] = c1 >> 6 & 0x3f | 0x80;
+            buf[pos + 3] = c1 & 0x3f | 0x80;
+          }
+          pos += 4;
+        } else {
+          if (pos + 2 < bufLen) {
+            buf[pos] = c1 >> 12 | 0xe0;
+            buf[pos + 1] = c1 >> 6 & 0x3f | 0x80;
+            buf[pos + 2] = c1 & 0x3f | 0x80;
+          }
+          pos += 3;
+        }
+      }
+
+      // Note that we've not yet updated this.pos, so it's currently pointing to
+      // the place where we want to write the string length.
+      if (this.pos <= bufLen) {
+        this.writeLong(pos - startPos);
+      }
+
+      this.pos = pos;
     }
   }