Skip to content

Commit

Permalink
Improve code style with unicorn (#1496)
Browse files Browse the repository at this point in the history
Dependency to be added in a follow-up
  • Loading branch information
fb55 authored Jun 26, 2024
1 parent 3df92f5 commit b0759f7
Show file tree
Hide file tree
Showing 21 changed files with 345 additions and 315 deletions.
3 changes: 2 additions & 1 deletion scripts/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"rules": {
"n/no-unsupported-features/es-builtins": 0
"n/no-unsupported-features/es-builtins": 0,
"n/no-unsupported-features/node-builtins": 0
}
}
36 changes: 18 additions & 18 deletions scripts/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ const htmlEntitiesHtml5EncodeOptions: htmlEntities.EncodeOptions = {

const heEscapeOptions = { useNamedReferences: true };

const encoders: [string, (str: string) => string][] = [
["entities", (str: string) => entities.encodeHTML(str)],
["he", (str: string) => he.encode(str, heEscapeOptions)],
const encoders: [string, (stringToEncode: string) => string][] = [
["entities", (stringToEncode) => entities.encodeHTML(stringToEncode)],
["he", (stringToEncode) => he.encode(stringToEncode, heEscapeOptions)],
[
"html-entities",
(str: string) =>
htmlEntities.encode(str, htmlEntitiesHtml5EncodeOptions),
(stringToEncode) =>
htmlEntities.encode(stringToEncode, htmlEntitiesHtml5EncodeOptions),
],
];

Expand All @@ -28,14 +28,14 @@ const htmlEntitiesHtml5DecodeOptions: htmlEntities.DecodeOptions = {
scope: "body",
};

const decoders: [string, (str: string) => string][] = [
["entities", (str: string) => entities.decodeHTML(str)],
["he", (str: string) => he.decode(str)],
["parse-entities", (str: string) => parseEntities(str)],
const decoders: [string, (stringToDecode: string) => string][] = [
["entities", (stringToDecode) => entities.decodeHTML(stringToDecode)],
["he", (stringToDecode) => he.decode(stringToDecode)],
["parse-entities", (stringToDecode) => parseEntities(stringToDecode)],
[
"html-entities",
(str: string) =>
htmlEntities.decode(str, htmlEntitiesHtml5DecodeOptions),
(stringToDecode) =>
htmlEntities.decode(stringToDecode, htmlEntitiesHtml5DecodeOptions),
],
];

Expand All @@ -44,13 +44,13 @@ const htmlEntitiesXmlEncodeOptions: htmlEntities.EncodeOptions = {
mode: "specialChars",
};

const escapers: [string, (str: string) => string][] = [
["entities", (str: string) => entities.escapeUTF8(str)],
["he", (str: string) => he.escape(str)],
const escapers: [string, (escapee: string) => string][] = [
["entities", (escapee) => entities.escapeUTF8(escapee)],
["he", (escapee) => he.escape(escapee)],
// Html-entities cannot escape, so we use its simplest mode.
[
"html-entities",
(str: string) => htmlEntities.encode(str, htmlEntitiesXmlEncodeOptions),
(escapee) => htmlEntities.encode(escapee, htmlEntitiesXmlEncodeOptions),
],
];

Expand All @@ -77,23 +77,23 @@ console.log(

for (const [name, escape] of escapers) {
console.time(`Escaping ${name}`);
for (let i = 0; i < RUNS; i++) {
for (let index = 0; index < RUNS; index++) {
escape(textToEncode);
}
console.timeEnd(`Escaping ${name}`);
}

for (const [name, encode] of encoders) {
console.time(`Encoding ${name}`);
for (let i = 0; i < RUNS; i++) {
for (let index = 0; index < RUNS; index++) {
encode(textToEncode);
}
console.timeEnd(`Encoding ${name}`);
}

for (const [name, decode] of decoders) {
console.time(`Decoding ${name}`);
for (let i = 0; i < RUNS; i++) {
for (let index = 0; index < RUNS; index++) {
decode(textToDecode);
}
console.timeEnd(`Decoding ${name}`);
Expand Down
5 changes: 1 addition & 4 deletions scripts/trie/decode-trie.spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import { encodeTrie } from "./encode-trie.js";
import { decodeNode } from "./decode-trie.js";

import { getTrie } from "./trie.js";

import xmlMap from "../../maps/xml.json";
import entityMap from "../../maps/entities.json";
import legacyMap from "../../maps/legacy.json";
Expand Down Expand Up @@ -89,8 +87,7 @@ describe("decode_trie", () => {
mergeMaps(xmlMap, {}),
));

// Test takes a long time — skipped by default
it.skip("should decode the HTML map", () =>
it("should decode the HTML map", () =>
expect(decode(encodeTrie(getTrie(entityMap, legacyMap)))).toStrictEqual(
mergeMaps(entityMap, legacyMap),
));
Expand Down
34 changes: 17 additions & 17 deletions scripts/trie/decode-trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,39 @@ export function decodeNode(
return;
}

const branchIdx = startIndex + Math.max(valueLength, 1);
const branchIndex = startIndex + Math.max(valueLength, 1);

if (branchLength === 0) {
return decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(jumpOffset),
branchIdx,
branchIndex,
);
}

if (jumpOffset !== 0) {
for (let i = 0; i < branchLength; i++) {
const val = decodeMap[branchIdx + i] - 1;
if (val !== -1) {
const code = jumpOffset + i;
if (jumpOffset === 0) {
for (let index = 0; index < branchLength; index++) {
decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(decodeMap[branchIndex + index]),
decodeMap[branchIndex + branchLength + index],
);
}
} else {
for (let index = 0; index < branchLength; index++) {
const value = decodeMap[branchIndex + index] - 1;
if (value !== -1) {
const code = jumpOffset + index;

decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(code),
val,
value,
);
}
}
} else {
for (let i = 0; i < branchLength; i++) {
decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(decodeMap[branchIdx + i]),
decodeMap[branchIdx + branchLength + i],
);
}
}
}
6 changes: 3 additions & 3 deletions scripts/trie/encode-trie.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ describe("encode_trie", () => {

it("should encode a recursive branch to a jump map", () => {
const jumpRecursiveTrie = { next: new Map() };
[48, 49, 52, 54, 56, 57].forEach((val) =>
jumpRecursiveTrie.next.set(val, jumpRecursiveTrie),
);
for (const value of [48, 49, 52, 54, 56, 57]) {
jumpRecursiveTrie.next.set(value, jumpRecursiveTrie);
}
expect(encodeTrie(jumpRecursiveTrie)).toStrictEqual([
0b0000_0101_0011_0000, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
]);
Expand Down
51 changes: 28 additions & 23 deletions scripts/trie/encode-trie.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import * as assert from "assert";
import * as assert from "node:assert";
import type { TrieNode } from "./trie.js";

function binaryLength(num: number) {
return Math.ceil(Math.log2(num));
/**
 * Determines the binary length of a non-negative integer, i.e. the number
 * of bits needed to represent it (`0 -> 0`, `1 -> 1`, `127 -> 7`, `128 -> 8`).
 *
 * The previous `Math.ceil(Math.log2(integer))` under-counted exact powers of
 * two by one bit (e.g. `128 -> 7`, `65536 -> 16`), which let bit-width
 * assertions of the form `binaryLength(x) <= N` accept values that need
 * `N + 1` bits.
 *
 * @param integer A non-negative integer.
 * @returns The number of bits required to store `integer`; `NaN` for
 *   negative input (as before, since `Math.log2` of a negative is `NaN`).
 */
function binaryLength(integer: number): number {
// Math.log2(0) is -Infinity; zero needs no bits.
if (integer === 0) return 0;
// floor(log2(n)) is the index of the highest set bit; +1 gives the width.
return Math.floor(Math.log2(integer)) + 1;
}

/**
Expand Down Expand Up @@ -37,7 +40,7 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {

encodeCache.set(node, startIndex);

const nodeIdx = enc.push(0) - 1;
const nodeIndex = enc.push(0) - 1;

if (node.value != null) {
let valueLength = 0;
Expand All @@ -62,26 +65,26 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
"Too many bits for value length",
);

enc[nodeIdx] |= valueLength << 14;
enc[nodeIndex] |= valueLength << 14;

if (valueLength === 1) {
enc[nodeIdx] |= node.value.charCodeAt(0);
enc[nodeIndex] |= node.value.charCodeAt(0);
} else {
for (let i = 0; i < node.value.length; i++) {
enc.push(node.value.charCodeAt(i));
for (let index = 0; index < node.value.length; index++) {
enc.push(node.value.charCodeAt(index));
}
}
}

if (node.next) addBranches(node.next, nodeIdx);
if (node.next) addBranches(node.next, nodeIndex);

assert.strictEqual(nodeIdx, startIndex, "Has expected location");
assert.strictEqual(nodeIndex, startIndex, "Has expected location");

return startIndex;
}

function addBranches(next: Map<number, TrieNode>, nodeIdx: number) {
const branches = Array.from(next.entries());
function addBranches(next: Map<number, TrieNode>, nodeIndex: number) {
const branches = [...next.entries()];

// Sort branches ASC by key
branches.sort(([a], [b]) => a - b);
Expand All @@ -97,7 +100,7 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {

assert.ok(binaryLength(char) <= 7, "Too many bits for single char");

enc[nodeIdx] |= char;
enc[nodeIndex] |= char;
encodeNode(next);
return;
}
Expand Down Expand Up @@ -127,15 +130,15 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
);

// Write the length of the adjusted table, plus jump offset
enc[nodeIdx] |= (jumpTableLength << 7) | jumpOffset;
enc[nodeIndex] |= (jumpTableLength << 7) | jumpOffset;

assert.ok(
binaryLength(jumpTableLength) <= 7,
`Too many bits (${binaryLength(jumpTableLength)}) for branches`,
);

// Reserve space for the jump table
for (let i = 0; i < jumpTableLength; i++) enc.push(0);
for (let index = 0; index < jumpTableLength; index++) enc.push(0);

// Write the jump table
for (const [char, next] of branches) {
Expand All @@ -147,7 +150,7 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
return;
}

enc[nodeIdx] |= branches.length << 7;
enc[nodeIndex] |= branches.length << 7;

enc.push(
...branches.map(([char]) => char),
Expand All @@ -162,13 +165,13 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
);

// Encode the branches
branches.forEach(([val, next], idx) => {
assert.ok(val < 128, "Branch value too large");
for (const [index, [value, next]] of branches.entries()) {
assert.ok(value < 128, "Branch value too large");

const currentIndex = branchIndex + branches.length + idx;
const currentIndex = branchIndex + branches.length + index;
assert.strictEqual(
enc[currentIndex - branches.length],
val,
value,
"Should have the value as the first element",
);
assert.strictEqual(
Expand All @@ -180,16 +183,18 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {

assert.ok(binaryLength(offset) <= 16, "Too many bits for offset");
enc[currentIndex] = offset;
});
}
}

encodeNode(trie);

// Make sure that every value fits in a UInt16
assert.ok(
enc.every(
(val) =>
typeof val === "number" && val >= 0 && binaryLength(val) <= 16,
(value) =>
typeof value === "number" &&
value >= 0 &&
binaryLength(value) <= 16,
),
"Too many bits",
);
Expand Down
29 changes: 15 additions & 14 deletions scripts/trie/trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export function getTrie(
// Resolve the key
let lastMap = trie;
let next!: TrieNode;
for (let i = 0; i < key.length; i++) {
const char = key.charCodeAt(i);
for (let index = 0; index < key.length; index++) {
const char = key.charCodeAt(index);
next = lastMap.get(char) ?? {};
lastMap.set(char, next);
lastMap = next.next ??= new Map();
Expand Down Expand Up @@ -43,28 +43,29 @@ export function getTrie(
return false;
}

const next1 = Array.from(node1.next);
const next2 = Array.from(node2.next);
for (const [char, node] of node1.next) {
const value = node2.next.get(char);
if (value == null || !isEqual(node, value)) {
return false;
}
}

return next1.every(([char1, node1], idx) => {
const [char2, node2] = next2[idx];
return char1 === char2 && isEqual(node1, node2);
});
return true;
}

function mergeDuplicates(node: TrieNode) {
const nodes = [node];

for (let nodeIdx = 0; nodeIdx < nodes.length; nodeIdx++) {
const { next } = nodes[nodeIdx];
for (let nodeIndex = 0; nodeIndex < nodes.length; nodeIndex++) {
const { next } = nodes[nodeIndex];

if (!next) continue;

for (const [char, node] of Array.from(next)) {
const idx = nodes.findIndex((n) => isEqual(n, node));
for (const [char, node] of next) {
const index = nodes.findIndex((n) => isEqual(n, node));

if (idx >= 0) {
next.set(char, nodes[idx]);
if (index >= 0) {
next.set(char, nodes[index]);
} else {
nodes.push(node);
}
Expand Down
Loading

0 comments on commit b0759f7

Please sign in to comment.