Skip to content

Commit

Permalink
fix(encode): Fix surrogate check (#562)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored Aug 9, 2021
1 parent 07897f9 commit 769ce35
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
15 changes: 12 additions & 3 deletions src/encode-trie.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
import htmlMap from "./maps/entities.json";

/**
 * Bit patterns used to recognise UTF-16 high (leading) surrogate code units.
 */
const enum Surrogate {
  /** Selects the six most significant bits of a 16-bit code unit. */
  Mask = 0xfc00,
  /** Value of those six bits for a high surrogate (code units 0xD800–0xDBFF). */
  High = 0xd800,
}

/**
 * Tests whether a UTF-16 code unit is a high (leading) surrogate.
 *
 * NOTE: the name is misspelled ("Surrugate"); it is kept as-is because other
 * code in this module calls the function by this name.
 *
 * @param c A UTF-16 code unit, e.g. a value returned by `charCodeAt`.
 * @returns `true` when the bits selected by `Surrogate.Mask` equal `Surrogate.High`.
 */
function isHighSurrugate(c: number) {
  const leadingBits = c & Surrogate.Mask;
  return leadingBits === Surrogate.High;
}

// For compatibility with node < 4, we wrap `codePointAt`
export const getCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
String.prototype.codePointAt != null
? (str: string, index: number): number => str.codePointAt(index)!
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
(c: string, index: number): number =>
(c.charCodeAt(index) & 0xd800) === 0xd800
? (c.charCodeAt(index) - 0xd800) * 0x400 +
isHighSurrugate(c.charCodeAt(index))
? (c.charCodeAt(index) - Surrogate.High) * 0x400 +
c.charCodeAt(index + 1) -
0xdc00 +
0x10000
Expand Down Expand Up @@ -45,7 +54,7 @@ export function encodeHTMLTrieRe(regExp: RegExp, str: string): string {
i
).toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIdx = regExp.lastIndex += Number((char & 0xd800) === 0xd800);
lastIdx = regExp.lastIndex += Number(isHighSurrugate(char));
}
}

Expand Down
7 changes: 7 additions & 0 deletions src/encode.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,11 @@ describe("encodeNonAsciiHTML", () => {
expect(entities.encodeNonAsciiHTML("😄🍾🥳💥😇")).toBe(
"&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;"
));

it("should encode chars above surrogates", () =>
expect(
entities.encodeNonAsciiHTML("♒️♓️♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️")
).toBe(
"&#x2652;&#xfe0f;&#x2653;&#xfe0f;&#x2648;&#xfe0f;&#x2649;&#xfe0f;&#x264a;&#xfe0f;&#x264b;&#xfe0f;&#x264c;&#xfe0f;&#x264d;&#xfe0f;&#x264e;&#xfe0f;&#x264f;&#xfe0f;&#x2650;&#xfe0f;&#x2651;&#xfe0f;"
));
});
2 changes: 1 addition & 1 deletion src/encode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export function encodeXML(str: string): string {
).toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIdx = xmlReplacer.lastIndex += Number(
(char & 0xd800) === 0xd800
(char & 0b1111_1111_1000_0000) === 0xd800
);
}
}
Expand Down

0 comments on commit 769ce35

Please sign in to comment.