Skip to content

Commit

Permalink
Recognize non-BMP punctuation & symbols (#297)
Browse files Browse the repository at this point in the history
Closes #296.
  • Loading branch information
tats-u authored Dec 18, 2024
1 parent 38d2938 commit e489bf5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
24 changes: 22 additions & 2 deletions lib/inlines.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ var match = function(re) {

// Returns the code for the character at the current subject position, or -1
// if there are no more characters.
// This function must be non-BMP aware because the Unicode category of its result is used.
var peek = function() {
if (this.pos < this.subject.length) {
return this.subject.charCodeAt(this.pos);
return this.subject.codePointAt(this.pos);
} else {
return -1;
}
Expand Down Expand Up @@ -270,7 +271,7 @@ var scanDelims = function(cc) {
return null;
}

char_before = startpos === 0 ? "\n" : this.subject.charAt(startpos - 1);
char_before = previousChar(this.subject, startpos);

cc_after = this.peek();
if (cc_after === -1) {
Expand Down Expand Up @@ -304,6 +305,25 @@ var scanDelims = function(cc) {
}
this.pos = startpos;
return { numdelims: numdelims, can_open: can_open, can_close: can_close };

// Returns the character that ends immediately before index `pos` in `str`.
// A well-formed surrogate pair (high surrogate followed by low surrogate) is
// returned as one two-code-unit string so that non-BMP characters are kept
// whole. Returns "\n" when `pos` is 0. An unpaired surrogate is returned by
// itself as a one-code-unit string.
function previousChar(str, pos) {
    if (pos === 0) {
        return "\n";
    }
    var previous_cc = str.charCodeAt(pos - 1);
    // Not a low surrogate: the previous character is a single code unit.
    if ((previous_cc & 0xfc00) !== 0xdc00) {
        return str.charAt(pos - 1);
    }
    // charCodeAt returns NaN when the index is out of range (pos === 1), and
    // NaN & 0xfc00 === 0, so the check below also covers that case.
    var two_previous_cc = str.charCodeAt(pos - 2);
    // The low surrogate is not preceded by a high surrogate: it is a lone
    // surrogate, so return just that single code unit.
    if ((two_previous_cc & 0xfc00) !== 0xd800) {
        return str.charAt(pos - 1);
    }
    // Well-formed surrogate pair: return both code units together.
    return str.slice(pos - 2, pos);
}
};

// Handle a delimiter marker for emphasis or a quote.
Expand Down
16 changes: 16 additions & 0 deletions test/regression.txt
Original file line number Diff line number Diff line change
Expand Up @@ -546,3 +546,19 @@ foo <!-- test --> more -->
<p> 全角スペース (U+3000) 全形空白 </p>
<p>ZWNBSP (U+FEFF) ZWNBSP</p>
````````````````````````````````

#296
```````````````````````````````` example
a**a∇**a

a**∇a**a

a**a𝜵**a

a**𝜵a**a
.
<p>a**a∇**a</p>
<p>a**∇a**a</p>
<p>a**a𝜵**a</p>
<p>a**𝜵a**a</p>
````````````````````````````````

0 comments on commit e489bf5

Please sign in to comment.