Skip to content

Commit

Permalink
Numeric separator literal (#603)
Browse files Browse the repository at this point in the history
* Added support for separators in numeric literals.

* Improved support for Unicode code point escapes (\u{...}) in regexes.

* Improved number parsing compatibility.

---------

Co-authored-by: Joan López de la Franca Beltran <[email protected]>
  • Loading branch information
dop251 and joanlopez authored Aug 28, 2024
1 parent fa6d1ed commit 016eb72
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 62 deletions.
4 changes: 2 additions & 2 deletions builtin_regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
patternStr = convertRegexpToUtf16(patternStr)
}

re2Str, err1 := parser.TransformRegExp(patternStr, dotAll)
re2Str, err1 := parser.TransformRegExp(patternStr, dotAll, unicode)
if err1 == nil {
re2flags := ""
if multiline {
Expand All @@ -268,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
err = err1
return
}
wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase)
wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase, unicode)
if err != nil {
err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
return
Expand Down
20 changes: 12 additions & 8 deletions parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,9 +633,13 @@ func (self *_parser) skipWhiteSpace() {
}
}

func (self *_parser) scanMantissa(base int) {
for digitValue(self.chr) < base {
// scanMantissa advances the lexer past a run of characters that are digits in
// the given base. When allowSeparator is true, '_' characters are consumed as
// part of the run as well.
//
// After each consumed '_' the following character must be a digit of the
// base; if it is not, an error is reported at the current offset and the loop
// re-evaluates that character. Scanning stops at the first character that is
// neither a digit of the base nor an accepted separator.
//
// NOTE(review): the error fires for any non-digit after '_' (for example a
// trailing separator), not only for a doubled one, so the message is narrower
// than the condition it reports. A '_' that is the very first character seen
// here is also consumed; confirm that every caller has already read a digit
// wherever a leading separator must be rejected.
func (self *_parser) scanMantissa(base int, allowSeparator bool) {
	for {
		isUnderscore := self.chr == '_'
		if digitValue(self.chr) >= base && !(allowSeparator && isUnderscore) {
			return
		}
		self.read()
		if isUnderscore && !isDigit(self.chr, base) {
			self.error(self.chrOffset, "Only one underscore is allowed as numeric separator")
		}
	}
}

Expand Down Expand Up @@ -1140,7 +1144,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)

if decimalPoint {
offset--
self.scanMantissa(10)
self.scanMantissa(10, true)
} else {
if self.chr == '0' {
self.read()
Expand All @@ -1156,23 +1160,23 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
// no-op
default:
// legacy octal
self.scanMantissa(8)
self.scanMantissa(8, false)
goto end
}
if base > 0 {
self.read()
if !isDigit(self.chr, base) {
return token.ILLEGAL, self.str[offset:self.chrOffset]
}
self.scanMantissa(base)
self.scanMantissa(base, true)
goto end
}
} else {
self.scanMantissa(10)
self.scanMantissa(10, true)
}
if self.chr == '.' {
self.read()
self.scanMantissa(10)
self.scanMantissa(10, true)
}
}

Expand All @@ -1183,7 +1187,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string)
}
if isDecimalDigit(self.chr) {
self.read()
self.scanMantissa(10)
self.scanMantissa(10, true)
} else {
return token.ILLEGAL, self.str[offset:self.chrOffset]
}
Expand Down
5 changes: 5 additions & 0 deletions parser/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ Second line \
token.NUMBER, "12.3", 5,
)

test("1_000 1_000_000",
token.NUMBER, "1_000", 1,
token.NUMBER, "1_000_000", 7,
)

test(`1n`,
token.NUMBER, "1n", 1,
)
Expand Down
17 changes: 10 additions & 7 deletions parser/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ type _RegExp_parser struct {
goRegexp strings.Builder
passOffset int

dotAll bool // Enable dotAll mode
dotAll bool // Enable dotAll mode
unicode bool
}

// TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern.
Expand All @@ -57,16 +58,17 @@ type _RegExp_parser struct {
//
// If the pattern is invalid (not valid even in JavaScript), then this function
// returns an empty string and a generic error.
func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) {
func TransformRegExp(pattern string, dotAll, unicode bool) (transformed string, err error) {

if pattern == "" {
return "", nil
}

parser := _RegExp_parser{
str: pattern,
length: len(pattern),
dotAll: dotAll,
str: pattern,
length: len(pattern),
dotAll: dotAll,
unicode: unicode,
}
err = parser.parse()
if err != nil {
Expand Down Expand Up @@ -292,7 +294,7 @@ func (self *_RegExp_parser) scanEscape(inClass bool) {

case 'u':
self.read()
if self.chr == '{' {
if self.chr == '{' && self.unicode {
self.read()
length, base = 0, 16
} else {
Expand Down Expand Up @@ -392,7 +394,8 @@ func (self *_RegExp_parser) scanEscape(inClass bool) {
digit := uint32(digitValue(self.chr))
if digit >= base {
// Not a valid digit
goto skip
self.error(true, "Invalid Unicode escape")
return
}
self.read()
}
Expand Down
14 changes: 7 additions & 7 deletions parser/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) {
{
// err
test := func(input string, expect interface{}) {
_, err := TransformRegExp(input, false)
_, err := TransformRegExp(input, false, false)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, false)
is(err, expect)
Expand All @@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) {
{
// incompatible
test := func(input string, expectErr interface{}) {
_, err := TransformRegExp(input, false)
_, err := TransformRegExp(input, false, false)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, true)
is(err, expectErr)
Expand All @@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) {
{
// ok: transformation is expected to succeed without an error
test := func(input string, expect string) {
result, err := TransformRegExp(input, false)
result, err := TransformRegExp(input, false, false)
is(err, nil)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, false)
Expand Down Expand Up @@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) {

func TestTransformRegExp(t *testing.T) {
tt(t, func() {
pattern, err := TransformRegExp(`\s+abc\s+`, false)
pattern, err := TransformRegExp(`\s+abc\s+`, false, false)
is(err, nil)
is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
})
tt(t, func() {
pattern, err := TransformRegExp(`\u{1d306}`, false)
pattern, err := TransformRegExp(`\u{1d306}`, false, true)
is(err, nil)
is(pattern, `\x{1d306}`)
})
tt(t, func() {
pattern, err := TransformRegExp(`\u1234`, false)
pattern, err := TransformRegExp(`\u1234`, false, false)
is(err, nil)
is(pattern, `\x{1234}`)
})
Expand All @@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_, _ = TransformRegExp(reStr, false)
_, _ = TransformRegExp(reStr, false, false)
}
}

Expand Down
7 changes: 5 additions & 2 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type regexpPattern struct {
regexp2Wrapper *regexp2Wrapper
}

func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) {
func compileRegexp2(src string, multiline, dotAll, ignoreCase, unicode bool) (*regexp2Wrapper, error) {
var opts regexp2.RegexOptions = regexp2.ECMAScript
if multiline {
opts |= regexp2.Multiline
Expand All @@ -78,6 +78,9 @@ func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wra
if ignoreCase {
opts |= regexp2.IgnoreCase
}
if unicode {
opts |= regexp2.Unicode
}
regexp2Pattern, err1 := regexp2.Compile(src, opts)
if err1 != nil {
return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1)
Expand All @@ -90,7 +93,7 @@ func (p *regexpPattern) createRegexp2() {
if p.regexp2Wrapper != nil {
return
}
rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase)
rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase, p.unicode)
if err != nil {
// At this point the regexp should already have been successfully converted to re2; if it fails now, it's a bug.
panic(err)
Expand Down
26 changes: 26 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,32 @@ func TestRegexpDotAll(t *testing.T) {

}

// TestRegexpNumSeparators runs a JavaScript snippet through
// testScriptWithTestLib and checks underscore handling around \u{...}
// escapes in patterns compiled with the "u" flag:
//
//   - a '_' that follows a complete \u{65} escape is matched as an ordinary
//     character ("ae_" matches, "e_" does not because of the lookbehind);
//   - a '_' inside the braces (\u{6_5}) makes the RegExp constructor throw a
//     SyntaxError, both in a pattern with a lookbehind and in one without.
//
// NOTE(review): the two throwing patterns presumably exist to exercise both
// regexp back ends (lookbehind vs. plain) — confirm against compileRegexp.
func TestRegexpNumSeparators(t *testing.T) {
const SCRIPT = `
const re = /(?<=a)\u{65}_/u;
assert(re.test("ae_") && !re.test("e_"));
assert.throws(SyntaxError, () => {
new RegExp("(?<=a)\\u{6_5}", "u");
});
assert.throws(SyntaxError, () => {
new RegExp("a\\u{6_5}", "u");
});
`
testScriptWithTestLib(SCRIPT, _undefined, t)
}

// TestRegexpUnicodeEscape runs a JavaScript snippet through
// testScriptWithTestLib and checks that \u{...} is only treated as a code
// point escape when the "u" flag is present:
//
//   - without "u", /\u{0_2}/ matches the literal text "u{0_2}";
//   - with "u", /\u{2}/ matches the character "\x02";
//   - without "u", /\u{2}/ matches "uu" in the same input.
func TestRegexpUnicodeEscape(t *testing.T) {
const SCRIPT = `
assert.sameValue("u{0_2}".match(/\u{0_2}/)[0], "u{0_2}");
assert.sameValue("uu\x02".match(/\u{2}/u)[0], '\x02');
assert.sameValue("uu\x02".match(/\u{2}/)[0], "uu");
`
testScriptWithTestLib(SCRIPT, _undefined, t)
}

func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
const SCRIPT = `
"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)
Expand Down
25 changes: 25 additions & 0 deletions runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2979,6 +2979,31 @@ func TestDestructAssignToSymbol(t *testing.T) {
testScriptWithTestLib(SCRIPT, _undefined, t)
}

// TestToNumber runs a JavaScript snippet through testScriptWithTestLib and
// checks string-to-number conversion:
//
//   - Number() yields NaN for lone or doubled signs and for "0xfp1" in its
//     upper-case, signed and whitespace-padded variants;
//   - Number("0.") and Number(" ") yield 0, and Number(" Infinity") yields
//     Infinity;
//   - [1].at("0xfp1") and [1].at(" 0xfp1") return 1, the array's only element.
//
// NOTE(review): "0xfp1" looks like a hex floating-point literal, presumably
// chosen to make sure the host language's number parser does not accept
// syntax that JavaScript rejects — confirm against the conversion code.
func TestToNumber(t *testing.T) {
const SCRIPT = `
assert(isNaN(Number("+")));
assert(isNaN(Number("++")));
assert(isNaN(Number("-")));
assert(isNaN(Number("0xfp1")));
assert(isNaN(Number("0Xfp1")));
assert(isNaN(Number("+0xfp1")));
assert(isNaN(Number(" +0xfp1")));
assert(isNaN(Number(" + 0xfp1")));
assert(isNaN(Number(" 0xfp1")));
assert(isNaN(Number("-0xfp1")));
assert(isNaN(Number("- 0xfp1")));
assert(isNaN(Number(" - 0xfp1")));
assert.sameValue(Number("0."), 0);
assert.sameValue(Number(" "), 0);
assert.sameValue(Number(" Infinity"), Infinity);
let a = [1];
assert.sameValue(1, a.at("0xfp1"));
assert.sameValue(1, a.at(" 0xfp1"));
`
testScriptWithTestLib(SCRIPT, _undefined, t)
}

/*
func TestArrayConcatSparse(t *testing.T) {
function foo(a,b,c)
Expand Down
Loading

0 comments on commit 016eb72

Please sign in to comment.