diff --git a/builtin_regexp.go b/builtin_regexp.go index 89402820..2d02b667 100644 --- a/builtin_regexp.go +++ b/builtin_regexp.go @@ -183,7 +183,7 @@ func compileRegexpFromValueString(patternStr String, flags string) (*regexpPatte } func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { - var global, ignoreCase, multiline, sticky, unicode bool + var global, ignoreCase, multiline, dotAll, sticky, unicode bool var wrapper *regexpWrapper var wrapper2 *regexp2Wrapper @@ -205,6 +205,12 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { return } multiline = true + case 's': + if dotAll { + invalidFlags() + return + } + dotAll = true case 'i': if ignoreCase { invalidFlags() @@ -235,12 +241,15 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { patternStr = convertRegexpToUtf16(patternStr) } - re2Str, err1 := parser.TransformRegExp(patternStr) + re2Str, err1 := parser.TransformRegExp(patternStr, dotAll) if err1 == nil { re2flags := "" if multiline { re2flags += "m" } + if dotAll { + re2flags += "s" + } if ignoreCase { re2flags += "i" } @@ -259,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { err = err1 return } - wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase) + wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase) if err != nil { err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err) return @@ -273,6 +282,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { global: global, ignoreCase: ignoreCase, multiline: multiline, + dotAll: dotAll, sticky: sticky, unicode: unicode, } @@ -431,6 +441,9 @@ func (r *Runtime) regexpproto_toString(call FunctionCall) Value { if this.pattern.multiline { sb.WriteRune('m') } + if this.pattern.dotAll { + sb.WriteRune('s') + } if this.pattern.unicode { sb.WriteRune('u') } @@ -538,6 +551,20 @@ func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value { } } +func (r *Runtime) regexpproto_getDotAll(call FunctionCall) Value { + if this, ok := r.toObject(call.This).self.(*regexpObject); ok { + if this.pattern.dotAll { + return valueTrue + } else { + return valueFalse + } + } else if call.This == r.global.RegExpPrototype { + return _undefined + } else { + panic(r.NewTypeError("Method RegExp.prototype.dotAll getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))) + } +} + func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value { if this, ok := r.toObject(call.This).self.(*regexpObject); ok { if this.pattern.ignoreCase { @@ -581,7 +608,7 @@ func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value { } func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { - var global, ignoreCase, multiline, sticky, unicode bool + var global, ignoreCase, multiline, dotAll, sticky, unicode bool thisObj := r.toObject(call.This) size := 0 @@ -603,6 +630,12 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { size++ } } + if v := thisObj.self.getStr("dotAll", nil); v != nil { + dotAll = v.ToBoolean() + if dotAll { + size++ + } + } if v := thisObj.self.getStr("sticky", nil); v != nil { sticky = v.ToBoolean() if sticky { @@ -627,6 +660,9 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { if multiline { sb.WriteByte('m') } + if dotAll { + sb.WriteByte('s') + } if unicode { sb.WriteByte('u') } @@ -1272,6 +1308,11 @@ func (r *Runtime) getRegExpPrototype() *Object { getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0), accessor: true, }, false) + o.setOwnStr("dotAll", &valueProperty{ + configurable: true, + getterFunc: r.newNativeFunc(r.regexpproto_getDotAll, "get dotAll", 0), + accessor: true, + }, false) o.setOwnStr("ignoreCase", &valueProperty{ configurable: true, getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0), diff --git a/go.mod b/go.mod index 4361fa0f..74f938b7 100644 --- a/go.mod +++ b/go.mod @@ -3,11 +3,12 @@ module github.com/dop251/goja go 1.20 require ( - github.com/dlclark/regexp2 v1.7.0 + github.com/dlclark/regexp2 v1.11.4 github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d github.com/go-sourcemap/sourcemap v2.1.3+incompatible github.com/google/pprof v0.0.0-20230207041349-798e818bf904 - github.com/kr/pretty v0.3.0 // indirect golang.org/x/text v0.3.8 gopkg.in/yaml.v2 v2.4.0 ) + +require github.com/kr/pretty v0.3.0 // indirect diff --git a/go.sum b/go.sum index ad04f993..0ff74a76 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,10 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo= github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dlclark/regexp2 v1.11.3 h1:tdwMFLz4VxHteujuVYHzG5Bje3M2ORsvv2jvbCTufTA= +github.com/dlclark/regexp2 v1.11.3/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= +github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk= github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y= github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d h1:W1n4DvpzZGOISgp7wWNtraLcHtnmnTwBlJidqtMIuwQ= diff --git a/parser/regexp.go b/parser/regexp.go index 2dbd0e19..72bb3c7f 100644 --- a/parser/regexp.go +++ b/parser/regexp.go @@ -40,6 +40,8 @@ type _RegExp_parser struct { goRegexp strings.Builder passOffset int + + dotAll bool // Enable dotAll mode } // TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern. @@ -55,7 +57,7 @@ type _RegExp_parser struct { // // If the pattern is invalid (not valid even in JavaScript), then this function // returns an empty string and a generic error. -func TransformRegExp(pattern string) (transformed string, err error) { +func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) { if pattern == "" { return "", nil @@ -64,6 +66,7 @@ func TransformRegExp(pattern string) (transformed string, err error) { parser := _RegExp_parser{ str: pattern, length: len(pattern), + dotAll: dotAll, } err = parser.parse() if err != nil { @@ -147,6 +150,10 @@ func (self *_RegExp_parser) scan() { self.error(true, "Unmatched ')'") return case '.': + if self.dotAll { + self.pass() + break + } self.writeString(Re2Dot) self.read() default: diff --git a/parser/regexp_test.go b/parser/regexp_test.go index 9a1d44de..b126769c 100644 --- a/parser/regexp_test.go +++ b/parser/regexp_test.go @@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) { { // err test := func(input string, expect interface{}) { - _, err := TransformRegExp(input) + _, err := TransformRegExp(input, false) _, incompat := err.(RegexpErrorIncompatible) is(incompat, false) is(err, expect) @@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) { { // incompatible test := func(input string, expectErr interface{}) { - _, err := TransformRegExp(input) + _, err := TransformRegExp(input, false) _, incompat := err.(RegexpErrorIncompatible) is(incompat, true) is(err, expectErr) @@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) { { // err test := func(input string, expect string) { - result, err := TransformRegExp(input) + result, err := TransformRegExp(input, false) is(err, nil) _, incompat := err.(RegexpErrorIncompatible) is(incompat, false) @@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) { func TestTransformRegExp(t *testing.T) { tt(t, func() { - pattern, err := TransformRegExp(`\s+abc\s+`) + pattern, err := TransformRegExp(`\s+abc\s+`, false) is(err, nil) is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`) is(regexp.MustCompile(pattern).MatchString("\t abc def"), true) }) tt(t, func() { - pattern, err := TransformRegExp(`\u{1d306}`) + pattern, err := TransformRegExp(`\u{1d306}`, false) is(err, nil) is(pattern, `\x{1d306}`) }) tt(t, func() { - pattern, err := TransformRegExp(`\u1234`) + pattern, err := TransformRegExp(`\u1234`, false) is(err, nil) is(pattern, `\x{1234}`) }) @@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) { b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - _, _ = TransformRegExp(reStr) + _, _ = TransformRegExp(reStr, false) } } diff --git a/regexp.go b/regexp.go index 1c3e7e06..430bf05f 100644 --- a/regexp.go +++ b/regexp.go @@ -61,17 +61,20 @@ func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) { type regexpPattern struct { src string - global, ignoreCase, multiline, sticky, unicode bool + global, ignoreCase, multiline, dotAll, sticky, unicode bool regexpWrapper *regexpWrapper regexp2Wrapper *regexp2Wrapper } -func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, error) { +func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) { var opts regexp2.RegexOptions = regexp2.ECMAScript if multiline { opts |= regexp2.Multiline } + if dotAll { + opts |= regexp2.Singleline + } if ignoreCase { opts |= regexp2.IgnoreCase } @@ -87,7 +90,7 @@ func (p *regexpPattern) createRegexp2() { if p.regexp2Wrapper != nil { return } - rx, err := compileRegexp2(p.src, p.multiline, p.ignoreCase) + rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase) if err != nil { // At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug. panic(err) @@ -175,6 +178,7 @@ func (p *regexpPattern) clone() *regexpPattern { global: p.global, ignoreCase: p.ignoreCase, multiline: p.multiline, + dotAll: p.dotAll, sticky: p.sticky, unicode: p.unicode, } diff --git a/regexp_test.go b/regexp_test.go index 3ea22430..97e2d2a5 100644 --- a/regexp_test.go +++ b/regexp_test.go @@ -712,6 +712,15 @@ func TestRegexpConcurrentLiterals(t *testing.T) { _, _ = vm.RunProgram(prg) } +func TestRegexpDotAll(t *testing.T) { + const SCRIPT = ` + var re = /./s; + re.test("\r") && re.test("\n") + ` + testScript(SCRIPT, valueTrue, t) + +} + func BenchmarkRegexpSplitWithBackRef(b *testing.B) { const SCRIPT = ` "aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/) diff --git a/tc39_test.go b/tc39_test.go index bea79580..964e2b57 100644 --- a/tc39_test.go +++ b/tc39_test.go @@ -207,7 +207,6 @@ var ( "BigInt", "resizable-arraybuffer", "regexp-named-groups", - "regexp-dotall", "regexp-unicode-property-escapes", "regexp-match-indices", "legacy-regexp",