Skip to content

Commit

Permalink
feat: regular expressions support DotAll flag (#593)
Browse files Browse the repository at this point in the history
  • Loading branch information
monkeyWie authored Aug 6, 2024
1 parent aafb077 commit 3491d4a
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 18 deletions.
49 changes: 45 additions & 4 deletions builtin_regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func compileRegexpFromValueString(patternStr String, flags string) (*regexpPatte
}

func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
var global, ignoreCase, multiline, sticky, unicode bool
var global, ignoreCase, multiline, dotAll, sticky, unicode bool
var wrapper *regexpWrapper
var wrapper2 *regexp2Wrapper

Expand All @@ -205,6 +205,12 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
return
}
multiline = true
case 's':
if dotAll {
invalidFlags()
return
}
dotAll = true
case 'i':
if ignoreCase {
invalidFlags()
Expand Down Expand Up @@ -235,12 +241,15 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
patternStr = convertRegexpToUtf16(patternStr)
}

re2Str, err1 := parser.TransformRegExp(patternStr)
re2Str, err1 := parser.TransformRegExp(patternStr, dotAll)
if err1 == nil {
re2flags := ""
if multiline {
re2flags += "m"
}
if dotAll {
re2flags += "s"
}
if ignoreCase {
re2flags += "i"
}
Expand All @@ -259,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
err = err1
return
}
wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase)
if err != nil {
err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
return
Expand All @@ -273,6 +282,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
global: global,
ignoreCase: ignoreCase,
multiline: multiline,
dotAll: dotAll,
sticky: sticky,
unicode: unicode,
}
Expand Down Expand Up @@ -431,6 +441,9 @@ func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
if this.pattern.multiline {
sb.WriteRune('m')
}
if this.pattern.dotAll {
sb.WriteRune('s')
}
if this.pattern.unicode {
sb.WriteRune('u')
}
Expand Down Expand Up @@ -538,6 +551,20 @@ func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
}
}

// regexpproto_getDotAll is the native getter installed for the "dotAll"
// accessor.
//
// When the receiver is backed by a *regexpObject it returns valueTrue or
// valueFalse according to the compiled pattern's dotAll flag. When the
// receiver is the RegExp prototype object itself it returns _undefined.
// Any other receiver causes a panic carrying a TypeError.
func (r *Runtime) regexpproto_getDotAll(call FunctionCall) Value {
	if re, isRegexp := r.toObject(call.This).self.(*regexpObject); isRegexp {
		if re.pattern.dotAll {
			return valueTrue
		}
		return valueFalse
	}

	// The prototype object is not a regexp object, but reading the accessor
	// on it yields undefined rather than an error.
	if call.This == r.global.RegExpPrototype {
		return _undefined
	}

	panic(r.NewTypeError("Method RegExp.prototype.dotAll getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
}

func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
if this.pattern.ignoreCase {
Expand Down Expand Up @@ -581,7 +608,7 @@ func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
}

func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
var global, ignoreCase, multiline, sticky, unicode bool
var global, ignoreCase, multiline, dotAll, sticky, unicode bool

thisObj := r.toObject(call.This)
size := 0
Expand All @@ -603,6 +630,12 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
size++
}
}
if v := thisObj.self.getStr("dotAll", nil); v != nil {
dotAll = v.ToBoolean()
if dotAll {
size++
}
}
if v := thisObj.self.getStr("sticky", nil); v != nil {
sticky = v.ToBoolean()
if sticky {
Expand All @@ -627,6 +660,9 @@ func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
if multiline {
sb.WriteByte('m')
}
if dotAll {
sb.WriteByte('s')
}
if unicode {
sb.WriteByte('u')
}
Expand Down Expand Up @@ -1272,6 +1308,11 @@ func (r *Runtime) getRegExpPrototype() *Object {
getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
accessor: true,
}, false)
o.setOwnStr("dotAll", &valueProperty{
configurable: true,
getterFunc: r.newNativeFunc(r.regexpproto_getDotAll, "get dotAll", 0),
accessor: true,
}, false)
o.setOwnStr("ignoreCase", &valueProperty{
configurable: true,
getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ module github.com/dop251/goja
go 1.20

require (
github.com/dlclark/regexp2 v1.7.0
github.com/dlclark/regexp2 v1.11.4
github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d
github.com/go-sourcemap/sourcemap v2.1.3+incompatible
github.com/google/pprof v0.0.0-20230207041349-798e818bf904
github.com/kr/pretty v0.3.0 // indirect
golang.org/x/text v0.3.8
gopkg.in/yaml.v2 v2.4.0
)

require github.com/kr/pretty v0.3.0 // indirect
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dlclark/regexp2 v1.11.3 h1:tdwMFLz4VxHteujuVYHzG5Bje3M2ORsvv2jvbCTufTA=
github.com/dlclark/regexp2 v1.11.3/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d h1:W1n4DvpzZGOISgp7wWNtraLcHtnmnTwBlJidqtMIuwQ=
Expand Down
9 changes: 8 additions & 1 deletion parser/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ type _RegExp_parser struct {

goRegexp strings.Builder
passOffset int

dotAll bool // Enable dotAll mode
}

// TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern.
Expand All @@ -55,7 +57,7 @@ type _RegExp_parser struct {
//
// If the pattern is invalid (not valid even in JavaScript), then this function
// returns an empty string and a generic error.
func TransformRegExp(pattern string) (transformed string, err error) {
func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) {

if pattern == "" {
return "", nil
Expand All @@ -64,6 +66,7 @@ func TransformRegExp(pattern string) (transformed string, err error) {
parser := _RegExp_parser{
str: pattern,
length: len(pattern),
dotAll: dotAll,
}
err = parser.parse()
if err != nil {
Expand Down Expand Up @@ -147,6 +150,10 @@ func (self *_RegExp_parser) scan() {
self.error(true, "Unmatched ')'")
return
case '.':
if self.dotAll {
self.pass()
break
}
self.writeString(Re2Dot)
self.read()
default:
Expand Down
14 changes: 7 additions & 7 deletions parser/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) {
{
// err
test := func(input string, expect interface{}) {
_, err := TransformRegExp(input)
_, err := TransformRegExp(input, false)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, false)
is(err, expect)
Expand All @@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) {
{
// incompatible
test := func(input string, expectErr interface{}) {
_, err := TransformRegExp(input)
_, err := TransformRegExp(input, false)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, true)
is(err, expectErr)
Expand All @@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) {
{
// err
test := func(input string, expect string) {
result, err := TransformRegExp(input)
result, err := TransformRegExp(input, false)
is(err, nil)
_, incompat := err.(RegexpErrorIncompatible)
is(incompat, false)
Expand Down Expand Up @@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) {

func TestTransformRegExp(t *testing.T) {
tt(t, func() {
pattern, err := TransformRegExp(`\s+abc\s+`)
pattern, err := TransformRegExp(`\s+abc\s+`, false)
is(err, nil)
is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
})
tt(t, func() {
pattern, err := TransformRegExp(`\u{1d306}`)
pattern, err := TransformRegExp(`\u{1d306}`, false)
is(err, nil)
is(pattern, `\x{1d306}`)
})
tt(t, func() {
pattern, err := TransformRegExp(`\u1234`)
pattern, err := TransformRegExp(`\u1234`, false)
is(err, nil)
is(pattern, `\x{1234}`)
})
Expand All @@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_, _ = TransformRegExp(reStr)
_, _ = TransformRegExp(reStr, false)
}
}

Expand Down
10 changes: 7 additions & 3 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,20 @@ func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) {
type regexpPattern struct {
src string

global, ignoreCase, multiline, sticky, unicode bool
global, ignoreCase, multiline, dotAll, sticky, unicode bool

regexpWrapper *regexpWrapper
regexp2Wrapper *regexp2Wrapper
}

func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, error) {
func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) {
var opts regexp2.RegexOptions = regexp2.ECMAScript
if multiline {
opts |= regexp2.Multiline
}
if dotAll {
opts |= regexp2.Singleline
}
if ignoreCase {
opts |= regexp2.IgnoreCase
}
Expand All @@ -87,7 +90,7 @@ func (p *regexpPattern) createRegexp2() {
if p.regexp2Wrapper != nil {
return
}
rx, err := compileRegexp2(p.src, p.multiline, p.ignoreCase)
rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase)
if err != nil {
// At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug.
panic(err)
Expand Down Expand Up @@ -175,6 +178,7 @@ func (p *regexpPattern) clone() *regexpPattern {
global: p.global,
ignoreCase: p.ignoreCase,
multiline: p.multiline,
dotAll: p.dotAll,
sticky: p.sticky,
unicode: p.unicode,
}
Expand Down
9 changes: 9 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,15 @@ func TestRegexpConcurrentLiterals(t *testing.T) {
_, _ = vm.RunProgram(prg)
}

// TestRegexpDotAll runs a script that builds the literal /./s and expects
// it to match both "\r" and "\n", i.e. the script must evaluate to true.
func TestRegexpDotAll(t *testing.T) {
	const dotAllScript = `
var re = /./s;
re.test("\r") && re.test("\n")
`
	testScript(dotAllScript, valueTrue, t)
}

func BenchmarkRegexpSplitWithBackRef(b *testing.B) {
const SCRIPT = `
"aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/)
Expand Down
1 change: 0 additions & 1 deletion tc39_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ var (
"BigInt",
"resizable-arraybuffer",
"regexp-named-groups",
"regexp-dotall",
"regexp-unicode-property-escapes",
"regexp-match-indices",
"legacy-regexp",
Expand Down

0 comments on commit 3491d4a

Please sign in to comment.