Skip to content

Commit

Permalink
Avoid btree Index() call on pattern with separators.
Browse files Browse the repository at this point in the history
To avoid hard Index()'ing of the given text with the btree matcher, we implement
prefix_any and suffix_any matchers that can work well in many cases.

The BTree matcher's Index() will be implemented in upcoming commits to prevent
the same bugs.

Fixes gobwas#23
  • Loading branch information
gobwas committed Feb 8, 2018
1 parent 51eb1ee commit 034ebb2
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 26 deletions.
30 changes: 18 additions & 12 deletions compiler/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,37 +43,43 @@ func optimizeMatcher(matcher match.Matcher) match.Matcher {
return m
}

leftNil := m.Left == nil
rightNil := m.Right == nil

var (
leftNil = m.Left == nil
rightNil = m.Right == nil
)
if leftNil && rightNil {
return match.NewText(r.Str)
}

_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
la, leftAny := m.Left.(match.Any)

_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
ra, rightAny := m.Right.(match.Any)

if leftSuper && rightSuper {
switch {
case leftSuper && rightSuper:
return match.NewContains(r.Str, false)
}

if leftSuper && rightNil {
case leftSuper && rightNil:
return match.NewSuffix(r.Str)
}

if rightSuper && leftNil {
case rightSuper && leftNil:
return match.NewPrefix(r.Str)
}

if leftNil && rightSuffix {
case leftNil && rightSuffix:
return match.NewPrefixSuffix(r.Str, rs.Suffix)
}

if rightNil && leftPrefix {
case rightNil && leftPrefix:
return match.NewPrefixSuffix(lp.Prefix, r.Str)

case rightNil && leftAny:
return match.NewSuffixAny(r.Str, la.Separators)

case leftNil && rightAny:
return match.NewPrefixAny(r.Str, ra.Separators)
}

return m
Expand Down
30 changes: 20 additions & 10 deletions glob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ func TestGlob(t *testing.T) {
glob(true, "/{rate,[0-9][0-9][0-9]}*", "/rate"),
glob(true, "/{rate,[a-z][a-z][a-z]}*", "/usd"),

glob(true, "{*.google.*,*.yandex.*}", "www.google.com", '.'),
glob(true, "{*.google.*,*.yandex.*}", "www.yandex.com", '.'),
glob(false, "{*.google.*,*.yandex.*}", "yandex.com", '.'),
glob(false, "{*.google.*,*.yandex.*}", "google.com", '.'),

glob(true, "{*.google.*,yandex.*}", "www.google.com", '.'),
glob(true, "{*.google.*,yandex.*}", "yandex.com", '.'),
glob(false, "{*.google.*,yandex.*}", "www.yandex.com", '.'),
glob(false, "{*.google.*,yandex.*}", "google.com", '.'),

glob(true, pattern_all, fixture_all_match),
glob(false, pattern_all, fixture_all_mismatch),

Expand Down Expand Up @@ -149,16 +159,16 @@ func TestGlob(t *testing.T) {
glob(true, pattern_prefix_suffix, fixture_prefix_suffix_match),
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
} {
g, err := Compile(test.pattern, test.delimiters...)
if err != nil {
t.Errorf("parsing pattern %q error: %s", test.pattern, err)
continue
}

result := g.Match(test.match)
if result != test.should {
t.Errorf("pattern %q matching %q should be %v but got %v\n%s", test.pattern, test.match, test.should, result, g)
}
t.Run("", func(t *testing.T) {
g := MustCompile(test.pattern, test.delimiters...)
result := g.Match(test.match)
if result != test.should {
t.Errorf(
"pattern %q matching %q should be %v but got %v\n%s",
test.pattern, test.match, test.should, result, g,
)
}
})
}
}

Expand Down
4 changes: 1 addition & 3 deletions match/any_of.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package match

import (
"fmt"
)
import "fmt"

type AnyOf struct {
Matchers Matchers
Expand Down
55 changes: 55 additions & 0 deletions match/prefix_any.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package match

import (
"fmt"
"strings"
"unicode/utf8"

sutil "github.com/gobwas/glob/util/strings"
)

// PrefixAny is a matcher made of a literal prefix and a set of separator
// runes. Its Match method accepts a string that starts with Prefix and for
// whose remainder sutil.IndexAnyRunes finds none of the Separators.
type PrefixAny struct {
// Prefix is the literal text a matching string must begin with.
Prefix string
// Separators are the runes looked up in the text that follows Prefix.
Separators []rune
}

// NewPrefixAny builds a PrefixAny from the literal prefix s and the separator
// runes sep. The sep slice is stored as given; it is not copied.
func NewPrefixAny(s string, sep []rune) PrefixAny {
	return PrefixAny{
		Prefix:     s,
		Separators: sep,
	}
}

// Index looks for the first occurrence of Prefix in s and reports where it
// starts together with the lengths a match beginning there can have.
//
// The returned index is the byte offset of Prefix in s, or -1 (with nil
// segments) when Prefix does not occur. Each segment is a byte length measured
// from that offset: len(Prefix) itself, followed by one entry per rune of the
// text after the prefix, stopping before the position reported by
// sutil.IndexAnyRunes for Separators (or at the end of s when it reports -1).
func (self PrefixAny) Index(s string) (int, []int) {
	idx := strings.Index(s, self.Prefix)
	if idx == -1 {
		return -1, nil
	}

	n := len(self.Prefix)
	tail := s[idx+n:]
	if cut := sutil.IndexAnyRunes(tail, self.Separators); cut > -1 {
		tail = tail[:cut]
	}

	seg := acquireSegments(len(tail) + 1)
	seg = append(seg, n)
	// Advance by the width actually consumed from the string. An invalid
	// UTF-8 byte decodes to utf8.RuneError with width 1, whereas
	// utf8.RuneLen(utf8.RuneError) is 3, so deriving the offset from the rune
	// value would produce lengths that run past the real rune boundary (and
	// possibly past the end of s).
	for off := 0; off < len(tail); {
		_, width := utf8.DecodeRuneInString(tail[off:])
		off += width
		seg = append(seg, n+off)
	}

	return idx, seg
}

// Len returns the package-level lenNo value.
// NOTE(review): lenNo is declared outside this file; presumably it signals
// that the matched length is not fixed — confirm against its declaration.
func (self PrefixAny) Len() int {
return lenNo
}

// Match reports whether s begins with Prefix and sutil.IndexAnyRunes returns
// -1 for Separators in the text that follows the prefix.
func (self PrefixAny) Match(s string) bool {
	if strings.HasPrefix(s, self.Prefix) {
		// HasPrefix succeeded, so TrimPrefix removes exactly len(Prefix) bytes.
		rest := strings.TrimPrefix(s, self.Prefix)
		return sutil.IndexAnyRunes(rest, self.Separators) == -1
	}
	return false
}

// String renders the matcher for debugging as
// "<prefix_any:PREFIX![SEPARATORS]>".
func (self PrefixAny) String() string {
	seps := string(self.Separators)
	return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, seps)
}
47 changes: 47 additions & 0 deletions match/prefix_any_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package match

import (
"reflect"
"testing"
)

// TestPrefixAnyIndex checks the index and segments returned by
// PrefixAny.Index for a few fixtures, all using "." as the only separator.
func TestPrefixAnyIndex(t *testing.T) {
	type indexCase struct {
		prefix     string
		separators []rune
		fixture    string
		index      int
		segments   []int
	}

	cases := []indexCase{
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "ab",
			index:      0,
			segments:   []int{2},
		},
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "abc",
			index:      0,
			segments:   []int{2, 3},
		},
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "qw.abcd.efg",
			index:      3,
			segments:   []int{2, 3, 4},
		},
	}

	for id, tc := range cases {
		matcher := NewPrefixAny(tc.prefix, tc.separators)
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}
43 changes: 43 additions & 0 deletions match/suffix_any.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package match

import (
"fmt"
"strings"

sutil "github.com/gobwas/glob/util/strings"
)

// SuffixAny is a matcher made of a literal suffix and a set of separator
// runes. Its Match method accepts a string that ends with Suffix and for
// whose leading part sutil.IndexAnyRunes finds none of the Separators.
type SuffixAny struct {
// Suffix is the literal text a matching string must end with.
Suffix string
// Separators are the runes looked up in the text that precedes Suffix.
Separators []rune
}

// NewSuffixAny builds a SuffixAny from the literal suffix s and the separator
// runes sep. The sep slice is stored as given; it is not copied.
func NewSuffixAny(s string, sep []rune) SuffixAny {
	return SuffixAny{
		Suffix:     s,
		Separators: sep,
	}
}

// Index locates the first occurrence of Suffix in s and reports a match that
// ends right after it.
//
// When Suffix does not occur it returns -1 and nil segments. Otherwise the
// returned index is one past the position sutil.LastIndexAnyRunes reports for
// Separators in the text before the suffix (0 when it reports -1), and the
// single segment is the byte length from that index to the end of the suffix.
func (self SuffixAny) Index(s string) (int, []int) {
	found := strings.Index(s, self.Suffix)
	if found == -1 {
		return -1, nil
	}

	head := s[:found]
	start := 1 + sutil.LastIndexAnyRunes(head, self.Separators)
	length := found + len(self.Suffix) - start

	return start, []int{length}
}

// Len returns the package-level lenNo value.
// NOTE(review): lenNo is declared outside this file; presumably it signals
// that the matched length is not fixed — confirm against its declaration.
func (self SuffixAny) Len() int {
return lenNo
}

// Match reports whether s ends with Suffix and sutil.IndexAnyRunes returns -1
// for Separators in the text that precedes the suffix.
func (self SuffixAny) Match(s string) bool {
	if strings.HasSuffix(s, self.Suffix) {
		// HasSuffix succeeded, so TrimSuffix removes exactly len(Suffix) bytes.
		head := strings.TrimSuffix(s, self.Suffix)
		return sutil.IndexAnyRunes(head, self.Separators) == -1
	}
	return false
}

// String renders the matcher for debugging as
// "<suffix_any:![SEPARATORS]SUFFIX>".
func (self SuffixAny) String() string {
	seps := string(self.Separators)
	return fmt.Sprintf("<suffix_any:![%s]%s>", seps, self.Suffix)
}
47 changes: 47 additions & 0 deletions match/suffix_any_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package match

import (
"reflect"
"testing"
)

// TestSuffixAnyIndex checks the index and segments returned by
// SuffixAny.Index for a few fixtures, all using "." as the only separator.
func TestSuffixAnyIndex(t *testing.T) {
	type indexCase struct {
		suffix     string
		separators []rune
		fixture    string
		index      int
		segments   []int
	}

	cases := []indexCase{
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "ab",
			index:      0,
			segments:   []int{2},
		},
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "cab",
			index:      0,
			segments:   []int{3},
		},
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "qw.cdab.efg",
			index:      3,
			segments:   []int{4},
		},
	}

	for id, tc := range cases {
		matcher := NewSuffixAny(tc.suffix, tc.separators)
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}
28 changes: 27 additions & 1 deletion util/strings/strings.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package strings

import "strings"
import (
"strings"
"unicode/utf8"
)

func IndexAnyRunes(s string, rs []rune) int {
for _, r := range rs {
Expand All @@ -11,3 +14,26 @@ func IndexAnyRunes(s string, rs []rune) int {

return -1
}

// LastIndexAnyRunes returns the byte index of the last occurrence in s of any
// rune from rs, or -1 when none of them occurs (including when s or rs is
// empty).
//
// Every rune in rs is considered and the greatest index wins, so the result
// does not depend on the order of rs. Values in rs that are not valid Unicode
// code points are ignored.
func LastIndexAnyRunes(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		var i int
		switch {
		case 0 <= r && r < utf8.RuneSelf:
			// Single-byte rune: a plain byte scan from the end is enough.
			i = strings.LastIndexByte(s, byte(r))
		case utf8.ValidRune(r):
			// Multi-byte rune: search for its UTF-8 encoding from the end.
			i = strings.LastIndex(s, string(r))
		default:
			// string(r) would turn an invalid rune into U+FFFD and could
			// match unrelated text, so skip it.
			continue
		}
		if i > last {
			last = i
		}
	}
	return last
}

0 comments on commit 034ebb2

Please sign in to comment.