-
Notifications
You must be signed in to change notification settings - Fork 539
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* basic integration Signed-off-by: Joe Elliott <[email protected]> * patch tests for new meaning Signed-off-by: Joe Elliott <[email protected]> * patch up more tests Signed-off-by: Joe Elliott <[email protected]> * add basic tests Signed-off-by: Joe Elliott <[email protected]> * changelog + docs Signed-off-by: Joe Elliott <[email protected]> * remove benches Signed-off-by: Joe Elliott <[email protected]> * Cleaned up + tests Signed-off-by: Joe Elliott <[email protected]> * comment Signed-off-by: Joe Elliott <[email protected]> * lint Signed-off-by: Joe Elliott <[email protected]> * Update docs/sources/tempo/traceql/_index.md Co-authored-by: Kim Nylander <[email protected]> * comment Signed-off-by: Joe Elliott <[email protected]> --------- Signed-off-by: Joe Elliott <[email protected]> Co-authored-by: Kim Nylander <[email protected]>
- Loading branch information
1 parent
7073d04
commit b61c3ce
Showing
9 changed files
with
214 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
package regexp | ||
|
||
import (
	"fmt"
	"regexp"
	"strings"
	"unsafe"

	"github.com/prometheus/prometheus/model/labels"
)
|
||
// To avoid building an enormous map on extremely high cardinality fields we cap the number of memoized values.
// This number is not tuned. On extremely high cardinality fields memoization is wasteful because we rarely
// see the same value twice, so it is all overhead. On lower cardinality fields with an expensive regex,
// memoization is very effective at speeding up queries.
const maxMemoize = 1000 | ||
|
||
type Regexp struct { | ||
matchers []*labels.FastRegexMatcher | ||
matches map[string]bool | ||
shouldMatch bool | ||
} | ||
|
||
func NewRegexp(regexps []string, shouldMatch bool) (*Regexp, error) { | ||
matchers := make([]*labels.FastRegexMatcher, 0, len(regexps)) | ||
|
||
for _, r := range regexps { | ||
m, err := labels.NewFastRegexMatcher(r) | ||
if err != nil { | ||
return nil, err | ||
} | ||
matchers = append(matchers, m) | ||
} | ||
|
||
// only memoize if there's a unoptimized matcher | ||
// TODO: should we limit memoization to N values? | ||
var matches map[string]bool | ||
for _, m := range matchers { | ||
if shouldMemoize(m) { | ||
matches = make(map[string]bool) | ||
break | ||
} | ||
} | ||
|
||
return &Regexp{ | ||
matchers: matchers, | ||
matches: matches, | ||
shouldMatch: shouldMatch, | ||
}, nil | ||
} | ||
|
||
func (r *Regexp) Match(b []byte) bool { | ||
return r.MatchString(unsafe.String(unsafe.SliceData(b), len(b))) | ||
} | ||
|
||
func (r *Regexp) MatchString(s string) bool { | ||
// if we're memoizing check existing matches | ||
if r.matches != nil { | ||
if matched, ok := r.matches[s]; ok { | ||
return matched | ||
} | ||
} | ||
|
||
matched := false | ||
for _, m := range r.matchers { | ||
if m.MatchString(s) == r.shouldMatch { | ||
matched = true | ||
break | ||
} | ||
} | ||
|
||
if r.matches != nil && len(r.matches) < maxMemoize { | ||
r.matches[s] = matched | ||
} | ||
|
||
return matched | ||
} | ||
|
||
func (r *Regexp) Reset() { | ||
if r.matches != nil { | ||
clear(r.matches) | ||
} | ||
} | ||
|
||
func (r *Regexp) String() string { | ||
var strings string | ||
for _, m := range r.matchers { | ||
strings += fmt.Sprintf("%s, ", m.GetRegexString()) | ||
} | ||
|
||
return strings | ||
} | ||
|
||
// shouldMemoize returns true if we believe that memoizing this regex would be faster | ||
// the evaluating it directly. see thoughts below. | ||
func shouldMemoize(m *labels.FastRegexMatcher) bool { | ||
// matches labels.FastRegexMatcher | ||
type cheatToSeeInternals struct { | ||
reString string | ||
re *regexp.Regexp | ||
|
||
setMatches []string | ||
stringMatcher labels.StringMatcher | ||
prefix string | ||
suffix string | ||
contains []string | ||
|
||
matchString func(string) bool | ||
} | ||
|
||
cheat := (*cheatToSeeInternals)(unsafe.Pointer(m)) | ||
|
||
// TODO: research and improve this. we're making a guess on whether an optimization will improve the regex | ||
// performance enough that its faster to not memoize. See compileMatchStringFunction() in the labels | ||
// package. maybe there's even a way to do this dynamically? | ||
return cheat.stringMatcher == nil && // a stringMatcher definitively rejects or accepts. if a string matcher is present the regex will never be executed | ||
len(cheat.setMatches) == 0 && // setMatches definitively reject or accept. if len != 0 the regex will never be executed, but perhaps if there are a very large # of setMatches we prefer memoizing anyway? | ||
cheat.prefix == "" && // prefix and suffix _do not_ prevent the regex from executing, but they are quick to evaluate and tend to nicely filter down. | ||
cheat.suffix == "" // perhaps a length requirement would be an improvement? i.e. require a prefix or suffix of at least 3 chars? | ||
// len(cheat.contains) == 0 // in testing, it was faster to memoize with a contains filter. perhaps if the filters are long enough we don't memoize? | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package regexp | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/prometheus/prometheus/model/labels" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestRegexpMatch(t *testing.T) { | ||
r, err := NewRegexp([]string{"^a.*"}, true) | ||
require.NoError(t, err) | ||
|
||
require.True(t, r.Match([]byte("abc"))) | ||
require.True(t, r.MatchString("abc")) | ||
require.False(t, r.MatchString("xyz")) | ||
|
||
r, err = NewRegexp([]string{"^a.*"}, false) | ||
require.NoError(t, err) | ||
|
||
require.False(t, r.Match([]byte("abc"))) | ||
require.False(t, r.MatchString("abc")) | ||
require.True(t, r.MatchString("xyz")) | ||
} | ||
|
||
func TestShouldMemoize(t *testing.T) { | ||
tcs := []struct { | ||
regex string | ||
shouldMemoize bool | ||
}{ | ||
{ | ||
regex: ".*", | ||
shouldMemoize: false, | ||
}, | ||
{ | ||
regex: "foo.*", | ||
shouldMemoize: false, | ||
}, | ||
{ | ||
regex: ".*bar", | ||
shouldMemoize: false, | ||
}, | ||
{ | ||
regex: "foo|bar", | ||
shouldMemoize: false, | ||
}, | ||
{ | ||
regex: ".*bar.*", // creates a containsStringMatcher so should not memoize | ||
shouldMemoize: false, | ||
}, | ||
{ | ||
regex: ".*bar.*foo.*", // calls contains in order | ||
shouldMemoize: true, | ||
}, | ||
} | ||
|
||
for _, tc := range tcs { | ||
t.Run(tc.regex, func(t *testing.T) { | ||
m, err := labels.NewFastRegexMatcher(tc.regex) | ||
require.NoError(t, err) | ||
require.Equal(t, tc.shouldMemoize, shouldMemoize(m)) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.