diff --git a/libcalico-go/Makefile b/libcalico-go/Makefile
index 1e27ec028ab..1cbd8f85c46 100644
--- a/libcalico-go/Makefile
+++ b/libcalico-go/Makefile
@@ -40,7 +40,8 @@ clean:
 GENERATED_FILES:=./lib/apis/v3/zz_generated.deepcopy.go \
 	./lib/upgrade/migrator/clients/v1/k8s/custom/zz_generated.deepcopy.go \
 	./lib/apis/v3/generated.openapi.go \
-	./lib/apis/v1/generated.openapi.go
+	./lib/apis/v1/generated.openapi.go \
+	./lib/selector/tokenizer/kind_string.go
 
 .PHONY: gen-files
 ## Force rebuild generated go utilities (e.g. deepcopy-gen) and generated files
@@ -97,6 +98,9 @@ gen-crds:
 		--output-pkg "$(PACKAGE_NAME)/lib/apis/v1" \
 		"$(PACKAGE_NAME)/lib/apis/v1"'
 
+./lib/selector/tokenizer/kind_string.go: ./lib/selector/tokenizer/tokenizer.go
+	$(DOCKER_GO_BUILD) go generate ./lib/selector/tokenizer
+
 ###############################################################################
 # Static checks
 ###############################################################################
diff --git a/libcalico-go/lib/selector/parser/parser.go b/libcalico-go/lib/selector/parser/parser.go
index 7e07fd4b58a..b7ce28c2a4f 100644
--- a/libcalico-go/lib/selector/parser/parser.go
+++ b/libcalico-go/lib/selector/parser/parser.go
@@ -17,6 +17,7 @@ package parser
 import (
 	"errors"
 	"fmt"
+	"sync"
 
 	log "github.com/sirupsen/logrus"
 
@@ -27,17 +28,68 @@ const parserDebug = false
 // Parse parses a string representation of a selector expression into a Selector.
 func Parse(selector string) (sel Selector, err error) {
-	log.Debugf("Parsing %#v", selector)
-	tokens, err := tokenizer.Tokenize(selector)
+	sharedParserLock.Lock()
+	defer sharedParserLock.Unlock()
+
+	return sharedParser.Parse(selector)
+}
+
+func Validate(selector string) (err error) {
+	sharedParserLock.Lock()
+	defer sharedParserLock.Unlock()
+
+	return sharedParser.Validate(selector)
+}
+
+var (
+	sharedParserLock sync.Mutex
+	sharedParser     = NewParser()
+)
+
+func NewParser() *Parser {
+	return &Parser{
+		tokBuf: make([]tokenizer.Token, 0, 128),
+	}
+}
+
+type Parser struct {
+	tokBuf []tokenizer.Token
+}
+
+func (p *Parser) Parse(selector string) (sel Selector, err error) {
+	if log.IsLevelEnabled(log.DebugLevel) {
+		log.Debugf("Parsing %q", selector)
+	}
+	return p.parseRoot(selector, false)
+}
+
+func (p *Parser) Validate(selector string) (err error) {
+	if log.IsLevelEnabled(log.DebugLevel) {
+		log.Debugf("Validating %q", selector)
+	}
+	_, err = p.parseRoot(selector, true)
+	return
+}
+
+func (p *Parser) parseRoot(selector string, validateOnly bool) (sel Selector, err error) {
+	// Reuse the parser's token buffer to avoid heap allocations (for smaller selectors).
+	tokens, err := tokenizer.AppendTokens(p.tokBuf[:0], selector)
 	if err != nil {
 		return
 	}
+
 	if tokens[0].Kind == tokenizer.TokEOF {
+		if validateOnly {
+			return nil, nil
+		}
 		return &selectorRoot{root: &AllNode{}}, nil
 	}
-	log.Debugf("Tokens %v", tokens)
+	if log.IsLevelEnabled(log.DebugLevel) {
+		log.Debugf("Tokens %v", tokens)
+	}
+
 	// The "||" operator has the lowest precedence so we start with that.
-	node, remTokens, err := parseOrExpression(tokens)
+	node, remTokens, err := parseOrExpression(tokens, validateOnly)
 	if err != nil {
 		return
 	}
@@ -45,34 +97,45 @@ func Parse(selector string) (sel Selector, err error) {
 		err = errors.New(fmt.Sprint("unexpected content at end of selector ", remTokens))
 		return
 	}
+	if validateOnly {
+		return
+	}
 	sel = &selectorRoot{root: node}
 	return
 }
 
 // parseOrExpression parses one or more "&&" terms, separated by "||" operators.
-func parseOrExpression(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.Token, err error) {
+func parseOrExpression(tokens []tokenizer.Token, validateOnly bool) (sel node, remTokens []tokenizer.Token, err error) {
 	if parserDebug {
 		log.Debugf("Parsing ||s from %v", tokens)
 	}
 	// Look for the first expression.
-	andNodes := make([]node, 0)
-	sel, remTokens, err = parseAndExpression(tokens)
+	var andNodes []node
+	sel, remTokens, err = parseAndExpression(tokens, validateOnly)
 	if err != nil {
 		return
 	}
-	andNodes = append(andNodes, sel)
+	if !validateOnly {
+		andNodes = append(andNodes, sel)
+	}
 	// Then loop looking for "||" followed by another "&&" expression.
 	for {
 		switch remTokens[0].Kind {
 		case tokenizer.TokOr:
 			remTokens = remTokens[1:]
-			sel, remTokens, err = parseAndExpression(remTokens)
+			sel, remTokens, err = parseAndExpression(remTokens, validateOnly)
 			if err != nil {
 				return
 			}
+			if validateOnly {
+				continue
+			}
 			andNodes = append(andNodes, sel)
 		default:
+			if validateOnly {
+				return
+			}
 			if len(andNodes) == 1 {
 				sel = andNodes[0]
 			} else {
@@ -84,29 +147,40 @@ func parseOrExpression(tokens []tokenizer.Token) (sel node, remTokens []tokenize
 }
 
 // parseAndExpression parses one or more operations, separated by "&&" operators.
-func parseAndExpression(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.Token, err error) {
+func parseAndExpression(
+	tokens []tokenizer.Token,
+	validateOnly bool,
+) (sel node, remTokens []tokenizer.Token, err error) {
 	if parserDebug {
 		log.Debugf("Parsing &&s from %v", tokens)
 	}
 	// Look for the first operation.
-	opNodes := make([]node, 0)
-	sel, remTokens, err = parseOperation(tokens)
+	var opNodes []node
+	sel, remTokens, err = parseOperation(tokens, validateOnly)
 	if err != nil {
 		return
 	}
-	opNodes = append(opNodes, sel)
+	if !validateOnly {
+		opNodes = append(opNodes, sel)
+	}
 	// Then loop looking for "&&" followed by another operation.
 	for {
 		switch remTokens[0].Kind {
 		case tokenizer.TokAnd:
 			remTokens = remTokens[1:]
-			sel, remTokens, err = parseOperation(remTokens)
+			sel, remTokens, err = parseOperation(remTokens, validateOnly)
 			if err != nil {
 				return
 			}
+			if validateOnly {
+				continue
+			}
 			opNodes = append(opNodes, sel)
 		default:
+			if validateOnly {
+				return
+			}
 			if len(opNodes) == 1 {
 				sel = opNodes[0]
 			} else {
@@ -117,14 +191,22 @@ func parseAndExpression(tokens []tokenizer.Token) (sel node, remTokens []tokeniz
 	}
 }
 
+var (
+	ErrUnexpectedEOF  = errors.New("unexpected end of string looking for op")
+	ErrExpectedRParen = errors.New("expected )")
+	ErrExpectedRBrace = errors.New("expected }")
+	ErrExpectedString = errors.New("expected string")
+	ErrExpectedSetLit = errors.New("expected set literal")
+)
+
 // parseOperation parses a single, possibly negated operation (i.e. ==, !=, has()).
 // It also handles calling parseOrExpression recursively for parenthesized expressions.
-func parseOperation(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.Token, err error) {
+func parseOperation(tokens []tokenizer.Token, validateOnly bool) (sel node, remTokens []tokenizer.Token, err error) {
 	if parserDebug {
 		log.Debugf("Parsing op from %v", tokens)
 	}
 	if len(tokens) == 0 {
-		err = errors.New("Unexpected end of string looking for op")
+		err = ErrUnexpectedEOF
 		return
 	}
 
@@ -142,55 +224,71 @@ func parseOperation(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.T
 	// Then, look for the various types of operator.
 	switch tokens[0].Kind {
 	case tokenizer.TokHas:
-		sel = &HasNode{tokens[0].Value.(string)}
+		if !validateOnly {
+			sel = &HasNode{tokens[0].Value}
+		}
 		remTokens = tokens[1:]
 	case tokenizer.TokAll:
-		sel = &AllNode{}
+		if !validateOnly {
+			sel = &AllNode{}
+		}
 		remTokens = tokens[1:]
 	case tokenizer.TokGlobal:
-		sel = &GlobalNode{}
+		if !validateOnly {
+			sel = &GlobalNode{}
+		}
 		remTokens = tokens[1:]
 	case tokenizer.TokLabel:
 		// should have an operator and a literal.
 		if len(tokens) < 3 {
-			err = errors.New(fmt.Sprint("Unexpected end of string in middle of op", tokens))
+			err = ErrUnexpectedEOF
 			return
 		}
 		switch tokens[1].Kind {
 		case tokenizer.TokEq:
 			if tokens[2].Kind == tokenizer.TokStringLiteral {
-				sel = &LabelEqValueNode{tokens[0].Value.(string), tokens[2].Value.(string)}
+				if !validateOnly {
+					sel = &LabelEqValueNode{tokens[0].Value, tokens[2].Value}
+				}
 				remTokens = tokens[3:]
 			} else {
-				err = errors.New("Expected string")
+				err = ErrExpectedString
 			}
 		case tokenizer.TokNe:
 			if tokens[2].Kind == tokenizer.TokStringLiteral {
-				sel = &LabelNeValueNode{tokens[0].Value.(string), tokens[2].Value.(string)}
+				if !validateOnly {
+					sel = &LabelNeValueNode{tokens[0].Value, tokens[2].Value}
+				}
 				remTokens = tokens[3:]
 			} else {
-				err = errors.New("Expected string")
+				err = ErrExpectedString
 			}
 		case tokenizer.TokContains:
 			if tokens[2].Kind == tokenizer.TokStringLiteral {
-				sel = &LabelContainsValueNode{tokens[0].Value.(string), tokens[2].Value.(string)}
+				if !validateOnly {
+					sel = &LabelContainsValueNode{tokens[0].Value, tokens[2].Value}
+				}
 				remTokens = tokens[3:]
 			} else {
-				err = errors.New("Expected string")
+				err = ErrExpectedString
 			}
 		case tokenizer.TokStartsWith:
 			if tokens[2].Kind == tokenizer.TokStringLiteral {
-				sel = &LabelStartsWithValueNode{tokens[0].Value.(string), tokens[2].Value.(string)}
+				if !validateOnly {
+					sel = &LabelStartsWithValueNode{tokens[0].Value, tokens[2].Value}
+				}
 				remTokens = tokens[3:]
 			} else {
-				err = errors.New("Expected string")
+				err = ErrExpectedString
 			}
 		case tokenizer.TokEndsWith:
 			if tokens[2].Kind == tokenizer.TokStringLiteral {
-				sel = &LabelEndsWithValueNode{tokens[0].Value.(string), tokens[2].Value.(string)}
+				if !validateOnly {
+					sel = &LabelEndsWithValueNode{tokens[0].Value, tokens[2].Value}
+				}
 				remTokens = tokens[3:]
 			} else {
-				err = errors.New("Expected string")
+				err = ErrExpectedString
 			}
 		case tokenizer.TokIn, tokenizer.TokNotIn:
 			if tokens[2].Kind == tokenizer.TokLBrace {
@@ -198,7 +296,7 @@ func parseOperation(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.T
 				values := []string{}
 				for {
 					if remTokens[0].Kind == tokenizer.TokStringLiteral {
-						value := remTokens[0].Value.(string)
+						value := remTokens[0].Value
 						values = append(values, value)
 						remTokens = remTokens[1:]
 						if remTokens[0].Kind == tokenizer.TokComma {
@@ -211,45 +309,51 @@ func parseOperation(tokens []tokenizer.Token) (sel node, remTokens []tokenizer.T
 					}
 				}
 				if remTokens[0].Kind != tokenizer.TokRBrace {
-					err = errors.New("Expected }")
+					err = ErrExpectedRBrace
 				} else {
 					// Skip over the }
 					remTokens = remTokens[1:]
-					labelName := tokens[0].Value.(string)
+					labelName := tokens[0].Value
 					set := ConvertToStringSetInPlace(values) // Mutates values.
 					if tokens[1].Kind == tokenizer.TokIn {
-						sel = &LabelInSetNode{labelName, set}
+						if !validateOnly {
+							sel = &LabelInSetNode{labelName, set}
+						}
 					} else {
-						sel = &LabelNotInSetNode{labelName, set}
+						if !validateOnly {
+							sel = &LabelNotInSetNode{labelName, set}
+						}
 					}
 				}
 			} else {
-				err = errors.New("Expected set literal")
+				err = ErrExpectedSetLit
 			}
 		default:
-			err = errors.New(fmt.Sprint("Expected == or != not ", tokens[1]))
+			err = fmt.Errorf("expected == or !=, not: %v", tokens[1])
 			return
 		}
 	case tokenizer.TokLParen:
 		// We hit a paren, skip past it, then recurse.
-		sel, remTokens, err = parseOrExpression(tokens[1:])
+		sel, remTokens, err = parseOrExpression(tokens[1:], validateOnly)
 		if err != nil {
 			return
 		}
 		// After parsing the nested expression, there should be
 		// a matching paren.
 		if len(remTokens) < 1 || remTokens[0].Kind != tokenizer.TokRParen {
-			err = errors.New("Expected )")
+			err = ErrExpectedRParen
 			return
 		}
 		remTokens = remTokens[1:]
 	default:
-		err = errors.New(fmt.Sprint("Unexpected token: ", tokens[0]))
+		err = fmt.Errorf("unexpected token: %v", tokens[0])
 		return
 	}
 	if negated && err == nil {
-		sel = &NotNode{sel}
+		if !validateOnly {
+			sel = &NotNode{sel}
+		}
 	}
 	return
 }
diff --git a/libcalico-go/lib/selector/parser/parser_test.go b/libcalico-go/lib/selector/parser/parser_test.go
index eddb8dadff5..0e64e1e690c 100644
--- a/libcalico-go/lib/selector/parser/parser_test.go
+++ b/libcalico-go/lib/selector/parser/parser_test.go
@@ -267,9 +267,13 @@ var _ = Describe("Parser", func() {
 	It("Should reject bad selector", func() {
 		for _, sel := range badSelectors {
-			By(fmt.Sprint("Rejecting ", sel))
+			By(fmt.Sprint("Rejecting Parse() ", sel))
 			_, err := parser.Parse(sel)
-			Expect(err).ToNot(BeNil())
+			Expect(err).To(HaveOccurred())
+
+			By(fmt.Sprint("Rejecting Validate() ", sel))
+			err = parser.Validate(sel)
+			Expect(err).To(HaveOccurred())
 		}
 	})
 
@@ -313,7 +317,11 @@ var _ = Describe("Visitor", func() {
 		func(inSelector, outSelector string, visitor parser.Visitor) {
 			s, err := parser.Parse(inSelector)
 			By(fmt.Sprintf("parsing the selector %s", inSelector), func() {
-				Expect(err).To(BeNil())
+				Expect(err).NotTo(HaveOccurred())
+			})
+			err = parser.Validate(inSelector)
+			By(fmt.Sprintf("validating the selector %s", inSelector), func() {
+				Expect(err).NotTo(HaveOccurred())
 			})
 
 			// Run the visitor against the selector.
diff --git a/libcalico-go/lib/selector/selector.go b/libcalico-go/lib/selector/selector.go
index 549e8505ead..cdd87274758 100644
--- a/libcalico-go/lib/selector/selector.go
+++ b/libcalico-go/lib/selector/selector.go
@@ -43,6 +43,11 @@ func Parse(selector string) (sel Selector, err error) {
 	return parser.Parse(selector)
 }
 
+// Validate checks the syntax of the given selector.
+func Validate(selector string) (err error) {
+	return parser.Validate(selector)
+}
+
 // Normalise converts the given selector to the form returned by
 // Selector.String(), i.e. "" is converted to "all()" and whitespace is
 // tidied up. If the input string cannot be parsed, it is returned unaltered.
diff --git a/libcalico-go/lib/selector/selector_benchmarks_test.go b/libcalico-go/lib/selector/selector_benchmarks_test.go
new file mode 100644
index 00000000000..a03628b7207
--- /dev/null
+++ b/libcalico-go/lib/selector/selector_benchmarks_test.go
@@ -0,0 +1,51 @@
+// Copyright (c) 2024 Tigera, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package selector
+
+import (
+	"testing"
+
+	"github.com/sirupsen/logrus"
+
+	"github.com/projectcalico/calico/libcalico-go/lib/selector/parser"
+)
+
+var sel Selector
+
+func BenchmarkParse(b *testing.B) {
+	logrus.SetLevel(logrus.InfoLevel)
+	p := parser.NewParser()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		var err error
+		sel, err = p.Parse(`! (a == "b"&&! c != "d") || !(a == "b" && !c != "d")`)
+		if err != nil {
+			b.Errorf("Failed to parse selector: %v", err)
+		}
+	}
+}
+
+func BenchmarkValidate(b *testing.B) {
+	logrus.SetLevel(logrus.InfoLevel)
+	p := parser.NewParser()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		var err error
+		err = p.Validate(`! (a == "b"&&! c != "d") || !(a == "b" && !c != "d")`)
+		if err != nil {
+			b.Errorf("Failed to validate selector: %v", err)
+		}
+	}
+}
diff --git a/libcalico-go/lib/selector/tokenizer/.gitattributes b/libcalico-go/lib/selector/tokenizer/.gitattributes
new file mode 100644
index 00000000000..2d900d76c97
--- /dev/null
+++ b/libcalico-go/lib/selector/tokenizer/.gitattributes
@@ -0,0 +1 @@
+kind_string.go linguist-generated=true
diff --git a/libcalico-go/lib/selector/tokenizer/kind_string.go b/libcalico-go/lib/selector/tokenizer/kind_string.go
new file mode 100644
index 00000000000..878eac0b9d1
--- /dev/null
+++ b/libcalico-go/lib/selector/tokenizer/kind_string.go
@@ -0,0 +1,44 @@
+// Code generated by "stringer -type=Kind"; DO NOT EDIT.
+
+package tokenizer
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[TokNone-0]
+	_ = x[TokLabel-1]
+	_ = x[TokStringLiteral-2]
+	_ = x[TokLBrace-3]
+	_ = x[TokRBrace-4]
+	_ = x[TokComma-5]
+	_ = x[TokEq-6]
+	_ = x[TokNe-7]
+	_ = x[TokIn-8]
+	_ = x[TokNot-9]
+	_ = x[TokNotIn-10]
+	_ = x[TokContains-11]
+	_ = x[TokStartsWith-12]
+	_ = x[TokEndsWith-13]
+	_ = x[TokAll-14]
+	_ = x[TokHas-15]
+	_ = x[TokLParen-16]
+	_ = x[TokRParen-17]
+	_ = x[TokAnd-18]
+	_ = x[TokOr-19]
+	_ = x[TokGlobal-20]
+	_ = x[TokEOF-21]
+}
+
+const _Kind_name = "TokNoneTokLabelTokStringLiteralTokLBraceTokRBraceTokCommaTokEqTokNeTokInTokNotTokNotInTokContainsTokStartsWithTokEndsWithTokAllTokHasTokLParenTokRParenTokAndTokOrTokGlobalTokEOF"
+
+var _Kind_index = [...]uint8{0, 7, 15, 31, 40, 49, 57, 62, 67, 72, 78, 86, 97, 110, 121, 127, 133, 142, 151, 157, 162, 171, 177}
+
+func (i Kind) String() string {
+	if i < 0 || i >= Kind(len(_Kind_index)-1) {
+		return "Kind(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
+}
diff --git a/libcalico-go/lib/selector/tokenizer/tokenizer.go b/libcalico-go/lib/selector/tokenizer/tokenizer.go
index 1584b0e03c5..8206d6dafc8 100644
--- a/libcalico-go/lib/selector/tokenizer/tokenizer.go
+++ b/libcalico-go/lib/selector/tokenizer/tokenizer.go
@@ -17,16 +17,17 @@ package tokenizer
 import (
 	"errors"
 	"fmt"
-	"regexp"
 	"strings"
 
 	log "github.com/sirupsen/logrus"
 )
 
-type tokenKind uint8
+type Kind int
+
+//go:generate stringer -type=Kind
 
 const (
-	TokNone tokenKind = iota
+	TokNone Kind = iota
 	TokLabel
 	TokStringLiteral
 	TokLBrace
@@ -56,34 +57,22 @@ var whitespace = " \t"
 
 // Token has a kind and a value
 type Token struct {
-	Kind  tokenKind
-	Value interface{}
+	Kind  Kind
+	Value string
 }
 
-const (
-	// LabelKeyMatcher is the base regex for a valid label key.
-	LabelKeyMatcher = `[a-zA-Z0-9_./-]{1,512}`
-	hasExpr         = `has\(\s*(` + LabelKeyMatcher + `)\s*\)`
-	allExpr         = `all\(\s*\)`
-	notInExpr       = `not\s*in\b`
-	inExpr          = `in\b`
-	globalExpr      = `global\(\s*\)`
-)
-
-var (
-	identifierRegex = regexp.MustCompile("^" + LabelKeyMatcher)
-	containsRegex   = regexp.MustCompile(`^contains`)
-	startsWithRegex = regexp.MustCompile(`^starts\s*with`)
-	endsWithRegex   = regexp.MustCompile(`^ends\s*with`)
-	hasRegex        = regexp.MustCompile("^" + hasExpr)
-	allRegex        = regexp.MustCompile("^" + allExpr)
-	notInRegex      = regexp.MustCompile("^" + notInExpr)
-	inRegex         = regexp.MustCompile("^" + inExpr)
-	globalRegex     = regexp.MustCompile("^" + globalExpr)
-)
+func (t Token) String() string {
+	return fmt.Sprintf("%s(%s)", t.Kind, t.Value)
+}
 
 // Tokenize transforms string to token slice
 func Tokenize(input string) (tokens []Token, err error) {
+	return AppendTokens(nil, input)
+}
+
+// AppendTokens transforms string to token slice, appending it to the input
+// tokens slice, which may be nil.
+func AppendTokens(tokens []Token, input string) ([]Token, error) {
 	for {
 		if tokenizerDebug {
 			log.Debug("Remaining input: ", input)
@@ -91,8 +80,8 @@ func Tokenize(input string) (tokens []Token, err error) {
 		startLen := len(input)
 		input = strings.TrimLeft(input, whitespace)
 		if len(input) == 0 {
-			tokens = append(tokens, Token{TokEOF, nil})
-			return
+			tokens = append(tokens, Token{Kind: TokEOF})
+			return tokens, nil
 		}
 		var lastTokKind = TokNone
 		if len(tokens) > 0 {
@@ -100,10 +89,10 @@ func Tokenize(input string) (tokens []Token, err error) {
 		}
 		switch input[0] {
 		case '(':
-			tokens = append(tokens, Token{TokLParen, nil})
+			tokens = append(tokens, Token{Kind: TokLParen})
 			input = input[1:]
 		case ')':
-			tokens = append(tokens, Token{TokRParen, nil})
+			tokens = append(tokens, Token{Kind: TokRParen})
 			input = input[1:]
 		case '"':
 			input = input[1:]
@@ -124,39 +113,39 @@ func Tokenize(input string) (tokens []Token, err error) {
 			tokens = append(tokens, Token{TokStringLiteral, value})
 			input = input[index+1:]
 		case '{':
-			tokens = append(tokens, Token{TokLBrace, nil})
+			tokens = append(tokens, Token{Kind: TokLBrace})
 			input = input[1:]
 		case '}':
-			tokens = append(tokens, Token{TokRBrace, nil})
+			tokens = append(tokens, Token{Kind: TokRBrace})
 			input = input[1:]
 		case ',':
-			tokens = append(tokens, Token{TokComma, nil})
+			tokens = append(tokens, Token{Kind: TokComma})
 			input = input[1:]
 		case '=':
 			if len(input) > 1 && input[1] == '=' {
-				tokens = append(tokens, Token{TokEq, nil})
+				tokens = append(tokens, Token{Kind: TokEq})
 				input = input[2:]
 			} else {
 				return nil, errors.New("expected ==")
 			}
 		case '!':
 			if len(input) > 1 && input[1] == '=' {
-				tokens = append(tokens, Token{TokNe, nil})
+				tokens = append(tokens, Token{Kind: TokNe})
 				input = input[2:]
 			} else {
-				tokens = append(tokens, Token{TokNot, nil})
+				tokens = append(tokens, Token{Kind: TokNot})
 				input = input[1:]
 			}
 		case '&':
 			if len(input) > 1 && input[1] == '&' {
-				tokens = append(tokens, Token{TokAnd, nil})
+				tokens = append(tokens, Token{Kind: TokAnd})
 				input = input[2:]
 			} else {
 				return nil, errors.New("expected &&")
 			}
 		case '|':
 			if len(input) > 1 && input[1] == '|' {
-				tokens = append(tokens, Token{TokOr, nil})
+				tokens = append(tokens, Token{Kind: TokOr})
 				input = input[2:]
 			} else {
 				return nil, errors.New("expected ||")
@@ -165,62 +154,116 @@ func Tokenize(input string) (tokens []Token, err error) {
 			// Handle less-simple cases with regex matches. We've already stripped any whitespace.
 			if lastTokKind == TokLabel {
 				// If we just saw a label, look for a contains/starts with/ends with operator instead of another label.
-				if idxs := containsRegex.FindStringIndex(input); idxs != nil {
-					// Found "all"
-					tokens = append(tokens, Token{TokContains, nil})
-					input = input[idxs[1]:]
-				} else if idxs := startsWithRegex.FindStringIndex(input); idxs != nil {
-					// Found "all"
-					tokens = append(tokens, Token{TokStartsWith, nil})
-					input = input[idxs[1]:]
-				} else if idxs := endsWithRegex.FindStringIndex(input); idxs != nil {
-					// Found "all"
-					tokens = append(tokens, Token{TokEndsWith, nil})
-					input = input[idxs[1]:]
-				} else if idxs := notInRegex.FindStringIndex(input); idxs != nil {
-					// Found "not in"
-					tokens = append(tokens, Token{TokNotIn, nil})
-					input = input[idxs[1]:]
-				} else if idxs := inRegex.FindStringIndex(input); idxs != nil {
-					// Found "in"
-					tokens = append(tokens, Token{TokIn, nil})
-					input = input[idxs[1]:]
+				if strings.HasPrefix(input, "contains") {
+					tokens = append(tokens, Token{Kind: TokContains})
+					input = input[len("contains"):]
+				} else if strings.HasPrefix(input, "starts") {
+					input = input[len("starts"):]
+					input = strings.TrimLeft(input, whitespace)
+					if strings.HasPrefix(input, "with") {
+						tokens = append(tokens, Token{Kind: TokStartsWith})
+						input = input[len("with"):]
+					} else {
+						return nil, fmt.Errorf("unexpected characters after label '%v', was expecting an operator",
+							tokens[len(tokens)-1].Value)
+					}
+				} else if strings.HasPrefix(input, "ends") {
+					input = input[len("ends"):]
+					input = strings.TrimLeft(input, whitespace)
+					if strings.HasPrefix(input, "with") {
+						tokens = append(tokens, Token{Kind: TokEndsWith})
+						input = input[len("with"):]
+					} else {
+						return nil, fmt.Errorf("unexpected characters after label '%v', was expecting an operator",
+							tokens[len(tokens)-1].Value)
+					}
+				} else if strings.HasPrefix(input, "not") {
+					input = input[len("not"):]
+					input = strings.TrimLeft(input, whitespace)
+					if strings.HasPrefix(input, "in") {
+						tokens = append(tokens, Token{Kind: TokNotIn})
+						input = input[len("in"):]
+					} else {
+						return nil, fmt.Errorf("unexpected characters after label '%v', was expecting an operator",
+							tokens[len(tokens)-1].Value)
+					}
+				} else if strings.HasPrefix(input, "in") {
+					tokens = append(tokens, Token{Kind: TokIn})
+					input = input[len("in"):]
 				} else {
-					err = fmt.Errorf("unexpected characters after label '%v', was expecting an operator",
+					return nil, fmt.Errorf("unexpected characters after label '%v', was expecting an operator",
 						tokens[len(tokens)-1].Value)
-					return
 				}
-			} else if idxs := hasRegex.FindStringSubmatchIndex(input); idxs != nil {
-				// Found "has(label)"
-				wholeMatchEnd := idxs[1]
-				labelNameMatchStart := idxs[2]
-				labelNameMatchEnd := idxs[3]
-				labelName := input[labelNameMatchStart:labelNameMatchEnd]
-				tokens = append(tokens, Token{TokHas, labelName})
-				input = input[wholeMatchEnd:]
-			} else if idxs := allRegex.FindStringIndex(input); idxs != nil {
+			} else if strings.HasPrefix(input, "has(") {
+				// Found "has()"
+				input = input[len("has("):]
+				input = strings.TrimLeft(input, whitespace)
+				if ident := findIdentifier(input); ident != "" {
+					// Found "has(label)"
+					input = input[len(ident):]
+					input = strings.TrimLeft(input, whitespace)
+					if strings.HasPrefix(input, ")") {
+						tokens = append(tokens, Token{TokHas, ident})
+						input = input[1:]
+					} else {
+						return nil, fmt.Errorf("no closing ')' after has(")
+					}
+				} else {
+					return nil, errors.New("no label name in has( expression")
+				}
+			} else if strings.HasPrefix(input, "all(") {
 				// Found "all"
-				tokens = append(tokens, Token{TokAll, nil})
-				input = input[idxs[1]:]
-			} else if idxs := globalRegex.FindStringIndex(input); idxs != nil {
-				// Found "global"
-				tokens = append(tokens, Token{TokGlobal, nil})
-				input = input[idxs[1]:]
-			} else if idxs := identifierRegex.FindStringIndex(input); idxs != nil {
+				input = input[len("all("):]
+				input = strings.TrimLeft(input, whitespace)
+				if strings.HasPrefix(input, ")") {
+					tokens = append(tokens, Token{Kind: TokAll})
+					input = input[1:]
+				} else {
+					return nil, fmt.Errorf("no closing ')' after all(")
+				}
+			} else if strings.HasPrefix(input, "global(") {
+				// Found "global()"
+				input = input[len("global("):]
+				input = strings.TrimLeft(input, whitespace)
+				if strings.HasPrefix(input, ")") {
+					tokens = append(tokens, Token{Kind: TokGlobal})
+					input = input[1:]
+				} else {
+					return nil, fmt.Errorf("no closing ')' after global(")
+				}
+			} else if ident := findIdentifier(input); ident != "" {
 				// Found "label"
-				endIndex := idxs[1]
-				identifier := input[:endIndex]
-				log.Debug("Identifier ", identifier)
-				tokens = append(tokens, Token{TokLabel, identifier})
-				input = input[endIndex:]
+				if log.IsLevelEnabled(log.DebugLevel) {
+					log.Debug("Identifier ", ident)
+				}
+				tokens = append(tokens, Token{TokLabel, ident})
+				input = input[len(ident):]
 			} else {
-				err = errors.New("unexpected characters")
-				return
+				return nil, errors.New("unexpected characters")
 			}
 		}
 		if len(input) >= startLen {
-			err = errors.New("infinite loop detected in tokenizer")
-			return
+			return nil, errors.New("infinite loop detected in tokenizer")
+		}
+	}
+}
+
+// findIdentifier returns the (possibly empty) prefix of in that consists of
+// valid label-name characters: [a-zA-Z0-9_./-].
+func findIdentifier(in string) string {
+	for i := 0; i < len(in); i++ {
+		c := in[i]
+		if c >= 'a' && c <= 'z' {
+			continue
+		}
+		if c >= 'A' && c <= 'Z' {
+			continue
+		}
+		if c >= '0' && c <= '9' {
+			continue
+		}
+		if c == '_' || c == '.' || c == '/' || c == '-' {
+			continue
 		}
+		return in[:i]
 	}
+	return in
 }
diff --git a/libcalico-go/lib/selector/tokenizer/tokenizer_test.go b/libcalico-go/lib/selector/tokenizer/tokenizer_test.go
index 5b48ba0df3e..87b44fb0739 100644
--- a/libcalico-go/lib/selector/tokenizer/tokenizer_test.go
+++ b/libcalico-go/lib/selector/tokenizer/tokenizer_test.go
@@ -29,196 +29,195 @@ var tokenTests = []struct {
 }{
 	{`a=="b"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "a"},
-		{tokenizer.TokEq, nil},
+		{tokenizer.TokEq, ""},
 		{tokenizer.TokStringLiteral, "b"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`a=="b"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "a"},
-		{tokenizer.TokEq, nil},
+		{tokenizer.TokEq, ""},
 		{tokenizer.TokStringLiteral, "b"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`label == "value"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "label"},
-		{tokenizer.TokEq, nil},
+		{tokenizer.TokEq, ""},
 		{tokenizer.TokStringLiteral, "value"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`label contains "value"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "label"},
-		{tokenizer.TokContains, nil},
+		{tokenizer.TokContains, ""},
 		{tokenizer.TokStringLiteral, "value"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`contains contains "contains"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "contains"},
-		{tokenizer.TokContains, nil},
+		{tokenizer.TokContains, ""},
 		{tokenizer.TokStringLiteral, "contains"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`contains contains 'contains'`, []tokenizer.Token{
 		{tokenizer.TokLabel, "contains"},
-		{tokenizer.TokContains, nil},
+		{tokenizer.TokContains, ""},
 		{tokenizer.TokStringLiteral, "contains"},
-		{tokenizer.TokEOF, nil},
+		{tokenizer.TokEOF, ""},
 	}},
 	{`label contains"value"`, []tokenizer.Token{
 		{tokenizer.TokLabel, "label"},
"label"}, - {tokenizer.TokContains, nil}, + {tokenizer.TokContains, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label startswith "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokStartsWith, nil}, + {tokenizer.TokStartsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label endswith "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokEndsWith, nil}, + {tokenizer.TokEndsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label starts with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokStartsWith, nil}, + {tokenizer.TokStartsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`startswith starts with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "startswith"}, - {tokenizer.TokStartsWith, nil}, + {tokenizer.TokStartsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label ends with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokEndsWith, nil}, + {tokenizer.TokEndsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`endswith ends with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "endswith"}, - {tokenizer.TokEndsWith, nil}, + {tokenizer.TokEndsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label starts with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokStartsWith, nil}, + {tokenizer.TokStartsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label ends with "value"`, []tokenizer.Token{ {tokenizer.TokLabel, "label"}, - {tokenizer.TokEndsWith, nil}, + {tokenizer.TokEndsWith, ""}, {tokenizer.TokStringLiteral, "value"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`label starts foo "value"`, nil}, {`label ends foo "value"`, nil}, {`label squiggles "value"`, nil}, {`a not in "bar" && !has(foo) || b in c`, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, + {tokenizer.TokNotIn, ""}, {tokenizer.TokStringLiteral, "bar"}, - {tokenizer.TokAnd, nil}, - {tokenizer.TokNot, nil}, + {tokenizer.TokAnd, ""}, + {tokenizer.TokNot, ""}, {tokenizer.TokHas, "foo"}, - {tokenizer.TokOr, nil}, + {tokenizer.TokOr, ""}, {tokenizer.TokLabel, "b"}, - {tokenizer.TokIn, nil}, + {tokenizer.TokIn, ""}, {tokenizer.TokLabel, "c"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`has(calico/k8s_ns)`, []tokenizer.Token{ {tokenizer.TokHas, "calico/k8s_ns"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`has(calico/k8s_ns/role)`, []tokenizer.Token{ {tokenizer.TokHas, "calico/k8s_ns/role"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`has(calico/k8s_NS-.1/role)`, []tokenizer.Token{ {tokenizer.TokHas, "calico/k8s_NS-.1/role"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`calico/k8s_ns == "kube-system" && k8s-app == "kube-dns"`, []tokenizer.Token{ {tokenizer.TokLabel, "calico/k8s_ns"}, - {tokenizer.TokEq, nil}, + {tokenizer.TokEq, ""}, {tokenizer.TokStringLiteral, "kube-system"}, - {tokenizer.TokAnd, nil}, + {tokenizer.TokAnd, ""}, {tokenizer.TokLabel, "k8s-app"}, - {tokenizer.TokEq, nil}, + {tokenizer.TokEq, ""}, {tokenizer.TokStringLiteral, 
"kube-dns"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`a not in "bar" && ! has( foo ) || b in c `, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, + {tokenizer.TokNotIn, ""}, {tokenizer.TokStringLiteral, "bar"}, - {tokenizer.TokAnd, nil}, - {tokenizer.TokNot, nil}, + {tokenizer.TokAnd, ""}, + {tokenizer.TokNot, ""}, {tokenizer.TokHas, "foo"}, - {tokenizer.TokOr, nil}, + {tokenizer.TokOr, ""}, {tokenizer.TokLabel, "b"}, - {tokenizer.TokIn, nil}, + {tokenizer.TokIn, ""}, {tokenizer.TokLabel, "c"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`a notin"bar"&&!has(foo)||b in"c"`, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, + {tokenizer.TokNotIn, ""}, {tokenizer.TokStringLiteral, "bar"}, - {tokenizer.TokAnd, nil}, - {tokenizer.TokNot, nil}, + {tokenizer.TokAnd, ""}, + {tokenizer.TokNot, ""}, {tokenizer.TokHas, "foo"}, - {tokenizer.TokOr, nil}, + {tokenizer.TokOr, ""}, {tokenizer.TokLabel, "b"}, - {tokenizer.TokIn, nil}, + {tokenizer.TokIn, ""}, {tokenizer.TokStringLiteral, "c"}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokEOF, ""}, }}, {`a not in {}`, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, - {tokenizer.TokLBrace, nil}, - {tokenizer.TokRBrace, nil}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokNotIn, ""}, + {tokenizer.TokLBrace, ""}, + {tokenizer.TokRBrace, ""}, + {tokenizer.TokEOF, ""}, }}, {`a not in {"a"}`, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, - {tokenizer.TokLBrace, nil}, + {tokenizer.TokNotIn, ""}, + {tokenizer.TokLBrace, ""}, {tokenizer.TokStringLiteral, "a"}, - {tokenizer.TokRBrace, nil}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokRBrace, ""}, + {tokenizer.TokEOF, ""}, }}, {`a not in {"a","B"}`, []tokenizer.Token{ {tokenizer.TokLabel, "a"}, - {tokenizer.TokNotIn, nil}, - {tokenizer.TokLBrace, nil}, + {tokenizer.TokNotIn, ""}, + {tokenizer.TokLBrace, ""}, {tokenizer.TokStringLiteral, "a"}, - {tokenizer.TokComma, nil}, + {tokenizer.TokComma, ""}, {tokenizer.TokStringLiteral, "B"}, - {tokenizer.TokRBrace, nil}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokRBrace, ""}, + {tokenizer.TokEOF, ""}, }}, {`global()`, []tokenizer.Token{ - {tokenizer.TokGlobal, nil}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokGlobal, ""}, + {tokenizer.TokEOF, ""}, }}, {`all()`, []tokenizer.Token{ - {tokenizer.TokAll, nil}, - {tokenizer.TokEOF, nil}, + {tokenizer.TokAll, ""}, + {tokenizer.TokEOF, ""}, }}, } var _ = Describe("Token", func() { - for _, test := range tokenTests { test := test // Take copy for closure if test.expected == nil { @@ -227,8 +226,10 @@ var _ = Describe("Token", func() { Expect(err).To(HaveOccurred()) }) } else { - It(fmt.Sprintf("should tokenize %#v as %v", test.input, test.expected), func() { - Expect(tokenizer.Tokenize(test.input)).To(Equal(test.expected)) + It(fmt.Sprintf("should tokenize %s as %v", test.input, test.expected), func() { + tokens, err := tokenizer.Tokenize(test.input) + Expect(err).NotTo(HaveOccurred()) + Expect(tokens).To(Equal(test.expected), fmt.Sprintf("Got: %s", tokens)) }) } } diff --git a/libcalico-go/lib/validator/v1/validator.go b/libcalico-go/lib/validator/v1/validator.go index 076edafccfd..c3763c5fa15 100644 --- a/libcalico-go/lib/validator/v1/validator.go +++ b/libcalico-go/lib/validator/v1/validator.go @@ -205,7 +205,7 @@ func validateSelector(fl validator.FieldLevel) bool { log.Debugf("Validate selector: %s", s) // We use the selector parser to validate a selector string. 
-	_, err := selector.Parse(s)
+	err := selector.Validate(s)
 	if err != nil {
 		log.Debugf("Selector %#v was invalid: %v", s, err)
 		return false
diff --git a/libcalico-go/lib/validator/v3/validator.go b/libcalico-go/lib/validator/v3/validator.go
index 7544dff10ba..ab3b4a57f93 100644
--- a/libcalico-go/lib/validator/v3/validator.go
+++ b/libcalico-go/lib/validator/v3/validator.go
@@ -35,6 +35,7 @@ import (
 	"github.com/projectcalico/calico/libcalico-go/lib/names"
 	cnet "github.com/projectcalico/calico/libcalico-go/lib/net"
 	"github.com/projectcalico/calico/libcalico-go/lib/selector"
+	"github.com/projectcalico/calico/libcalico-go/lib/selector/tokenizer"
 	"github.com/projectcalico/calico/libcalico-go/lib/set"
 )
 
@@ -79,10 +80,6 @@ var (
 	// Hostnames have to be valid ipv4, ipv6 or strings up to 64 characters.
 	prometheusHostRegexp = regexp.MustCompile(`^[a-zA-Z0-9:._+-]{1,64}$`)
 
-	// global() cannot be used with other selectors.
-	andOr               = `(&&|\|\|)`
-	globalSelectorRegex = regexp.MustCompile(fmt.Sprintf(`%v global\(\)|global\(\) %v`, andOr, andOr))
-
 	interfaceRegex          = regexp.MustCompile("^[a-zA-Z0-9_.-]{1,15}$")
 	bgpFilterInterfaceRegex = regexp.MustCompile("^[a-zA-Z0-9_.*-]{1,15}$")
 	bgpFilterPrefixLengthV4 = regexp.MustCompile("^([0-9]|[12][0-9]|3[0-2])$")
@@ -550,7 +547,7 @@ func validateSelector(fl validator.FieldLevel) bool {
 	log.Debugf("Validate selector: %s", s)
 
 	// We use the selector parser to validate a selector string.
-	_, err := selector.Parse(s)
+	err := selector.Validate(s)
 	if err != nil {
 		log.Debugf("Selector %#v was invalid: %v", s, err)
 		return false
@@ -1292,17 +1289,17 @@ func validateEntityRule(structLevel validator.StructLevel) {
 			"Selector field", "", reason(globalSelectorEntRule), "")
 	}
 
-	// Get the parsed and canonicalised string of the namespaceSelector
-	// so we can make assertions against it.
-	// Note: err can be ignored; the field is validated separately and before
-	// this point.
-	n, _ := selector.Parse(rule.NamespaceSelector)
-	namespaceSelector := n.String()
-
-	// If the namespaceSelector contains global(), then it should be the only selector.
-	if globalSelectorRegex.MatchString(namespaceSelector) {
-		structLevel.ReportError(reflect.ValueOf(rule.NamespaceSelector),
-			"NamespaceSelector field", "", reason(globalSelectorOnly), "")
+	if strings.Contains(rule.NamespaceSelector, "global(") &&
+		rule.NamespaceSelector != "global()" {
+		// Looks like the selector has a global() clause but it's not _only_
+		// that. Tokenize the selector so we can more easily check it.
+		var tokenArr [16]tokenizer.Token
+		tokens, err := tokenizer.AppendTokens(tokenArr[:0], rule.NamespaceSelector)
+		if err != nil || len(tokens) > 2 || tokens[0].Kind != tokenizer.TokGlobal {
+			// If the namespaceSelector contains global(), then it should be the only selector.
+			structLevel.ReportError(reflect.ValueOf(rule.NamespaceSelector),
+				"NamespaceSelector field", "", reason(globalSelectorOnly), "")
+		}
 	}
 
 	if rule.Services != nil {
@@ -1425,7 +1422,13 @@ func validateBGPFilterRuleV6(structLevel validator.StructLevel) {
 	validateBGPFilterRule(structLevel, fs.CIDR, fs.MatchOperator, nil, fs.PrefixLength)
 }
 
-func validateBGPFilterRule(structLevel validator.StructLevel, cidr string, op api.BGPFilterMatchOperator, prefixLengthV4 *api.BGPFilterPrefixLengthV4, prefixLengthV6 *api.BGPFilterPrefixLengthV6) {
+func validateBGPFilterRule(
+	structLevel validator.StructLevel,
+	cidr string,
+	op api.BGPFilterMatchOperator,
+	prefixLengthV4 *api.BGPFilterPrefixLengthV4,
+	prefixLengthV6 *api.BGPFilterPrefixLengthV6,
+) {
 	if cidr != "" && op == "" {
 		structLevel.ReportError(cidr, "CIDR", "", reason("MatchOperator cannot be empty when CIDR is not"), "")