diff --git a/Makefile b/Makefile index 03ca888..35cd12b 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ test-integration: .PHONY:benchmark benchmark: # Use -run to exclude non-benchmark tests - $(GOCMD) test $(VERBOSE) -bench=. -run=XXX ./pkg/... + $(GOCMD) test $(VERBOSE) -bench=. -benchmem -run=XXX ./pkg/... .PHONY:clean clean: diff --git a/cmd/cli/args.go b/cmd/cli/args.go index b30e2c2..56e5bf7 100644 --- a/cmd/cli/args.go +++ b/cmd/cli/args.go @@ -40,6 +40,7 @@ type rawArgs struct { searchPattern string accessToken string accessTokenFile string + caseInsensitive bool // Presentation/display behaviour quiet bool verbose bool @@ -48,12 +49,13 @@ type rawArgs struct { } type Args struct { - location fetchTypes.Location - searchPattern string - filetypes []types.FileExtension - tokenSource oauth2.TokenSource - verbosity VerbosityLevel - enableColour bool + location fetchTypes.Location + searchPattern string + filetypes []types.FileExtension + tokenSource oauth2.TokenSource + caseInsensitive bool + verbosity VerbosityLevel + enableColour bool } func GetArgs() (*Args, error) { @@ -89,12 +91,13 @@ func GetArgs() (*Args, error) { enableColour := getColourEnabled(raw.colour, raw.noColour) return &Args{ - location: location, - searchPattern: pattern, - filetypes: filetypes, - tokenSource: tokenSource, - verbosity: verbosity, - enableColour: enableColour, + location: location, + searchPattern: pattern, + filetypes: filetypes, + tokenSource: tokenSource, + caseInsensitive: raw.caseInsensitive, + verbosity: verbosity, + enableColour: enableColour, }, nil } @@ -118,6 +121,7 @@ func parseArguments() (*rawArgs, error) { "", "file containing access token for repository access", ) + flag.BoolVar(&args.caseInsensitive, "i", false, "enable case-insensitive matching") flag.BoolVar(&args.quiet, "quiet", false, "disable logging; overrides verbose mode") flag.BoolVar(&args.verbose, "verbose", false, "increase logging; overridden by quiet mode") flag.BoolVar(&args.colour, "colour", false, "force coloured outputs; overridden by no-colour") diff --git a/cmd/cli/main.go b/cmd/cli/main.go index 220edfd..d030ed8 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -35,7 +35,7 @@ func main() { console := console.New(os.Stdout, args.enableColour) - matcher := match.New(logger, args.filetypes) + matcher := match.New(logger, args.caseInsensitive, args.filetypes) fetcher := fetch.New(logger, args.location, args.tokenSource) uri := makeURI(args.location) diff --git a/pkg/match/benchmark_results/exact_matcher_20220611_case_insensitivity.txt b/pkg/match/benchmark_results/exact_matcher_20220611_case_insensitivity.txt new file mode 100644 index 0000000..017835e --- /dev/null +++ b/pkg/match/benchmark_results/exact_matcher_20220611_case_insensitivity.txt @@ -0,0 +1,35 @@ +# Use -run to exclude non-benchmark tests +go test -bench=. -benchmem -run=XXX ./pkg/... +? github.com/agrski/greg/pkg/auth [no test files] +? github.com/agrski/greg/pkg/fetch [no test files] +PASS +ok github.com/agrski/greg/pkg/fetch/github 0.003s +? github.com/agrski/greg/pkg/fetch/types [no test files] +goos: linux +goarch: amd64 +pkg: github.com/agrski/greg/pkg/match +cpu: 11th Gen Intel(R) Core(TM) i5-11400H @ 2.70GHz +BenchmarkExactMatcher_Pattern10_Text100-12 894871 1321 ns/op 4784 B/op 7 allocs/op +BenchmarkExactMatcher_Pattern10_Text100_CaseInsensitive-12 695253 2016 ns/op 4968 B/op 17 allocs/op +BenchmarkExactMatcher_Pattern10_Text1_000-12 508456 2249 ns/op 5760 B/op 17 allocs/op +BenchmarkExactMatcher_Pattern100_Text1_000-12 564966 2041 ns/op 5720 B/op 17 allocs/op +BenchmarkExactMatcher_Pattern10_Text1_000_CaseInsensitive-12 201091 6317 ns/op 7096 B/op 62 allocs/op +BenchmarkExactMatcher_Pattern100_Text1_000_CaseInsensitive-12 119967 10515 ns/op 9016 B/op 61 allocs/op +BenchmarkExactMatcher_Pattern10_Text10_000-12 102597 11269 ns/op 15392 B/op 150 allocs/op +BenchmarkExactMatcher_Pattern100_Text10_000-12 121268 9567 ns/op 15472 B/op 150 allocs/op +BenchmarkExactMatcher_Pattern1_000_Text10_000-12 125361 9173 ns/op 15360 B/op 151 allocs/op +BenchmarkExactMatcher_Pattern10_Text10_000_CaseInsensitive-12 15655 79508 ns/op 28232 B/op 431 allocs/op +BenchmarkExactMatcher_Pattern100_Text10_000_CaseInsensitive-12 10242 109205 ns/op 42089 B/op 426 allocs/op +BenchmarkExactMatcher_Pattern1_000_Text10_000_CaseInsensitive-12 2324 473883 ns/op 193198 B/op 472 allocs/op +BenchmarkExactMatcher_Pattern10_Text100_000-12 7128 144612 ns/op 112047 B/op 1500 allocs/op +BenchmarkExactMatcher_Pattern100_Text100_000-12 9650 109717 ns/op 111955 B/op 1546 allocs/op +BenchmarkExactMatcher_Pattern1_000_Text100_000-12 11798 102373 ns/op 112273 B/op 1551 allocs/op +BenchmarkExactMatcher_Pattern10_000_Text100_000-12 9936 100823 ns/op 112196 B/op 1527 allocs/op +BenchmarkExactMatcher_Pattern10_Text100_000_CaseInsensitive-12 1245 918331 ns/op 244030 B/op 4607 allocs/op +BenchmarkExactMatcher_Pattern100_Text100_000_CaseInsensitive-12 957 1290626 ns/op 398777 B/op 4704 allocs/op +BenchmarkExactMatcher_Pattern1_000_Text100_000_CaseInsensitive-12 258 4819877 ns/op 1820148 B/op 4589 allocs/op +BenchmarkExactMatcher_Pattern10_000_Text100_000_CaseInsensitive-12 15 73241531 ns/op 15966074 B/op 4473 allocs/op +PASS +ok github.com/agrski/greg/pkg/match 29.407s +? github.com/agrski/greg/pkg/present/console [no test files] +? github.com/agrski/greg/pkg/types [no test files] diff --git a/pkg/match/exact.go b/pkg/match/exact.go index 84c4d7b..b0e3014 100644 --- a/pkg/match/exact.go +++ b/pkg/match/exact.go @@ -9,15 +9,17 @@ import ( ) type exactMatcher struct { - logger zerolog.Logger + caseInsensitive bool + logger zerolog.Logger } var _ Matcher = (*exactMatcher)(nil) -func newExactMatcher(logger zerolog.Logger) *exactMatcher { +func newExactMatcher(logger zerolog.Logger, caseInsensitive bool) *exactMatcher { logger = logger.With().Str("source", "ExactMatcher").Logger() return &exactMatcher{ - logger: logger, + caseInsensitive: caseInsensitive, + logger: logger, } } @@ -62,6 +64,11 @@ func (em *exactMatcher) Match(pattern string, next *types.FileInfo) (*Match, boo } func (em *exactMatcher) matchLine(pattern string, line string) []uint { + if em.caseInsensitive { + pattern = strings.ToLower(pattern) + line = strings.ToLower(line) + } + column := 0 matchColumns := []uint{} diff --git a/pkg/match/exact_benchmark_test.go b/pkg/match/exact_benchmark_test.go index a229331..dfeacf2 100644 --- a/pkg/match/exact_benchmark_test.go +++ b/pkg/match/exact_benchmark_test.go @@ -46,8 +46,8 @@ func makeTextOfLength(n int) string { return sb.String() } -func benchmarkExactMatcher(b *testing.B, patternSize int, textSize int) { - matcher := newExactMatcher(zerolog.Nop()) +func benchmarkExactMatcher(b *testing.B, patternSize int, textSize int, caseInsensitive bool) { + matcher := newExactMatcher(zerolog.Nop(), caseInsensitive) pattern := makeTextOfLength(patternSize) fileInfo := &types.FileInfo{} fileInfo.IsBinary = false @@ -62,35 +62,65 @@ func benchmarkExactMatcher(b *testing.B, patternSize int, textSize int) { } func BenchmarkExactMatcher_Pattern10_Text100(b *testing.B) { - benchmarkExactMatcher(b, 10, 100) + benchmarkExactMatcher(b, 10, 100, false) +} +func BenchmarkExactMatcher_Pattern10_Text100_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 10, 100, true) } func BenchmarkExactMatcher_Pattern10_Text1_000(b *testing.B) { - benchmarkExactMatcher(b, 10, 1_000) + benchmarkExactMatcher(b, 10, 1_000, false) } func BenchmarkExactMatcher_Pattern100_Text1_000(b *testing.B) { - benchmarkExactMatcher(b, 100, 1_000) + benchmarkExactMatcher(b, 100, 1_000, false) +} +func BenchmarkExactMatcher_Pattern10_Text1_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 10, 1_000, true) +} +func BenchmarkExactMatcher_Pattern100_Text1_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 100, 1_000, true) } func BenchmarkExactMatcher_Pattern10_Text10_000(b *testing.B) { - benchmarkExactMatcher(b, 10, 10_000) + benchmarkExactMatcher(b, 10, 10_000, false) } func BenchmarkExactMatcher_Pattern100_Text10_000(b *testing.B) { - benchmarkExactMatcher(b, 100, 10_000) + benchmarkExactMatcher(b, 100, 10_000, false) } func BenchmarkExactMatcher_Pattern1_000_Text10_000(b *testing.B) { - benchmarkExactMatcher(b, 1_000, 10_000) + benchmarkExactMatcher(b, 1_000, 10_000, false) +} +func BenchmarkExactMatcher_Pattern10_Text10_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 10, 10_000, true) +} +func BenchmarkExactMatcher_Pattern100_Text10_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 100, 10_000, true) +} +func BenchmarkExactMatcher_Pattern1_000_Text10_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 1_000, 10_000, true) } func BenchmarkExactMatcher_Pattern10_Text100_000(b *testing.B) { - benchmarkExactMatcher(b, 10, 100_000) + benchmarkExactMatcher(b, 10, 100_000, false) } func BenchmarkExactMatcher_Pattern100_Text100_000(b *testing.B) { - benchmarkExactMatcher(b, 100, 100_000) + benchmarkExactMatcher(b, 100, 100_000, false) } func BenchmarkExactMatcher_Pattern1_000_Text100_000(b *testing.B) { - benchmarkExactMatcher(b, 1_000, 100_000) + benchmarkExactMatcher(b, 1_000, 100_000, false) } func BenchmarkExactMatcher_Pattern10_000_Text100_000(b *testing.B) { - benchmarkExactMatcher(b, 10_000, 100_000) + benchmarkExactMatcher(b, 10_000, 100_000, false) +} +func BenchmarkExactMatcher_Pattern10_Text100_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 10, 100_000, true) +} +func BenchmarkExactMatcher_Pattern100_Text100_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 100, 100_000, true) +} +func BenchmarkExactMatcher_Pattern1_000_Text100_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 1_000, 100_000, true) +} +func BenchmarkExactMatcher_Pattern10_000_Text100_000_CaseInsensitive(b *testing.B) { + benchmarkExactMatcher(b, 10_000, 100_000, true) } diff --git a/pkg/match/exact_test.go b/pkg/match/exact_test.go index 2744491..e47779f 100644 --- a/pkg/match/exact_test.go +++ b/pkg/match/exact_test.go @@ -10,52 +10,58 @@ import ( func TestMatch(t *testing.T) { type test struct { - name string - isBinary bool - text string - pattern string - expected *Match - expectedOk bool + name string + isBinary bool + isCaseInsensitive bool + text string + pattern string + expected *Match + expectedOk bool } tests := []test{ { - name: "should ignore binary files", - isBinary: true, - text: "asdf", - pattern: "as", - expected: nil, - expectedOk: false, + name: "should ignore binary files", + isBinary: true, + isCaseInsensitive: false, + text: "asdf", + pattern: "as", + expected: nil, + expectedOk: false, }, { - name: "should reject non-matching text file", - isBinary: false, - text: "asdf", - pattern: "foo", - expected: nil, - expectedOk: false, + name: "should reject non-matching text file", + isBinary: false, + isCaseInsensitive: false, + text: "asdf", + pattern: "foo", + expected: nil, + expectedOk: false, }, { - name: "should reject empty text file with non-empty pattern", - isBinary: false, - text: "", - pattern: "foo", - expected: nil, - expectedOk: false, + name: "should reject empty text file with non-empty pattern", + isBinary: false, + isCaseInsensitive: false, + text: "", + pattern: "foo", + expected: nil, + expectedOk: false, }, { - name: "should reject empty text file with empty pattern", - isBinary: false, - text: "", - pattern: "", - expected: nil, - expectedOk: false, + name: "should reject empty text file with empty pattern", + isBinary: false, + isCaseInsensitive: false, + text: "", + pattern: "", + expected: nil, + expectedOk: false, }, { - name: "should accept matching text file", - isBinary: false, - text: "foo bar baz", - pattern: "bar", + name: "should accept matching text file", + isBinary: false, + isCaseInsensitive: false, + text: "foo bar baz", + pattern: "bar", expected: &Match{ Positions: []*FilePosition{ { @@ -69,8 +75,9 @@ func TestMatch(t *testing.T) { expectedOk: true, }, { - name: "should accept matching multi-line text file", - isBinary: false, + name: "should accept matching multi-line text file", + isBinary: false, + isCaseInsensitive: false, text: `first second @@ -91,8 +98,9 @@ foo expectedOk: true, }, { - name: "should accept multiple matches in multi-line text file", - isBinary: false, + name: "should accept multiple matches in multi-line text file", + isBinary: false, + isCaseInsensitive: false, text: `first second foo @@ -119,10 +127,11 @@ foo fifth expectedOk: true, }, { - name: "should accept multiple matches on same line", - isBinary: false, - text: "foo bar foo", - pattern: "foo", + name: "should accept multiple matches on same line", + isBinary: false, + isCaseInsensitive: false, + text: "foo bar foo", + pattern: "foo", expected: &Match{ Positions: []*FilePosition{ { @@ -141,6 +150,60 @@ foo fifth }, expectedOk: true, }, + { + name: "should accept lowercase pattern, uppercase text when case-insensitive", + isBinary: false, + isCaseInsensitive: true, + text: "HELLO WORLD", + pattern: "world", + expected: &Match{ + Positions: []*FilePosition{ + { + Line: 0, + ColumnStart: 6, + ColumnEnd: 11, + Text: "HELLO WORLD", + }, + }, + }, + expectedOk: true, + }, + { + name: "should accept uppercase pattern, lowercase text when case-insensitive", + isBinary: false, + isCaseInsensitive: true, + text: "hello world", + pattern: "WORLD", + expected: &Match{ + Positions: []*FilePosition{ + { + Line: 0, + ColumnStart: 6, + ColumnEnd: 11, + Text: "hello world", + }, + }, + }, + expectedOk: true, + }, + { + name: "should accept mixed-case pattern, mixed-case text when case-insensitive", + isBinary: false, + isCaseInsensitive: true, + text: "Hello wOrLd", + pattern: "WoRlD", + expected: &Match{ + Positions: []*FilePosition{ + { + Line: 0, + ColumnStart: 6, + ColumnEnd: 11, + Text: "Hello wOrLd", + }, + }, + }, + expectedOk: true, + }, } for _, tt := range tests { @@ -149,7 +212,7 @@ foo fifth fileInfo.IsBinary = tt.isBinary fileInfo.Text = tt.text - matcher := newExactMatcher(zerolog.Nop()) + matcher := newExactMatcher(zerolog.Nop(), tt.isCaseInsensitive) actual, ok := matcher.Match(tt.pattern, fileInfo) diff --git a/pkg/match/match.go b/pkg/match/match.go index 34dc1bc..f87da8e 100644 --- a/pkg/match/match.go +++ b/pkg/match/match.go @@ -28,8 +28,12 @@ type filteringMatcher struct { var _ Matcher = (*filteringMatcher)(nil) -func New(logger zerolog.Logger, allowedFiletypes []types.FileExtension) *filteringMatcher { - em := newExactMatcher(logger) +func New( + logger zerolog.Logger, + caseInsensitive bool, + allowedFiletypes []types.FileExtension, +) *filteringMatcher { + em := newExactMatcher(logger, caseInsensitive) logger = logger.With().Str("source", "FilteringMatcher").Logger() return &filteringMatcher{