From be9fefb8a8f1c24ecf43d4abbaa019447062a661 Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 22 Sep 2023 10:08:52 -0600 Subject: [PATCH] [chore][pkg/stanza] Minor cleanup in split package (#27075) --- pkg/stanza/split/split.go | 42 +++++++++++++++++++--------------- pkg/stanza/split/split_test.go | 20 ++++++++-------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/pkg/stanza/split/split.go b/pkg/stanza/split/split.go index b5883c14841f..310a8f031c66 100644 --- a/pkg/stanza/split/split.go +++ b/pkg/stanza/split/split.go @@ -20,33 +20,38 @@ type Config struct { } // Func will return a bufio.SplitFunc based on the config -func (c Config) Func(enc encoding.Encoding, flushAtEOF bool, maxLogSize int) (splitFunc bufio.SplitFunc, err error) { - switch { - case c.LineEndPattern != "" && c.LineStartPattern != "": - return nil, fmt.Errorf("only one of line_start_pattern or line_end_pattern can be set") - case enc == encoding.Nop && (c.LineEndPattern != "" || c.LineStartPattern != ""): - return nil, fmt.Errorf("line_start_pattern or line_end_pattern should not be set when using nop encoding") - case enc == encoding.Nop: - return NoSplitFunc(maxLogSize), nil - case c.LineEndPattern == "" && c.LineStartPattern == "": - splitFunc, err = NewlineSplitFunc(enc, flushAtEOF) - if err != nil { - return nil, err +func (c Config) Func(enc encoding.Encoding, flushAtEOF bool, maxLogSize int) (bufio.SplitFunc, error) { + if enc == encoding.Nop { + if c.LineEndPattern != "" { + return nil, fmt.Errorf("line_end_pattern should not be set when using nop encoding") + } + if c.LineStartPattern != "" { + return nil, fmt.Errorf("line_start_pattern should not be set when using nop encoding") } - case c.LineEndPattern != "": + return NoSplitFunc(maxLogSize), nil + } + + if c.LineEndPattern == "" && c.LineStartPattern == "" { + return NewlineSplitFunc(enc, flushAtEOF) + } + + if c.LineEndPattern != "" && c.LineStartPattern == "" { re, err := regexp.Compile("(?m)" + c.LineEndPattern) if err != nil { return nil, fmt.Errorf("compile line end regex: %w", err) } - splitFunc = LineEndSplitFunc(re, c.OmitPattern, flushAtEOF) - case c.LineStartPattern != "": + return LineEndSplitFunc(re, c.OmitPattern, flushAtEOF), nil + } + + if c.LineEndPattern == "" && c.LineStartPattern != "" { re, err := regexp.Compile("(?m)" + c.LineStartPattern) if err != nil { return nil, fmt.Errorf("compile line start regex: %w", err) } - splitFunc = LineStartSplitFunc(re, c.OmitPattern, flushAtEOF) + return LineStartSplitFunc(re, c.OmitPattern, flushAtEOF), nil } - return splitFunc, nil + + return nil, fmt.Errorf("only one of line_start_pattern or line_end_pattern can be set") } // LineStartSplitFunc creates a bufio.SplitFunc that splits an incoming stream into @@ -61,8 +66,7 @@ func LineStartSplitFunc(re *regexp.Regexp, omitPattern bool, flushAtEOF bool) bu } return 0, nil, nil // read more data and try again. } - firstMatchStart := firstLoc[0] - firstMatchEnd := firstLoc[1] + firstMatchStart, firstMatchEnd := firstLoc[0], firstLoc[1] if firstMatchStart != 0 { // the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data diff --git a/pkg/stanza/split/split_test.go b/pkg/stanza/split/split_test.go index 9db4b7dbb39e..27495a95e69c 100644 --- a/pkg/stanza/split/split_test.go +++ b/pkg/stanza/split/split_test.go @@ -41,6 +41,16 @@ func TestConfigFunc(t *testing.T) { assert.Equal(t, raw[:maxLogSize], token) }) + t.Run("NopEncodingError", func(t *testing.T) { + endCfg := Config{LineEndPattern: "\n"} + _, err := endCfg.Func(encoding.Nop, false, 0) + require.Equal(t, err, fmt.Errorf("line_end_pattern should not be set when using nop encoding")) + + startCfg := Config{LineStartPattern: "\n"} + _, err = startCfg.Func(encoding.Nop, false, 0) + require.Equal(t, err, fmt.Errorf("line_start_pattern should not be set when using nop encoding")) + }) + t.Run("Newline", func(t *testing.T) { cfg := Config{} f, err := cfg.Func(unicode.UTF8, false, maxLogSize) @@ -778,16 +788,6 @@ func TestNoSplitFunc(t *testing.T) { } } -func TestNoopEncodingError(t *testing.T) { - endCfg := Config{LineEndPattern: "\n"} - _, err := endCfg.Func(encoding.Nop, false, 0) - require.Equal(t, err, fmt.Errorf("line_start_pattern or line_end_pattern should not be set when using nop encoding")) - - startCfg := Config{LineStartPattern: "\n"} - _, err = startCfg.Func(encoding.Nop, false, 0) - require.Equal(t, err, fmt.Errorf("line_start_pattern or line_end_pattern should not be set when using nop encoding")) -} - func TestNewlineSplitFunc_Encodings(t *testing.T) { cases := []struct { name string