From 49b569e12132299550c3465f79ebe3ad62518c7f Mon Sep 17 00:00:00 2001 From: mattdurham Date: Tue, 17 Oct 2023 10:39:16 -0700 Subject: [PATCH] fix issue with non utf8 encodings and environment variable expansion (#5515) * Fix issue with encoding not being utf8, since envar shoves utf8 into an utfX file. Now we convert all to utf8. * Fix linting and test on linux. * update changelog * update go.mod * update go.sum * Allow strict usage of utf8. * fix spelling * reorganize to prevent cyclic dependencies --- CHANGELOG.md | 2 + go.mod | 6 +-- go.sum | 2 - pkg/config/config.go | 12 +++-- pkg/config/config_test.go | 50 +++++++++++++++++-- pkg/config/encoder/encoder.go | 50 +++++++++++++++++++ pkg/config/encoder/test_encoding_unknown.txt | 2 + pkg/config/encoder/test_encoding_utf16be.txt | Bin 0 -> 60 bytes pkg/config/encoder/test_encoding_utf16le.txt | Bin 0 -> 60 bytes pkg/config/encoder/test_encoding_utf32be.txt | Bin 0 -> 116 bytes pkg/config/encoder/test_encoding_utf32le.txt | Bin 0 -> 116 bytes pkg/config/encoder/test_encoding_utf8.txt | 2 + pkg/config/encoder/test_encoding_utf8bom.txt | 2 + pkg/flow/source.go | 5 ++ 14 files changed, 119 insertions(+), 14 deletions(-) create mode 100644 pkg/config/encoder/encoder.go create mode 100644 pkg/config/encoder/test_encoding_unknown.txt create mode 100644 pkg/config/encoder/test_encoding_utf16be.txt create mode 100644 pkg/config/encoder/test_encoding_utf16le.txt create mode 100644 pkg/config/encoder/test_encoding_utf32be.txt create mode 100644 pkg/config/encoder/test_encoding_utf32le.txt create mode 100644 pkg/config/encoder/test_encoding_utf8.txt create mode 100644 pkg/config/encoder/test_encoding_utf8bom.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index aee8d0432204..79c6971b7ab9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,8 @@ Main (unreleased) - Fixed issue where adding a module after initial start, that failed to load then subsequently resolving the issue would cause the module to permanently fail to load with `id already exists` error. (@mattdurham) + +- Allow the usage of encodings other than UTF8 to be used with environment variable expansion. (@mattdurham) ### Enhancements diff --git a/go.mod b/go.mod index 4d4c72562c4b..c61e9639d11f 100644 --- a/go.mod +++ b/go.mod @@ -330,7 +330,7 @@ require ( github.com/denverdino/aliyungo v0.0.0-20190125010748-a747050bb1ba // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/digitalocean/godo v1.99.0 // indirect - github.com/dimchansky/utfbom v1.1.1 // indirect + github.com/dimchansky/utfbom v1.1.1 github.com/docker/cli v23.0.3+incompatible // indirect github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/go-units v0.5.0 // indirect @@ -703,6 +703,8 @@ replace ( // and eventually remove if windows_exporter shifts to it. https://github.com/leoluk/perflib_exporter/pull/43 github.com/leoluk/perflib_exporter => github.com/grafana/perflib_exporter v0.1.1-0.20230511173423-6166026bd090 + github.com/prometheus-community/postgres_exporter => github.com/grafana/postgres_exporter v0.8.1-0.20210722175051-db35d7c2f520 + // TODO(mattdurham): this is to allow defaults to propogate properly: https://github.com/prometheus-community/windows_exporter/pull/1227 github.com/prometheus-community/windows_exporter => github.com/grafana/windows_exporter v0.15.1-0.20230612134738-fdb3ba7accd8 @@ -711,8 +713,6 @@ replace ( // TODO(marctc, mattdurham): Replace node_export with custom fork for multi usage. https://github.com/prometheus/node_exporter/pull/2812 github.com/prometheus/node_exporter => github.com/grafana/node_exporter v0.18.1-grafana-r01.0.20231004161416-702318429731 - - github.com/prometheus-community/postgres_exporter => github.com/grafana/postgres_exporter v0.8.1-0.20210722175051-db35d7c2f520 ) // Excluding fixes a conflict in test packages and allows "go mod tidy" to run. diff --git a/go.sum b/go.sum index d25a855bab0a..e5f98933baa0 100644 --- a/go.sum +++ b/go.sum @@ -3067,8 +3067,6 @@ google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= -google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o= -google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= diff --git a/pkg/config/config.go b/pkg/config/config.go index c11493dc90b9..21b9ffe1422a 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -13,6 +13,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/agent/pkg/build" + "github.com/grafana/agent/pkg/config/encoder" "github.com/grafana/agent/pkg/config/features" "github.com/grafana/agent/pkg/config/instrumentation" "github.com/grafana/agent/pkg/logs" @@ -245,13 +246,10 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { // LoadFile reads a file and passes the contents to Load func LoadFile(filename string, expandEnvVars bool, c *Config) error { buf, err := os.ReadFile(filename) - if err != nil { return fmt.Errorf("error reading config file %w", err) } - instrumentation.InstrumentConfig(buf) - return LoadBytes(buf, expandEnvVars, c) } @@ -341,15 +339,19 @@ func LoadRemote(url string, expandEnvVars bool, c *Config) error { } func performEnvVarExpansion(buf []byte, expandEnvVars bool) ([]byte, error) { + utf8Buf, err := encoder.EnsureUTF8(buf, false) + if err != nil { + return nil, err + } // (Optionally) expand with environment variables if expandEnvVars { - s, err := envsubst.Eval(string(buf), getenv) + s, err := envsubst.Eval(string(utf8Buf), getenv) if err != nil { return nil, fmt.Errorf("unable to substitute config with environment variables: %w", err) } return []byte(s), nil } - return buf, nil + return utf8Buf, nil } // LoadBytes unmarshals a config from a buffer. Defaults are not diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index a10b47b81e96..57bf4052376c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -2,22 +2,24 @@ package config import ( "flag" + "net/url" + "os" + "path" "strings" "testing" "time" - commonCfg "github.com/prometheus/common/config" - - "github.com/stretchr/testify/assert" - + "github.com/grafana/agent/pkg/config/encoder" "github.com/grafana/agent/pkg/metrics" "github.com/grafana/agent/pkg/metrics/instance" "github.com/grafana/agent/pkg/server" "github.com/grafana/agent/pkg/util" + commonCfg "github.com/prometheus/common/config" "github.com/prometheus/common/model" promCfg "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -545,3 +547,43 @@ integrations: require.NoError(t, LoadBytes([]byte(input), false, &cfg)) require.NoError(t, cfg.Validate(nil)) } + +func TestConfigEncoding(t *testing.T) { + type testCase struct { + filename string + success bool + } + cases := []testCase{ + {filename: "test_encoding_unknown.txt", success: false}, + {filename: "test_encoding_utf8.txt", success: true}, + {filename: "test_encoding_utf8bom.txt", success: true}, + {filename: "test_encoding_utf16le.txt", success: true}, + {filename: "test_encoding_utf16be.txt", success: true}, + {filename: "test_encoding_utf32be.txt", success: true}, + {filename: "test_encoding_utf32le.txt", success: true}, + } + for _, tt := range cases { + t.Run(tt.filename, func(t *testing.T) { + buf, err := os.ReadFile(path.Join("encoder", tt.filename)) + t.Setenv("TEST", "debug") + require.NoError(t, err) + c := &Config{} + err = LoadBytes(buf, true, c) + if tt.success { + require.NoError(t, err) + require.True(t, c.Server.LogLevel.String() == "debug") + } else { + require.Error(t, err) + } + }) + } +} + +func TestConfigEncodingStrict(t *testing.T) { + buf, err := os.ReadFile(path.Join("encoder", "test_encoding_utf16le.txt")) + require.NoError(t, err) + _, err = encoder.EnsureUTF8(buf, false) + require.NoError(t, err) + _, err = encoder.EnsureUTF8(buf, true) + require.Error(t, err) +} diff --git a/pkg/config/encoder/encoder.go b/pkg/config/encoder/encoder.go new file mode 100644 index 000000000000..20c1bb64dae5 --- /dev/null +++ b/pkg/config/encoder/encoder.go @@ -0,0 +1,50 @@ +package encoder + +import ( + "bytes" + "fmt" + "io" + "unicode/utf8" + + "github.com/dimchansky/utfbom" + "golang.org/x/text/encoding" + uni "golang.org/x/text/encoding/unicode" + "golang.org/x/text/encoding/unicode/utf32" +) + +// EnsureUTF8 will convert from the most common encodings to UTF8. +// If useStrictUTF8 is enabled then if the file is not already utf8 then an error will be returned. +func EnsureUTF8(config []byte, useStrictUTF8 bool) ([]byte, error) { + buffer := bytes.NewBuffer(config) + src, enc := utfbom.Skip(buffer) + var converted []byte + skippedBytes, err := io.ReadAll(src) + if err != nil { + return nil, err + } + var encoder encoding.Encoding + switch enc { + case utfbom.UTF16BigEndian: + encoder = uni.UTF16(uni.BigEndian, uni.IgnoreBOM) + case utfbom.UTF16LittleEndian: + encoder = uni.UTF16(uni.LittleEndian, uni.IgnoreBOM) + case utfbom.UTF32BigEndian: + encoder = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM) + case utfbom.UTF32LittleEndian: + encoder = utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM) + case utfbom.UTF8: // This only checks utf8 bom + return config, nil + default: + // If its utf8 valid then return. + if utf8.Valid(config) { + return config, nil + } + return nil, fmt.Errorf("unknown encoding for config") + } + if useStrictUTF8 { + return nil, fmt.Errorf("configuration is encoded with %s but must be utf8", enc.String()) + } + decoder := encoder.NewDecoder() + converted, err = decoder.Bytes(skippedBytes) + return converted, err +} diff --git a/pkg/config/encoder/test_encoding_unknown.txt b/pkg/config/encoder/test_encoding_unknown.txt new file mode 100644 index 000000000000..b3f3b5b78170 --- /dev/null +++ b/pkg/config/encoder/test_encoding_unknown.txt @@ -0,0 +1,2 @@ +ñ: + log_level: ${TESTzúß­úß­úß \ No newline at end of file diff --git a/pkg/config/encoder/test_encoding_utf16be.txt b/pkg/config/encoder/test_encoding_utf16be.txt new file mode 100644 index 0000000000000000000000000000000000000000..c823726ee03a157295cd9e04e9210dafb1182c0a GIT binary patch literal 60 zcmezOpP`r`m7$2C3`|-v@G@{QC;(v&Lq0<~Lp+d91# Dmz4{Z literal 0 HcmV?d00001 diff --git a/pkg/config/encoder/test_encoding_utf32be.txt b/pkg/config/encoder/test_encoding_utf32be.txt new file mode 100644 index 0000000000000000000000000000000000000000..30810387aded9948a2b9656d90cca7c8e21d7255 GIT binary patch literal 116 zcmZQz`1hZIfuR_PQ-Qb$h|7@JRzNlv5Gz1&4v@|V;&dR62VxK(qz|SC#0Tk70Adv& Qt_I=|Aa(`fU?{s50QcJqhyVZp literal 0 HcmV?d00001 diff --git a/pkg/config/encoder/test_encoding_utf32le.txt b/pkg/config/encoder/test_encoding_utf32le.txt new file mode 100644 index 0000000000000000000000000000000000000000..e6d7efdfd54784e0261ae4faf709a44efb19032d GIT binary patch literal 116 zcmezWkAb0>fq@|vh>L)@42f+8WOD(r0u<)}>3kqg2jX}j2Ju1qV0u7&kRAmfRsrH_ QAPxayS0E0CvTK1f02qS{hyVZp literal 0 HcmV?d00001 diff --git a/pkg/config/encoder/test_encoding_utf8.txt b/pkg/config/encoder/test_encoding_utf8.txt new file mode 100644 index 000000000000..ca62e9a04042 --- /dev/null +++ b/pkg/config/encoder/test_encoding_utf8.txt @@ -0,0 +1,2 @@ +server: + log_level: ${TEST} \ No newline at end of file diff --git a/pkg/config/encoder/test_encoding_utf8bom.txt b/pkg/config/encoder/test_encoding_utf8bom.txt new file mode 100644 index 000000000000..4534ecee1eef --- /dev/null +++ b/pkg/config/encoder/test_encoding_utf8bom.txt @@ -0,0 +1,2 @@ +server: + log_level: ${TEST} \ No newline at end of file diff --git a/pkg/flow/source.go b/pkg/flow/source.go index 310af1d2e6b9..acd7d2ce2f58 100644 --- a/pkg/flow/source.go +++ b/pkg/flow/source.go @@ -6,6 +6,7 @@ import ( "sort" "strings" + "github.com/grafana/agent/pkg/config/encoder" "github.com/grafana/river/ast" "github.com/grafana/river/diag" "github.com/grafana/river/parser" @@ -27,6 +28,10 @@ type Source struct { // // bb must not be modified after passing to ParseSource. func ParseSource(name string, bb []byte) (*Source, error) { + bb, err := encoder.EnsureUTF8(bb, true) + if err != nil { + return nil, err + } node, err := parser.ParseFile(name, bb) if err != nil { return nil, err