From 1c96225b6445082d106e622bae928e21971a444c Mon Sep 17 00:00:00 2001 From: Dmitrii Anoshin Date: Wed, 24 Jul 2024 14:18:54 -0700 Subject: [PATCH 1/3] [confmap] Fix expansion of escaped environment variables (#10716) This change fixes the issue where environment variables escaped with $$ are expanded. The collector now converts `$${ENV_VAR}` to `${ENV_VAR}` and `$$ENV_VAR` to `$ENV_VAR` without further expansion (or the expansion failure in case of `$ENV_VAR`). Fixes https://github.com/open-telemetry/opentelemetry-collector/issues/10713 --- .chloggen/fix-env-var-double-escaping.yaml | 27 ++++++ confmap/converter/expandconverter/expand.go | 1 + confmap/expand_test.go | 43 --------- confmap/internal/e2e/expand_test.go | 94 +++++++++++++++++++ confmap/internal/e2e/go.mod | 6 ++ .../e2e/testdata/expand-escaped-env.yaml | 29 ++++++ confmap/resolver.go | 10 +- confmap/testdata/expand-escaped-env.yaml | 31 ------ 8 files changed, 158 insertions(+), 83 deletions(-) create mode 100644 .chloggen/fix-env-var-double-escaping.yaml create mode 100644 confmap/internal/e2e/expand_test.go create mode 100644 confmap/internal/e2e/testdata/expand-escaped-env.yaml delete mode 100644 confmap/testdata/expand-escaped-env.yaml diff --git a/.chloggen/fix-env-var-double-escaping.yaml b/.chloggen/fix-env-var-double-escaping.yaml new file mode 100644 index 00000000000..5ea730a1965 --- /dev/null +++ b/.chloggen/fix-env-var-double-escaping.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: confmap + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix wrong expansion of environment variables escaped with `$$`, e.g. `$${ENV_VAR}` and `$$ENV_VAR`. + +# One or more tracking issues or pull requests related to the change +issues: [10713] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + This change fixes the issue where environment variables escaped with $$ were expanded. + The collector now converts `$${ENV_VAR}` to `${ENV_VAR}` and `$$ENV_VAR` to `$ENV_VAR` without further expansion. + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] diff --git a/confmap/converter/expandconverter/expand.go b/confmap/converter/expandconverter/expand.go index e603ca9ce04..2c4af613499 100644 --- a/confmap/converter/expandconverter/expand.go +++ b/confmap/converter/expandconverter/expand.go @@ -85,6 +85,7 @@ func (c converter) expandEnv(s string) (string, error) { // - $FOO will be substituted with env var FOO // - $$FOO will be replaced with $FOO // - $$$FOO will be replaced with $ + substituted env var FOO + // TODO: Move the escaping of $$ out from the expand converter to the resolver. if str == "$" { return "$" } diff --git a/confmap/expand_test.go b/confmap/expand_test.go index dd406922948..53b244374b1 100644 --- a/confmap/expand_test.go +++ b/confmap/expand_test.go @@ -577,46 +577,3 @@ func TestResolverDefaultProviderExpand(t *testing.T) { require.NoError(t, err) assert.Equal(t, map[string]any{"foo": "localhost"}, cfgMap.ToStringMap()) } - -func Test_EscapedEnvVars(t *testing.T) { - const mapValue2 = "some map value" - - expectedMap := map[string]any{ - "test_map": map[string]any{ - "recv.1": "$MAP_VALUE_1", - "recv.2": "$$MAP_VALUE_2", - "recv.3": "$$MAP_VALUE_3", - "recv.4": "$" + mapValue2, - "recv.5": "some${MAP_VALUE_4}text", - "recv.6": "${ONE}${TWO}", - "recv.7": "text$", - "recv.8": "$", - "recv.9": "${1}${env:2}", - "recv.10": "some${env:MAP_VALUE_4}text", - "recv.11": "${env:" + mapValue2 + "}", - "recv.12": "${env:${MAP_VALUE_2}}", - "recv.13": "env:MAP_VALUE_2}${MAP_VALUE_2}{", - "recv.14": "${env:MAP_VALUE_2${MAP_VALUE_2}", - "recv.15": "$" + mapValue2, - }} - - fileProvider := newFakeProvider("file", func(_ context.Context, uri string, _ WatcherFunc) (*Retrieved, error) { - return NewRetrieved(newConfFromFile(t, uri[5:])) - }) - envProvider := newFakeProvider("env", func(_ context.Context, uri string, _ WatcherFunc) (*Retrieved, error) { - if uri == "env:MAP_VALUE_2" { - return NewRetrieved(mapValue2) - } - return nil, errors.New("should not be expanding any other env vars") - }) - - resolver, err := NewResolver(ResolverSettings{URIs: []string{filepath.Join("testdata", "expand-escaped-env.yaml")}, ProviderFactories: []ProviderFactory{fileProvider, envProvider}, ConverterFactories: nil, DefaultScheme: "env"}) - require.NoError(t, err) - - // Test that expanded configs are the same with the simple config with no env vars. - cfgMap, err := resolver.Resolve(context.Background()) - require.NoError(t, err) - m := cfgMap.ToStringMap() - assert.Equal(t, expectedMap, m) - -} diff --git a/confmap/internal/e2e/expand_test.go b/confmap/internal/e2e/expand_test.go new file mode 100644 index 00000000000..018725d30d5 --- /dev/null +++ b/confmap/internal/e2e/expand_test.go @@ -0,0 +1,94 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package e2etest + +import ( + "context" + "fmt" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/collector/confmap" + "go.opentelemetry.io/collector/confmap/converter/expandconverter" + "go.opentelemetry.io/collector/confmap/provider/envprovider" + "go.opentelemetry.io/collector/confmap/provider/fileprovider" + "go.opentelemetry.io/collector/confmap/provider/yamlprovider" +) + +// Test_EscapedEnvVars tests that the resolver supports escaped env vars working together with expand converter. +func Test_EscapedEnvVars(t *testing.T) { + tests := []struct { + name string + scheme string + }{ + { + name: "no_default_scheme", + scheme: "", + }, + { + name: "env", + scheme: "env", + }, + } + + const expandedValue = "some expanded value" + t.Setenv("ENV_VALUE", expandedValue) + + expectedFailures := map[string]string{ + "$ENV_VALUE": "variable substitution using $VAR has been deprecated in favor of ${VAR} and ${env:VAR}", + "$$$ENV_VALUE": "variable substitution using $VAR has been deprecated in favor of ${VAR} and ${env:VAR}", + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expectedMap := map[string]any{ + "test_map": map[string]any{ + "key1": "$ENV_VALUE", + "key2": "$$ENV_VALUE", + "key3": "$" + expandedValue, + "key4": "some" + expandedValue + "text", + "key5": "some${ENV_VALUE}text", + "key6": "${ONE}${TWO}", + "key7": "text$", + "key8": "$", + "key9": "${1}${env:2}", + "key10": "some${env:ENV_VALUE}text", + "key11": "${env:" + expandedValue + "}", + "key12": "${env:${ENV_VALUE}}", + "key13": "env:MAP_VALUE_2}${ENV_VALUE}{", + "key14": "$" + expandedValue, + }, + } + + resolver, err := confmap.NewResolver(confmap.ResolverSettings{ + URIs: []string{filepath.Join("testdata", "expand-escaped-env.yaml")}, + ProviderFactories: []confmap.ProviderFactory{fileprovider.NewFactory(), envprovider.NewFactory()}, + ConverterFactories: []confmap.ConverterFactory{expandconverter.NewFactory()}, + DefaultScheme: tt.scheme, + }) + require.NoError(t, err) + + // Test that expanded configs are the same with the simple config with no env vars. + cfgMap, err := resolver.Resolve(context.Background()) + require.NoError(t, err) + m := cfgMap.ToStringMap() + assert.Equal(t, expectedMap, m) + + for val, expectedErr := range expectedFailures { + resolver, err = confmap.NewResolver(confmap.ResolverSettings{ + URIs: []string{fmt.Sprintf("yaml: test: %s", val)}, + ProviderFactories: []confmap.ProviderFactory{yamlprovider.NewFactory(), envprovider.NewFactory()}, + ConverterFactories: []confmap.ConverterFactory{expandconverter.NewFactory()}, + DefaultScheme: tt.scheme, + }) + require.NoError(t, err) + _, err := resolver.Resolve(context.Background()) + require.ErrorContains(t, err, expectedErr) + } + }) + } +} diff --git a/confmap/internal/e2e/go.mod b/confmap/internal/e2e/go.mod index 705f96795f8..4e27927b911 100644 --- a/confmap/internal/e2e/go.mod +++ b/confmap/internal/e2e/go.mod @@ -5,8 +5,10 @@ go 1.21.0 require ( github.com/stretchr/testify v1.9.0 go.opentelemetry.io/collector/confmap v0.105.0 + go.opentelemetry.io/collector/confmap/converter/expandconverter v0.105.0 go.opentelemetry.io/collector/confmap/provider/envprovider v0.105.0 go.opentelemetry.io/collector/confmap/provider/fileprovider v0.105.0 + go.opentelemetry.io/collector/confmap/provider/yamlprovider v0.105.0 go.opentelemetry.io/collector/featuregate v1.12.0 go.opentelemetry.io/collector/internal/globalgates v0.105.0 ) @@ -32,6 +34,10 @@ replace go.opentelemetry.io/collector/confmap/provider/fileprovider => ../../pro replace go.opentelemetry.io/collector/confmap/provider/envprovider => ../../provider/envprovider +replace go.opentelemetry.io/collector/confmap/provider/yamlprovider => ../../provider/yamlprovider + replace go.opentelemetry.io/collector/featuregate => ../../../featuregate replace go.opentelemetry.io/collector/internal/globalgates => ../../../internal/globalgates + +replace go.opentelemetry.io/collector/confmap/converter/expandconverter => ../../converter/expandconverter diff --git a/confmap/internal/e2e/testdata/expand-escaped-env.yaml b/confmap/internal/e2e/testdata/expand-escaped-env.yaml new file mode 100644 index 00000000000..ae8cb280ede --- /dev/null +++ b/confmap/internal/e2e/testdata/expand-escaped-env.yaml @@ -0,0 +1,29 @@ +test_map: + # $$ -> escaped $ + key1: "$$ENV_VALUE" + # $$$$ -> two escaped $ + key2: "$$$$ENV_VALUE" + # $$ -> escaped $ + ${ENV_VALUE} expanded + key3: "$$${ENV_VALUE}" + # expanded in the middle + key4: "some${ENV_VALUE}text" + # escaped $ in the middle + key5: "some$${ENV_VALUE}text" + # two escaped $ + key6: "$${ONE}$${TWO}" + # trailing escaped $ + key7: "text$$" + # escaped $ alone + key8: "$$" + # escaped number and uri + key9: "$${1}$${env:2}" + # escape provider + key10: "some$${env:ENV_VALUE}text" + # can escape outer when nested + key11: "$${env:${ENV_VALUE}}" + # can escape inner and outer when nested + key12: "$${env:$${ENV_VALUE}}" + # can escape partial + key13: "env:MAP_VALUE_2}$${ENV_VALUE}{" + # $$$ -> escaped $ + expanded env var + key14: "$$${env:ENV_VALUE}" diff --git a/confmap/resolver.go b/confmap/resolver.go index ca197076327..7c9ed303c73 100644 --- a/confmap/resolver.go +++ b/confmap/resolver.go @@ -12,8 +12,6 @@ import ( "go.uber.org/multierr" "go.uber.org/zap" - - "go.opentelemetry.io/collector/internal/globalgates" ) // follows drive-letter specification: @@ -173,13 +171,7 @@ func (mr *Resolver) Resolve(ctx context.Context) (*Conf, error) { if err != nil { return nil, err } - - if v, ok := val.(string); ok && globalgates.UseUnifiedEnvVarExpansionRules.IsEnabled() { - cfgMap[k] = strings.ReplaceAll(v, "$$", "$") - } else { - cfgMap[k] = val - } - + cfgMap[k] = val } retMap = NewFromStringMap(cfgMap) diff --git a/confmap/testdata/expand-escaped-env.yaml b/confmap/testdata/expand-escaped-env.yaml deleted file mode 100644 index 6b2cd162831..00000000000 --- a/confmap/testdata/expand-escaped-env.yaml +++ /dev/null @@ -1,31 +0,0 @@ -test_map: - # $$ -> escaped $ - recv.1: "$$MAP_VALUE_1" - # $$$ -> escaped $ + $MAP_VALUE_2 - recv.2: "$$$MAP_VALUE_2" - # $$$$ -> two escaped $ - recv.3: "$$$$MAP_VALUE_3" - # $$$ -> escaped $ + substituted env var - recv.4: "$$${MAP_VALUE_2}" - # escaped $ in the middle - recv.5: "some$${MAP_VALUE_4}text" - # two escaped $ - recv.6: "$${ONE}$${TWO}" - # trailing escaped $ - recv.7: "text$$" - # escaped $ alone - recv.8: "$$" - # Escape numbers - recv.9: "$${1}$${env:2}" - # can escape provider - recv.10: "some$${env:MAP_VALUE_4}text" - # can escape outer when nested - recv.11: "$${env:${MAP_VALUE_2}}" - # can escape inner and outer when nested - recv.12: "$${env:$${MAP_VALUE_2}}" - # can escape partial - recv.13: "env:MAP_VALUE_2}$${MAP_VALUE_2}{" - # can escape partial - recv.14: "${env:MAP_VALUE_2$${MAP_VALUE_2}" - # $$$ -> escaped $ + substituted env var - recv.15: "$$${env:MAP_VALUE_2}" From 0001db27595c159ce35e498d7a96537ae0b74bde Mon Sep 17 00:00:00 2001 From: Pablo Baeyens Date: Thu, 25 Jul 2024 10:06:08 +0200 Subject: [PATCH 2/3] [confmap] Store original string in confmap.Conf (#10618) #### Description - Adds new `expandedValue` struct that holds the original string representation if available for values resolved from a provider. - Removes any mention of `expandedValue` in the public API by adding a `sanitize` step before returning any `Get`s or `ToStringMap`s. - Adds new decoding hook that checks if the target field is of `string` type and uses the string representation in that case. #### Link to tracking issue Fixes #10605, Fixes #10405, Fixes #10659 #### Testing This changes the behavior in some cases, I update the test cases. #### Documentation | ENV value | ${ENV} before unification | ${ENV} in v0.105.0 (also ${env:ENV} before unification) | Value after this PR | |----------------------------|----------------------------|---------------------------------------------------------|----------------------------| | foo\nbar | foo\nbar | foo bar | foo\nbar | | 1111:1111:1111:1111:1111:: | 1111:1111:1111:1111:1111:: | **Error** | 1111:1111:1111:1111:1111:: | | "0123" | "0123" | 0123 | "0123" | --- ...mx-psi_string-value-for-string-fields.yaml | 27 ++ confmap/confmap.go | 73 ++++- confmap/confmap_test.go | 32 +++ confmap/expand.go | 57 +++- confmap/internal/e2e/fuzz_test.go | 88 ++++++ confmap/internal/e2e/types_test.go | 259 +++++++++++++----- confmap/provider.go | 8 +- confmap/provider_test.go | 4 +- confmap/resolver.go | 2 +- docs/rfcs/env-vars.md | 13 +- 10 files changed, 476 insertions(+), 87 deletions(-) create mode 100644 .chloggen/mx-psi_string-value-for-string-fields.yaml create mode 100644 confmap/internal/e2e/fuzz_test.go diff --git a/.chloggen/mx-psi_string-value-for-string-fields.yaml b/.chloggen/mx-psi_string-value-for-string-fields.yaml new file mode 100644 index 00000000000..0a7a49175c4 --- /dev/null +++ b/.chloggen/mx-psi_string-value-for-string-fields.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: confmap + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: When passing configuration for a string field using any provider, use the verbatim string representation as the value. + +# One or more tracking issues or pull requests related to the change +issues: [10605, 10405] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + This matches the behavior of `${ENV}` syntax prior to the promotion of the `confmap.unifyEnvVarExpansion` feature gate + to beta. It changes the behavior of the `${env:ENV}` syntax with escaped strings. + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/confmap/confmap.go b/confmap/confmap.go index f22d3f3f242..59524527b06 100644 --- a/confmap/confmap.go +++ b/confmap/confmap.go @@ -108,9 +108,34 @@ func (l *Conf) Marshal(rawVal any, _ ...MarshalOption) error { return l.Merge(NewFromStringMap(out)) } +func (l *Conf) unsanitizedGet(key string) any { + return l.k.Get(key) +} + +func sanitize(a any) any { + switch m := a.(type) { + case map[string]any: + c := maps.Copy(m) + for k, v := range m { + c[k] = sanitize(v) + } + return c + case []any: + var newSlice []any + for _, e := range m { + newSlice = append(newSlice, sanitize(e)) + } + return newSlice + case expandedValue: + return m.Value + } + return a +} + // Get can retrieve any value given the key to use. func (l *Conf) Get(key string) any { - return l.k.Get(key) + val := l.unsanitizedGet(key) + return sanitize(val) } // IsSet checks to see if the key has been set in any of the data locations. @@ -128,7 +153,7 @@ func (l *Conf) Merge(in *Conf) error { // It returns an error is the sub-config is not a map[string]any (use Get()), and an empty Map if none exists. func (l *Conf) Sub(key string) (*Conf, error) { // Code inspired by the koanf "Cut" func, but returns an error instead of empty map for unsupported sub-config type. - data := l.Get(key) + data := l.unsanitizedGet(key) if data == nil { return New(), nil } @@ -140,9 +165,14 @@ func (l *Conf) Sub(key string) (*Conf, error) { return nil, fmt.Errorf("unexpected sub-config value kind for key:%s value:%v kind:%v", key, data, reflect.TypeOf(data).Kind()) } +func (l *Conf) toStringMapWithExpand() map[string]any { + m := maps.Unflatten(l.k.All(), KeyDelimiter) + return m +} + // ToStringMap creates a map[string]any from a Parser. func (l *Conf) ToStringMap() map[string]any { - return maps.Unflatten(l.k.All(), KeyDelimiter) + return sanitize(l.toStringMapWithExpand()).(map[string]any) } // decodeConfig decodes the contents of the Conf into the result argument, using a @@ -160,6 +190,7 @@ func decodeConfig(m *Conf, result any, errorUnused bool, skipTopLevelUnmarshaler WeaklyTypedInput: !globalgates.StrictlyTypedInputGate.IsEnabled(), MatchName: caseSensitiveMatchName, DecodeHook: mapstructure.ComposeDecodeHookFunc( + useExpandValue(), expandNilStructPointersHookFunc(), mapstructure.StringToSliceHookFunc(","), mapKeyStringToMapKeyTextUnmarshalerHookFunc(), @@ -177,7 +208,7 @@ func decodeConfig(m *Conf, result any, errorUnused bool, skipTopLevelUnmarshaler if err != nil { return err } - if err = decoder.Decode(m.ToStringMap()); err != nil { + if err = decoder.Decode(m.toStringMapWithExpand()); err != nil { if strings.HasPrefix(err.Error(), "error decoding ''") { return errors.Unwrap(err) } @@ -206,6 +237,40 @@ func caseSensitiveMatchName(a, b string) bool { return a == b } +func castTo(exp expandedValue, useOriginal bool) (any, error) { + // If the target field is a string, use `exp.Original` or fail if not available. + if globalgates.StrictlyTypedInputGate.IsEnabled() && useOriginal { + if !exp.HasOriginal { + return nil, fmt.Errorf("cannot expand value to string: original value not set") + } + return exp.Original, nil + } + // Otherwise, use the parsed value (previous behavior). + return exp.Value, nil +} + +// When a value has been loaded from an external source via a provider, we keep both the +// parsed value and the original string value. This allows us to expand the value to its +// original string representation when decoding into a string field, and use the original otherwise. +func useExpandValue() mapstructure.DecodeHookFuncType { + return func( + _ reflect.Type, + to reflect.Type, + data any) (any, error) { + if exp, ok := data.(expandedValue); ok { + return castTo(exp, to.Kind() == reflect.String) + } + + // If the target field is a map or slice, sanitize input to remove expandedValue references. + switch to.Kind() { + case reflect.Array, reflect.Slice, reflect.Map: + // This does not handle map[string]string and []string explicitly. + return sanitize(data), nil + } + return data, nil + } +} + // In cases where a config has a mapping of something to a struct pointers // we want nil values to resolve to a pointer to the zero value of the // underlying struct just as we want nil values of a mapping of something diff --git a/confmap/confmap_test.go b/confmap/confmap_test.go index 5a93975ae2a..713583a7115 100644 --- a/confmap/confmap_test.go +++ b/confmap/confmap_test.go @@ -845,3 +845,35 @@ func TestRecursiveUnmarshaling(t *testing.T) { require.NoError(t, conf.Unmarshal(r)) require.Equal(t, "something", r.Foo) } + +func TestExpandedValue(t *testing.T) { + cm := NewFromStringMap(map[string]any{ + "key": expandedValue{ + Value: 0xdeadbeef, + HasOriginal: true, + Original: "original", + }}) + assert.Equal(t, 0xdeadbeef, cm.Get("key")) + assert.Equal(t, map[string]any{"key": 0xdeadbeef}, cm.ToStringMap()) + + type ConfigStr struct { + Key string `mapstructure:"key"` + } + + cfgStr := ConfigStr{} + assert.NoError(t, cm.Unmarshal(&cfgStr)) + assert.Equal(t, "original", cfgStr.Key) + + type ConfigInt struct { + Key int `mapstructure:"key"` + } + cfgInt := ConfigInt{} + assert.NoError(t, cm.Unmarshal(&cfgInt)) + assert.Equal(t, 0xdeadbeef, cfgInt.Key) + + type ConfigBool struct { + Key bool `mapstructure:"key"` + } + cfgBool := ConfigBool{} + assert.Error(t, cm.Unmarshal(&cfgBool)) +} diff --git a/confmap/expand.go b/confmap/expand.go index 9bfbdbaccbe..56dc512382b 100644 --- a/confmap/expand.go +++ b/confmap/expand.go @@ -43,6 +43,40 @@ func (mr *Resolver) expandValueRecursively(ctx context.Context, value any) (any, func (mr *Resolver) expandValue(ctx context.Context, value any) (any, bool, error) { switch v := value.(type) { + case expandedValue: + expanded, changed, err := mr.expandValue(ctx, v.Value) + if err != nil { + return nil, false, err + } + + switch exp := expanded.(type) { + case expandedValue, string: + // Return expanded values or strings verbatim. + return exp, changed, nil + } + + // At this point we don't know the target field type, so we need to expand the original representation as well. + originalExpanded, originalChanged, err := mr.expandValue(ctx, v.Original) + if err != nil { + return nil, false, err + } + + if originalExpanded, ok := originalExpanded.(string); ok { + // If the original representation is a string, return the expanded value with the original representation. + return expandedValue{ + Value: expanded, + Original: originalExpanded, + HasOriginal: true, + }, changed || originalChanged, nil + } + + result := expandedValue{ + Value: expanded, + Original: v.Original, + HasOriginal: v.HasOriginal, + } + + return result, changed || originalChanged, nil case string: if !strings.Contains(v, "${") || !strings.Contains(v, "}") { // No URIs to expand. @@ -117,6 +151,20 @@ func (mr *Resolver) findURI(input string) string { return input[openIndex : closeIndex+1] } +// expandedValue holds the YAML parsed value and original representation of a value. +// It keeps track of the original representation to be used by the 'useExpandValue' hook +// if the target field is a string. We need to keep both representations because we don't know +// what the target field type is until `Unmarshal` is called. +type expandedValue struct { + // Value is the expanded value. + Value any + // HasOriginal is true if the original representation is set. + HasOriginal bool + // Original is the original representation of the value. + // It is only valid if HasOriginal is true. + Original string +} + // findAndExpandURI attempts to find and expand the first occurrence of an expandable URI in input. If an expandable URI is found it // returns the input with the URI expanded, true and nil. Otherwise, it returns the unchanged input, false and the expanding error. // This method expects input to start with ${ and end with } @@ -134,10 +182,17 @@ func (mr *Resolver) findAndExpandURI(ctx context.Context, input string) (any, bo return input, false, err } - expanded, err := ret.AsRaw() + expanded := expandedValue{} + expanded.Value, err = ret.AsRaw() if err != nil { return input, false, err } + + if asStr, err2 := ret.AsString(); err2 == nil { + expanded.HasOriginal = true + expanded.Original = asStr + } + return expanded, true, err } expanded, err := mr.expandURI(ctx, uri) diff --git a/confmap/internal/e2e/fuzz_test.go b/confmap/internal/e2e/fuzz_test.go new file mode 100644 index 00000000000..462f66a484e --- /dev/null +++ b/confmap/internal/e2e/fuzz_test.go @@ -0,0 +1,88 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package e2etest + +import ( + "context" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// targetNested tests the following property: +// > Passing a value of type T directly through an environment variable +// > should be equivalent to passing it through a nested environment variable. +func targetNested[T any](t *testing.T, value string) { + resolver := NewResolver(t, "types_expand.yaml") + + // Use os.Setenv so we can check the error and return instead of failing the fuzzing. + os.Setenv("ENV", "${env:ENV2}") // nolint:tenv + defer os.Unsetenv("ENV") + err := os.Setenv("ENV2", value) // nolint:tenv + defer os.Unsetenv("ENV2") + if err != nil { + return + } + confNested, errResolveNested := resolver.Resolve(context.Background()) + + err = os.Setenv("ENV", value) // nolint:tenv + if err != nil { + return + } + confSimple, errResolveSimple := resolver.Resolve(context.Background()) + require.Equal(t, errResolveNested, errResolveSimple) + if errResolveNested != nil { + return + } + + var cfgNested TargetConfig[T] + errNested := confNested.Unmarshal(cfgNested) + + var cfgSimple TargetConfig[T] + errSimple := confSimple.Unmarshal(cfgSimple) + + require.Equal(t, errNested, errSimple) + if errNested != nil { + return + } + assert.Equal(t, cfgNested, cfgSimple) +} + +// testStrings for fuzzing targets +var testStrings = []string{ + "123", + "opentelemetry", + "!!str 123", + "\"0123\"", + "\"", + "1111:1111:1111:1111:1111::", + "{field: value}", + "0xdeadbeef", + "0b101", + "field:", + "2006-01-02T15:04:05Z07:00", +} + +func FuzzNestedString(f *testing.F) { + for _, value := range testStrings { + f.Add(value) + } + f.Fuzz(targetNested[string]) +} + +func FuzzNestedInt(f *testing.F) { + for _, value := range testStrings { + f.Add(value) + } + f.Fuzz(targetNested[int]) +} + +func FuzzNestedMap(f *testing.F) { + for _, value := range testStrings { + f.Add(value) + } + f.Fuzz(targetNested[map[string]any]) +} diff --git a/confmap/internal/e2e/types_test.go b/confmap/internal/e2e/types_test.go index aee111de7c3..30b763d8be1 100644 --- a/confmap/internal/e2e/types_test.go +++ b/confmap/internal/e2e/types_test.go @@ -38,6 +38,18 @@ type TargetConfig[T any] struct { Field T `mapstructure:"field"` } +func NewResolver(t testing.TB, path string) *confmap.Resolver { + resolver, err := confmap.NewResolver(confmap.ResolverSettings{ + URIs: []string{filepath.Join("testdata", path)}, + ProviderFactories: []confmap.ProviderFactory{ + fileprovider.NewFactory(), + envprovider.NewFactory(), + }, + }) + require.NoError(t, err) + return resolver +} + func AssertExpectedMatch[T any](t *testing.T, tt Test, conf *confmap.Conf, cfg *TargetConfig[T]) { err := conf.Unmarshal(cfg) if tt.unmarshalErr != "" { @@ -48,6 +60,29 @@ func AssertExpectedMatch[T any](t *testing.T, tt Test, conf *confmap.Conf, cfg * require.Equal(t, tt.expected, cfg.Field) } +func AssertResolvesTo(t *testing.T, resolver *confmap.Resolver, tt Test) { + conf, err := resolver.Resolve(context.Background()) + if tt.resolveErr != "" { + require.ErrorContains(t, err, tt.resolveErr) + return + } + require.NoError(t, err) + + switch tt.targetField { + case TargetFieldInt: + var cfg TargetConfig[int] + AssertExpectedMatch(t, tt, conf, &cfg) + case TargetFieldString, TargetFieldInlineString: + var cfg TargetConfig[string] + AssertExpectedMatch(t, tt, conf, &cfg) + case TargetFieldBool: + var cfg TargetConfig[bool] + AssertExpectedMatch(t, tt, conf, &cfg) + default: + t.Fatalf("unexpected target field %q", tt.targetField) + } +} + func TestTypeCasting(t *testing.T) { values := []Test{ { @@ -170,6 +205,16 @@ func TestTypeCasting(t *testing.T) { targetField: TargetFieldInlineString, expected: "inline field with 1111:1111:1111:1111:1111:: expansion", }, + { + value: "2006-01-02T15:04:05Z07:00", + targetField: TargetFieldString, + expected: "2006-01-02T15:04:05Z07:00", + }, + { + value: "2006-01-02T15:04:05Z07:00", + targetField: TargetFieldInlineString, + expected: "inline field with 2006-01-02T15:04:05Z07:00 expansion", + }, } previousValue := globalgates.StrictlyTypedInputGate.IsEnabled() @@ -186,34 +231,9 @@ func TestTypeCasting(t *testing.T) { if tt.targetField == TargetFieldInlineString { testFile = "types_expand_inline.yaml" } - - resolver, err := confmap.NewResolver(confmap.ResolverSettings{ - URIs: []string{filepath.Join("testdata", testFile)}, - ProviderFactories: []confmap.ProviderFactory{ - fileprovider.NewFactory(), - envprovider.NewFactory(), - }, - }) - require.NoError(t, err) + resolver := NewResolver(t, testFile) t.Setenv("ENV", tt.value) - - conf, err := resolver.Resolve(context.Background()) - require.NoError(t, err) - - switch tt.targetField { - case TargetFieldInt: - var cfg TargetConfig[int] - AssertExpectedMatch(t, tt, conf, &cfg) - case TargetFieldString, TargetFieldInlineString: - var cfg TargetConfig[string] - AssertExpectedMatch(t, tt, conf, &cfg) - case TargetFieldBool: - var cfg TargetConfig[bool] - AssertExpectedMatch(t, tt, conf, &cfg) - default: - t.Fatalf("unexpected target field %q", tt.targetField) - } - + AssertResolvesTo(t, resolver, tt) }) } } @@ -226,9 +246,9 @@ func TestStrictTypeCasting(t *testing.T) { expected: 123, }, { - value: "123", - targetField: TargetFieldString, - unmarshalErr: "'field' expected type 'string', got unconvertible type 'int', value: '123'", + value: "123", + targetField: TargetFieldString, + expected: "123", }, { value: "123", @@ -241,9 +261,9 @@ func TestStrictTypeCasting(t *testing.T) { expected: 83, }, { - value: "0123", - targetField: TargetFieldString, - unmarshalErr: "'field' expected type 'string', got unconvertible type 'int', value: '83'", + value: "0123", + targetField: TargetFieldString, + expected: "0123", }, { value: "0123", @@ -256,9 +276,9 @@ func TestStrictTypeCasting(t *testing.T) { expected: 3735928559, }, { - value: "0xdeadbeef", - targetField: TargetFieldString, - unmarshalErr: "'field' expected type 'string', got unconvertible type 'int', value: '3735928559'", + value: "0xdeadbeef", + targetField: TargetFieldString, + expected: "0xdeadbeef", }, { value: "0xdeadbeef", @@ -268,27 +288,27 @@ func TestStrictTypeCasting(t *testing.T) { { value: "\"0123\"", targetField: TargetFieldString, - expected: "0123", + expected: "\"0123\"", }, { value: "\"0123\"", targetField: TargetFieldInt, - unmarshalErr: "'field' expected type 'int', got unconvertible type 'string', value: '0123'", + unmarshalErr: "'field' expected type 'int', got unconvertible type 'string', value: '\"0123\"'", }, { value: "\"0123\"", targetField: TargetFieldInlineString, - expected: "inline field with 0123 expansion", + expected: "inline field with \"0123\" expansion", }, { value: "!!str 0123", targetField: TargetFieldString, - expected: "0123", + expected: "!!str 0123", }, { value: "!!str 0123", targetField: TargetFieldInlineString, - expected: "inline field with 0123 expansion", + expected: "inline field with !!str 0123 expansion", }, { value: "t", @@ -311,9 +331,19 @@ func TestStrictTypeCasting(t *testing.T) { expected: "inline field with 1111:1111:1111:1111:1111:: expansion", }, { - value: "1111:1111:1111:1111:1111::", - targetField: TargetFieldString, - unmarshalErr: "'field' expected type 'string', got unconvertible type 'map[string]interface {}', value: 'map[1111:1111:1111:1111:1111::]'", + value: "1111:1111:1111:1111:1111::", + targetField: TargetFieldString, + expected: "1111:1111:1111:1111:1111::", + }, + { + value: "2006-01-02T15:04:05Z07:00", + targetField: TargetFieldString, + expected: "2006-01-02T15:04:05Z07:00", + }, + { + value: "2006-01-02T15:04:05Z07:00", + targetField: TargetFieldInlineString, + expected: "inline field with 2006-01-02T15:04:05Z07:00 expansion", }, } @@ -326,43 +356,130 @@ func TestStrictTypeCasting(t *testing.T) { }() for _, tt := range values { - t.Run(tt.value+"/"+string(tt.targetField), func(t *testing.T) { + t.Run(tt.value+"/"+string(tt.targetField)+"/"+"direct", func(t *testing.T) { testFile := "types_expand.yaml" if tt.targetField == TargetFieldInlineString { testFile = "types_expand_inline.yaml" } - - resolver, err := confmap.NewResolver(confmap.ResolverSettings{ - URIs: []string{filepath.Join("testdata", testFile)}, - ProviderFactories: []confmap.ProviderFactory{ - fileprovider.NewFactory(), - envprovider.NewFactory(), - }, - }) - require.NoError(t, err) + resolver := NewResolver(t, testFile) t.Setenv("ENV", tt.value) + AssertResolvesTo(t, resolver, tt) + }) - conf, err := resolver.Resolve(context.Background()) - if tt.resolveErr != "" { - require.ErrorContains(t, err, tt.resolveErr) - return + t.Run(tt.value+"/"+string(tt.targetField)+"/"+"indirect", func(t *testing.T) { + testFile := "types_expand.yaml" + if tt.targetField == TargetFieldInlineString { + testFile = "types_expand_inline.yaml" } - require.NoError(t, err) - switch tt.targetField { - case TargetFieldInt: - var cfg TargetConfig[int] - AssertExpectedMatch(t, tt, conf, &cfg) - case TargetFieldString, TargetFieldInlineString: - var cfg TargetConfig[string] - AssertExpectedMatch(t, tt, conf, &cfg) - case TargetFieldBool: - var cfg TargetConfig[bool] - AssertExpectedMatch(t, tt, conf, &cfg) - default: - t.Fatalf("unexpected target field %q", tt.targetField) + resolver := NewResolver(t, testFile) + t.Setenv("ENV", "${env:ENV2}") + t.Setenv("ENV2", tt.value) + AssertResolvesTo(t, resolver, tt) + }) + } +} + +func TestRecursiveInlineString(t *testing.T) { + values := []Test{ + { + value: "123", + targetField: TargetFieldString, + expected: "The value The value 123 is wrapped is wrapped", + }, + { + value: "123", + targetField: TargetFieldInlineString, + expected: "inline field with The value The value 123 is wrapped is wrapped expansion", + }, + { + value: "opentelemetry", + targetField: TargetFieldString, + expected: "The value The value opentelemetry is wrapped is wrapped", + }, + { + value: "opentelemetry", + targetField: TargetFieldInlineString, + expected: "inline field with The value The value opentelemetry is wrapped is wrapped expansion", + }, + } + + previousValue := globalgates.StrictlyTypedInputGate.IsEnabled() + err := featuregate.GlobalRegistry().Set(globalgates.StrictlyTypedInputID, true) + require.NoError(t, err) + defer func() { + err := featuregate.GlobalRegistry().Set(globalgates.StrictlyTypedInputID, previousValue) + require.NoError(t, err) + }() + + for _, tt := range values { + t.Run(tt.value+"/"+string(tt.targetField), func(t *testing.T) { + testFile := "types_expand.yaml" + if tt.targetField == TargetFieldInlineString { + testFile = "types_expand_inline.yaml" } + resolver := NewResolver(t, testFile) + t.Setenv("ENV", "The value ${env:ENV2} is wrapped") + t.Setenv("ENV2", "The value ${env:ENV3} is wrapped") + t.Setenv("ENV3", tt.value) + AssertResolvesTo(t, resolver, tt) }) } } + +func TestRecursiveMaps(t *testing.T) { + value := "{value: 123}" + + previousValue := globalgates.StrictlyTypedInputGate.IsEnabled() + err := featuregate.GlobalRegistry().Set(globalgates.StrictlyTypedInputID, true) + require.NoError(t, err) + defer func() { + seterr := featuregate.GlobalRegistry().Set(globalgates.StrictlyTypedInputID, previousValue) + require.NoError(t, seterr) + }() + + resolver := NewResolver(t, "types_expand.yaml") + t.Setenv("ENV", `{env: "${env:ENV2}", inline: "inline ${env:ENV2}"}`) + t.Setenv("ENV2", `{env2: "${env:ENV3}"}`) + t.Setenv("ENV3", value) + conf, err := resolver.Resolve(context.Background()) + require.NoError(t, err) + + type Value struct { + Value int `mapstructure:"value"` + } + type ENV2 struct { + Env2 Value `mapstructure:"env2"` + } + type ENV struct { + Env ENV2 `mapstructure:"env"` + Inline string `mapstructure:"inline"` + } + type Target struct { + Field ENV `mapstructure:"field"` + } + + var cfg Target + err = conf.Unmarshal(&cfg) + require.NoError(t, err) + require.Equal(t, + Target{Field: ENV{ + Env: ENV2{ + Env2: Value{ + Value: 123, + }}, + Inline: "inline {env2: \"{value: 123}\"}", + }}, + cfg, + ) + + confStr, err := resolver.Resolve(context.Background()) + require.NoError(t, err) + var cfgStr TargetConfig[string] + err = confStr.Unmarshal(&cfgStr) + require.NoError(t, err) + require.Equal(t, `{env: "{env2: "{value: 123}"}", inline: "inline {env2: "{value: 123}"}"}`, + cfgStr.Field, + ) +} diff --git a/confmap/provider.go b/confmap/provider.go index 161e2473971..3338d72bddf 100644 --- a/confmap/provider.go +++ b/confmap/provider.go @@ -9,6 +9,8 @@ import ( "go.uber.org/zap" "gopkg.in/yaml.v3" + + "go.opentelemetry.io/collector/internal/globalgates" ) // ProviderSettings are the settings to initialize a Provider. @@ -140,7 +142,11 @@ func NewRetrievedFromYAML(yamlBytes []byte, opts ...RetrievedOption) (*Retrieved switch v := rawConf.(type) { case string: - opts = append(opts, withStringRepresentation(v)) + val := v + if globalgates.StrictlyTypedInputGate.IsEnabled() { + val = string(yamlBytes) + } + return NewRetrieved(val, append(opts, withStringRepresentation(val))...) case int, int32, int64, float32, float64, bool, map[string]any: opts = append(opts, withStringRepresentation(string(yamlBytes))) } diff --git a/confmap/provider_test.go b/confmap/provider_test.go index ebbd5562ec3..e168b49fa6a 100644 --- a/confmap/provider_test.go +++ b/confmap/provider_test.go @@ -85,8 +85,8 @@ func TestNewRetrievedFromYAMLString(t *testing.T) { }, { yaml: "\"string\"", - value: "string", - altStrRepr: "string", + value: "\"string\"", + altStrRepr: "\"string\"", }, { yaml: "123", diff --git a/confmap/resolver.go b/confmap/resolver.go index 7c9ed303c73..7b7de3d5092 100644 --- a/confmap/resolver.go +++ b/confmap/resolver.go @@ -167,7 +167,7 @@ func (mr *Resolver) Resolve(ctx context.Context) (*Conf, error) { cfgMap := make(map[string]any) for _, k := range retMap.AllKeys() { - val, err := mr.expandValueRecursively(ctx, retMap.Get(k)) + val, err := mr.expandValueRecursively(ctx, retMap.unsanitizedGet(k)) if err != nil { return nil, err } diff --git a/docs/rfcs/env-vars.md b/docs/rfcs/env-vars.md index a2709412788..0be8bf8c30b 100644 --- a/docs/rfcs/env-vars.md +++ b/docs/rfcs/env-vars.md @@ -178,22 +178,21 @@ matches `\${[^$}]+}`). ### Type casting rules The environment variable value is parsed by the yaml.v3 parser to an -any-typed variable and the original representation as a string is stored -for numeric types. The `yaml.v3` parser mostly follows the YAML v1.2 -specification with [*some +any-typed variable and the original representation as a string is also stored. +The `yaml.v3` parser mostly follows the YAML v1.2 specification with [*some exceptions*](https://github.com/go-yaml/yaml#compatibility). You can see how it works for some edge cases in this example: [*https://go.dev/play/p/RtPmH8aZA1X*](https://go.dev/play/p/RtPmH8aZA1X). When unmarshalling, we use mapstructure with WeaklyTypedInput -**disabled**. We check via a hook an `AsString` method from confmap.Conf +**disabled**. We check via a hook the original string representation of the data and use its return value when it is valid and we are mapping to a string field. This method has default casting rules for unambiguous scalar types but may return the original representation depending on the construction of confmap.Conf (see the comparison table below for details). For using this notation in inline mode (e.g.`http://endpoint/${env:PATH}`), we -use the `AsString` method from confmap.Conf (see the comparison table below for details). +use the original string representation as well (see the comparison table below for details). ### Character set @@ -216,7 +215,7 @@ loading a field with the braces syntax, `env` syntax. | `0123` | integer | 83 | 83 | 83 | n/a | | `0123` | string | 0123 | 83 | 0123 | 0123 | | `0xdeadbeef` | string | 0xdeadbeef | 3735928559 | 0xdeadbeef | 0xdeadbeef | -| `"0123"` | string | "0123" | 0123 | 0123 | 0123 | -| `!!str 0123` | string | !!str 0123 | 0123 | 0123 | 0123 | +| `"0123"` | string | "0123" | 0123 | "0123" | "0123" | +| `!!str 0123` | string | !!str 0123 | 0123 | !!str 0123 | !!str 0123 | | `t` | boolean | true | true | Error: mapping string to bool | n/a | | `23` | boolean | true | true | Error: mapping integer to bool | n/a | From bfab7f6d54b762ec6e88b8daf836ac4f68fa488f Mon Sep 17 00:00:00 2001 From: Pablo Baeyens Date: Thu, 25 Jul 2024 10:22:47 +0200 Subject: [PATCH 3/3] [confmap/provider/http(s)provider] Validate URIs before fetching (#10721) #### Description Validate URLs before fetching. #### Link to tracking issue Fixes #10468, Relates to #10121 --- .chloggen/mx-psi_validate-uris.yaml | 25 ++++++++++++++++ .../configurablehttpprovider/provider.go | 5 ++++ .../configurablehttpprovider/provider_test.go | 30 ++++++++++++++++--- 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 .chloggen/mx-psi_validate-uris.yaml diff --git a/.chloggen/mx-psi_validate-uris.yaml b/.chloggen/mx-psi_validate-uris.yaml new file mode 100644 index 00000000000..69af525d77d --- /dev/null +++ b/.chloggen/mx-psi_validate-uris.yaml @@ -0,0 +1,25 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: httpprovider, httpsprovider + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Validate URIs in HTTP and HTTPS providers before fetching. + +# One or more tracking issues or pull requests related to the change +issues: [10468] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/confmap/provider/internal/configurablehttpprovider/provider.go b/confmap/provider/internal/configurablehttpprovider/provider.go index f5bac2c6d51..2d968e3bafe 100644 --- a/confmap/provider/internal/configurablehttpprovider/provider.go +++ b/confmap/provider/internal/configurablehttpprovider/provider.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "path/filepath" "strings" @@ -84,6 +85,10 @@ func (fmp *provider) Retrieve(_ context.Context, uri string, _ confmap.WatcherFu return nil, fmt.Errorf("%q uri is not supported by %q provider", uri, string(fmp.scheme)) } + if _, err := url.ParseRequestURI(uri); err != nil { + return nil, fmt.Errorf("invalid uri %q: %w", uri, err) + } + client, err := fmp.createClient() if err != nil { diff --git a/confmap/provider/internal/configurablehttpprovider/provider_test.go b/confmap/provider/internal/configurablehttpprovider/provider_test.go index 125c1cbdd80..0561d51030a 100644 --- a/confmap/provider/internal/configurablehttpprovider/provider_test.go +++ b/confmap/provider/internal/configurablehttpprovider/provider_test.go @@ -297,9 +297,31 @@ func TestValidateProviderScheme(t *testing.T) { assert.NoError(t, confmaptest.ValidateProviderScheme(New(HTTPScheme, confmaptest.NewNopProviderSettings()))) } -func TestInvalidTransport(t *testing.T) { - fp := New("foo", confmaptest.NewNopProviderSettings()) +func TestInvalidURI(t *testing.T) { + fp := New(HTTPScheme, confmaptest.NewNopProviderSettings()) - _, err := fp.Retrieve(context.Background(), "foo://..", nil) - assert.Error(t, err) + tests := []struct { + uri string + err string + }{ + { + uri: "foo://..", + err: "uri is not supported by \"http\" provider", + }, + { + uri: "http://", + err: "no Host in request URL", + }, + { + uri: "http://{}", + err: "invalid character \"{\" in host name", + }, + } + + for _, tt := range tests { + t.Run(tt.uri, func(t *testing.T) { + _, err := fp.Retrieve(context.Background(), tt.uri, nil) + assert.ErrorContains(t, err, tt.err) + }) + } }