Skip to content

Commit

Permalink
[pkg/translator/prometheus] Add option to keep UTF-8 characters
Browse files Browse the repository at this point in the history
Signed-off-by: Arthur Silva Sens <[email protected]>
  • Loading branch information
ArthurSens committed Nov 2, 2024
1 parent e2b8f38 commit dd96ad5
Show file tree
Hide file tree
Showing 18 changed files with 330 additions and 147 deletions.
27 changes: 27 additions & 0 deletions .chloggen/allowutf8-prom-translator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/translator/prometheus

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Optionally allow UTF-8 characters in Prometheus metric and label names.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35459]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
2 changes: 1 addition & 1 deletion exporter/googlemanagedprometheusexporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (c *GMPConfig) toCollectorConfig() collector.Config {
cfg.MetricConfig.ServiceResourceLabels = false
// Update metric naming to match GMP conventions
cfg.MetricConfig.GetMetricName = func(baseName string, metric pmetric.Metric) (string, error) {
compliantName := prometheus.BuildCompliantName(metric, "", c.MetricConfig.Config.AddMetricSuffixes)
compliantName := prometheus.BuildCompliantName(metric, "", c.MetricConfig.Config.AddMetricSuffixes, false)
return googlemanagedprometheus.GetMetricName(baseName, compliantName, metric)
}
// Map to the prometheus_target monitored resource
Expand Down
8 changes: 4 additions & 4 deletions exporter/prometheusexporter/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func newCollector(config *Config, logger *zap.Logger) *collector {
return &collector{
accumulator: newAccumulator(logger, config.MetricExpiration),
logger: logger,
namespace: prometheustranslator.CleanUpString(config.Namespace),
namespace: prometheustranslator.CleanUpString(config.Namespace, false),
sendTimestamps: config.SendTimestamps,
constLabels: config.ConstLabels,
addMetricSuffixes: config.AddMetricSuffixes,
Expand Down Expand Up @@ -110,7 +110,7 @@ func (c *collector) getMetricMetadata(metric pmetric.Metric, attributes pcommon.
values := make([]string, 0, attributes.Len()+2)

attributes.Range(func(k string, v pcommon.Value) bool {
keys = append(keys, prometheustranslator.NormalizeLabel(k))
keys = append(keys, prometheustranslator.NormalizeLabel(k, false))
values = append(values, v.AsString())
return true
})
Expand All @@ -125,7 +125,7 @@ func (c *collector) getMetricMetadata(metric pmetric.Metric, attributes pcommon.
}

return prometheus.NewDesc(
prometheustranslator.BuildCompliantName(metric, c.namespace, c.addMetricSuffixes),
prometheustranslator.BuildCompliantName(metric, c.namespace, c.addMetricSuffixes, false),
metric.Description(),
keys,
c.constLabels,
Expand Down Expand Up @@ -327,7 +327,7 @@ func (c *collector) createTargetInfoMetrics(resourceAttrs []pcommon.Map) ([]prom
})

attributes.Range(func(k string, v pcommon.Value) bool {
finalKey := prometheustranslator.NormalizeLabel(k)
finalKey := prometheustranslator.NormalizeLabel(k, false)
if existingVal, ok := labels[finalKey]; ok {
labels[finalKey] = existingVal + ";" + v.AsString()
} else {
Expand Down
2 changes: 1 addition & 1 deletion exporter/prometheusremotewriteexporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func validateAndSanitizeExternalLabels(cfg *Config) (map[string]string, error) {
if key == "" || value == "" {
return nil, fmt.Errorf("prometheus remote write: external labels configuration contains an empty key or value")
}
sanitizedLabels[prometheustranslator.NormalizeLabel(key)] = value
sanitizedLabels[prometheustranslator.NormalizeLabel(key, false)] = value
}

return sanitizedLabels, nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/translator/loki/logs_to_loki.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func LogToLokiEntry(lr plog.LogRecord, rl pcommon.Resource, scope pcommon.Instru
for label := range mergedLabels {
// Loki doesn't support dots in label names
// labelName is normalized label name to follow Prometheus label names standard
labelName := prometheustranslator.NormalizeLabel(string(label))
labelName := prometheustranslator.NormalizeLabel(string(label), false)
labels[model.LabelName(labelName)] = mergedLabels[label]
}

Expand Down
8 changes: 5 additions & 3 deletions pkg/translator/prometheus/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.22.0

require (
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.112.0
github.com/prometheus/common v0.60.0
github.com/stretchr/testify v1.9.0
go.opentelemetry.io/collector/featuregate v1.18.0
go.opentelemetry.io/collector/pdata v1.18.0
Expand All @@ -19,10 +20,11 @@ require (
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/net v0.29.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
google.golang.org/grpc v1.67.1 // indirect
google.golang.org/protobuf v1.35.1 // indirect
Expand Down
16 changes: 10 additions & 6 deletions pkg/translator/prometheus/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 96 additions & 0 deletions pkg/translator/prometheus/helpers_from_stdlib.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Provenance-includes-location: https://github.com/golang/go/blob/f2d118fd5f7e872804a5825ce29797f81a28b0fa/src/strings/strings.go
// Provenance-includes-license: BSD-3-Clause
// Provenance-includes-copyright: Copyright The Go Authors.

package prometheus

import "strings"

// fieldsFunc is a copy of strings.FieldsFunc from the Go standard library,
// but it also returns the separators as part of the result.
//
// It splits s at each run of code points c satisfying f(c) and returns the
// fields together with the separators that terminated them. For every field
// that is followed by at least one separator rune, exactly one entry is added
// to the separators slice: the first rune of the run that ends the field.
// Separators preceding the first field are not recorded, and only the first
// rune of a run of consecutive separators is kept. Consequently
// len(separators) is either len(fields) or len(fields)-1.
//
// Separators are returned as complete runes, so multi-byte UTF-8 separators
// are preserved. Invalid UTF-8 bytes are reported to f (and recorded) as
// utf8.RuneError, following the semantics of ranging over a string.
func fieldsFunc(s string, f func(rune) bool) ([]string, []string) {
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}
	spans := make([]span, 0, 32)
	separators := make([]string, 0, 32)

	// Find the field start and end indices.
	// Doing this in a separate pass (rather than slicing the string s
	// and collecting the result substrings right away) is significantly
	// more efficient, possibly due to cache effects.
	start := -1 // valid span start if >= 0
	for end, r := range s {
		if f(r) {
			if start >= 0 {
				spans = append(spans, span{start, end})
				// Set start to a negative value.
				// Note: using -1 here consistently and reproducibly
				// slows down this code by a several percent on amd64.
				start = ^start
				// Record the whole rune, not the byte at s[end]: converting a
				// single byte of a multi-byte sequence would yield a different
				// character.
				separators = append(separators, string(r))
			}
		} else if start < 0 {
			start = end
		}
	}

	// Last field might end at EOF.
	if start >= 0 {
		spans = append(spans, span{start, len(s)})
	}

	// Create strings from recorded field indices.
	a := make([]string, len(spans))
	for i, sp := range spans {
		a[i] = s[sp.start:sp.end]
	}

	return a, separators
}

// join is a copy of strings.Join from the Go standard library,
// but it also accepts a slice of separators to join the elements with.
//
// separators[i] is written between elems[i] and elems[i+1]. If the slice of
// separators is shorter than the number of gaps between elements, def is used
// for the remaining gaps. Surplus separators are ignored.
// It returns "" for an empty elems and elems[0] for a single element.
// We also don't check for integer overflow.
func join(elems []string, separators []string, def string) string {
	switch len(elems) {
	case 0:
		return ""
	case 1:
		return elems[0]
	}

	sepLen := len(separators)

	// Compute the exact output size: the first element plus, for every
	// following element, the separator that precedes it and the element itself.
	n := len(elems[0])
	for i, elem := range elems[1:] {
		if i < sepLen {
			n += len(separators[i])
		} else {
			n += len(def)
		}
		n += len(elem)
	}

	var b strings.Builder
	b.Grow(n)
	b.WriteString(elems[0])
	for i, elem := range elems[1:] {
		if i < sepLen {
			b.WriteString(separators[i])
		} else {
			b.WriteString(def)
		}
		b.WriteString(elem)
	}
	return b.String()
}
18 changes: 6 additions & 12 deletions pkg/translator/prometheus/normalize_label.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"
"unicode"

"github.com/prometheus/common/model"
"go.opentelemetry.io/collector/featuregate"
)

Expand All @@ -24,15 +25,16 @@ var dropSanitizationGate = featuregate.GlobalRegistry().MustRegister(
// Labels that start with non-letter rune will be prefixed with "key_"
//
// Exception is made for double-underscores which are allowed
func NormalizeLabel(label string) string {
func NormalizeLabel(label string, allowUTF8 bool) string {

// Trivial case
if len(label) == 0 {
return label
}

// Replace all non-alphanumeric runes with underscores
label = strings.Map(sanitizeRune, label)
if allowUTF8 {
return label
}

// If label starts with a number, prepend with "key_"
if unicode.IsDigit(rune(label[0])) {
Expand All @@ -41,13 +43,5 @@ func NormalizeLabel(label string) string {
label = "key" + label
}

return label
}

// Return '_' for anything non-alphanumeric
func sanitizeRune(r rune) rune {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
return r
}
return '_'
return model.EscapeName(label, model.UnderscoreEscaping)
}
63 changes: 48 additions & 15 deletions pkg/translator/prometheus/normalize_label_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,57 @@ import (
)

func TestSanitize(t *testing.T) {
oldSanitization := dropSanitizationGate.IsEnabled()
defer func() {
testutil.SetFeatureGateForTest(t, dropSanitizationGate, oldSanitization)
}()

defer testutil.SetFeatureGateForTest(t, dropSanitizationGate, false)()
for _, dropSanitization := range []bool{true, false} {
testutil.SetFeatureGateForTest(t, dropSanitizationGate, dropSanitization)

require.Equal(t, "", NormalizeLabel(""), "")
require.Equal(t, "key_test", NormalizeLabel("_test"))
require.Equal(t, "key_0test", NormalizeLabel("0test"))
require.Equal(t, "test", NormalizeLabel("test"))
require.Equal(t, "test__", NormalizeLabel("test_/"))
require.Equal(t, "__test", NormalizeLabel("__test"))
}
// Drop sanitization gate is only relevant if UTF8 isn't allowed. We use `false` for all cases.
if dropSanitization {
require.Equal(t, "", NormalizeLabel("", false), "")
require.Equal(t, "_test", NormalizeLabel("_test", false))
require.Equal(t, "key_0test", NormalizeLabel("0test", false)) // Even when dropping sanitization, we still add "key_" to labels starting with digits
require.Equal(t, "test", NormalizeLabel("test", false))
require.Equal(t, "test__", NormalizeLabel("test_/", false))
require.Equal(t, "__test", NormalizeLabel("__test", false))
} else {
require.Equal(t, "", NormalizeLabel("", false), "")
require.Equal(t, "key_test", NormalizeLabel("_test", false))
require.Equal(t, "key_0test", NormalizeLabel("0test", false))
require.Equal(t, "test", NormalizeLabel("test", false))
require.Equal(t, "test__", NormalizeLabel("test_/", false))
require.Equal(t, "__test", NormalizeLabel("__test", false))
}
}

func TestSanitizeDropSanitization(t *testing.T) {
}

defer testutil.SetFeatureGateForTest(t, dropSanitizationGate, true)()
func TestNormalizeLabel(t *testing.T) {
tests := []struct {
label string
allowUTF8 bool
expected string
}{
{"", false, ""},
{"", true, ""},
{"label_with_special_chars!", false, "label_with_special_chars_"},
{"label_with_special_chars!", true, "label_with_special_chars!"},
{"label_with_foreign_characteres_字符", false, "label_with_foreign_characteres___"},
{"label_with_foreign_characteres_字符", true, "label_with_foreign_characteres_字符"},
{"label.with.dots", false, "label_with_dots"},
{"label.with.dots", true, "label.with.dots"},
{"123label", false, "key_123label"},
{"123label", true, "123label"}, // UTF-8 allows numbers at the beginning
{"_label", false, "key_label"},
{"_label", true, "_label"}, // UTF-8 allows single underscores at the beginning
{"__label", false, "__label"},
}

require.Equal(t, "", NormalizeLabel(""))
require.Equal(t, "_test", NormalizeLabel("_test"))
require.Equal(t, "key_0test", NormalizeLabel("0test"))
require.Equal(t, "test", NormalizeLabel("test"))
require.Equal(t, "__test", NormalizeLabel("__test"))
for _, test := range tests {
result := NormalizeLabel(test.label, test.allowUTF8)
require.Equal(t, test.expected, result)
}
}
Loading

0 comments on commit dd96ad5

Please sign in to comment.