From 7db61398716cf3588acdccb429668bf6e3b1a644 Mon Sep 17 00:00:00 2001 From: Yigithan Karabulut Date: Sat, 7 Dec 2024 19:43:31 +0300 Subject: [PATCH] added utf8 support to truncate_all function Signed-off-by: Yigithan Karabulut --- pkg/ottl/ottlfuncs/README.md | 2 + pkg/ottl/ottlfuncs/func_truncate_all.go | 12 ++- pkg/ottl/ottlfuncs/func_truncate_all_test.go | 81 +++++++++++++++++++- 3 files changed, 90 insertions(+), 5 deletions(-) diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 8272d0d19d8c..ec72676a539c 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -393,6 +393,8 @@ The `truncate_all` function truncates all string values in a `pcommon.Map` so th `target` is a path expression to a `pcommon.Map` type field. `limit` is a non-negative integer. +If truncating at exactly the limit would split a multi-byte UTF-8 character, the string value is instead truncated just before that character begins. + The map will be mutated such that the number of characters in all string values is less than or equal to the limit. Non-string values are ignored. 
Examples: diff --git a/pkg/ottl/ottlfuncs/func_truncate_all.go b/pkg/ottl/ottlfuncs/func_truncate_all.go index b10479bd6e3a..1c6f863c7840 100644 --- a/pkg/ottl/ottlfuncs/func_truncate_all.go +++ b/pkg/ottl/ottlfuncs/func_truncate_all.go @@ -6,6 +6,7 @@ package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-c import ( "context" "fmt" + "unicode/utf8" "go.opentelemetry.io/collector/pdata/pcommon" @@ -47,7 +48,12 @@ func TruncateAll[K any](target ottl.PMapGetter[K], limit int64) (ottl.ExprFunc[K val.Range(func(_ string, value pcommon.Value) bool { stringVal := value.Str() if int64(len(stringVal)) > limit { - value.SetStr(stringVal[:limit]) + /* Back off one byte at a time on a local copy until the prefix is valid UTF-8; decrementing the shared limit would also shrink it for every value processed afterwards. The empty string is valid, so the loop always terminates. */ + truncatedStr := stringVal[:limit] + for !utf8.ValidString(truncatedStr) { + truncatedStr = truncatedStr[:len(truncatedStr)-1] + } + value.SetStr(truncatedStr) } return true }) diff --git a/pkg/ottl/ottlfuncs/func_truncate_all_test.go b/pkg/ottl/ottlfuncs/func_truncate_all_test.go index f2c43a349ab1..f6ac923673d4 100644 --- a/pkg/ottl/ottlfuncs/func_truncate_all_test.go +++ b/pkg/ottl/ottlfuncs/func_truncate_all_test.go @@ -16,7 +16,8 @@ import ( func Test_truncateAll(t *testing.T) { input := pcommon.NewMap() - input.PutStr("test", "hello world") + // 19 bytes. "hello world, " is 13 bytes, "世界" is 6 bytes. 
+ input.PutStr("test", "hello world, 世界") input.PutInt("test2", 3) input.PutBool("test3", true) @@ -57,7 +58,57 @@ func Test_truncateAll(t *testing.T) { target: target, limit: 100, want: func(expectedMap pcommon.Map) { - expectedMap.PutStr("test", "hello world") + expectedMap.PutStr("test", "hello world, 世界") + expectedMap.PutInt("test2", 3) + expectedMap.PutBool("test3", true) + }, + }, + { + name: "truncate broken first utf8 character encoding - 1", + target: target, + limit: 14, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("test", "hello world, ") + expectedMap.PutInt("test2", 3) + expectedMap.PutBool("test3", true) + }, + }, + { + name: "truncate broken first utf8 character encoding - 2", + target: target, + limit: 15, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("test", "hello world, ") + expectedMap.PutInt("test2", 3) + expectedMap.PutBool("test3", true) + }, + }, + { + name: "truncate first utf8 character exactly", + target: target, + limit: 16, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("test", "hello world, 世") + expectedMap.PutInt("test2", 3) + expectedMap.PutBool("test3", true) + }, + }, + { + name: "truncate broken second utf8 character encoding - 1", + target: target, + limit: 17, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("test", "hello world, 世") + expectedMap.PutInt("test2", 3) + expectedMap.PutBool("test3", true) + }, + }, + { + name: "truncate broken second utf8 character encoding - 2", + target: target, + limit: 18, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("test", "hello world, 世") expectedMap.PutInt("test2", 3) expectedMap.PutBool("test3", true) }, @@ -65,9 +116,9 @@ func Test_truncateAll(t *testing.T) { { name: "truncate exact", target: target, - limit: 11, + limit: 19, want: func(expectedMap pcommon.Map) { - expectedMap.PutStr("test", "hello world") + expectedMap.PutStr("test", "hello world, 世界") expectedMap.PutInt("test2", 3) expectedMap.PutBool("test3", 
true) }, @@ -127,3 +178,25 @@ func Test_truncateAll_get_nil(t *testing.T) { _, err = exprFunc(nil, nil) assert.Error(t, err) } + +func Test_truncateAll_utf8_zero_limit(t *testing.T) { /* despite the name, the limit passed below is 1; the case covered is a limit too small to hold any complete character */ + input := pcommon.NewMap() + input.PutStr("test", "世界") /* two 3-byte characters, 6 bytes in total */ + + target := &ottl.StandardPMapGetter[pcommon.Map]{ + Getter: func(_ context.Context, tCtx pcommon.Map) (any, error) { + return tCtx, nil + }, + } + + exprFunc, err := TruncateAll(target, 1) /* a 1-byte limit cuts inside the first character */ + assert.NoError(t, err) + + result, err := exprFunc(nil, input) + assert.NoError(t, err) + assert.Nil(t, result) + + expected := pcommon.NewMap() + expected.PutStr("test", "") /* no complete character fits, so the value is expected to become empty */ + assert.Equal(t, expected, input) +}