Skip to content

Commit

Permalink
fix edge case with upper case
Browse files Browse the repository at this point in the history
  • Loading branch information
yuval-k committed May 3, 2024
1 parent e9aa093 commit 874c045
Show file tree
Hide file tree
Showing 6 changed files with 313 additions and 3 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ test-report.json
junit*.xml
*cov

*.test
*.test
kubeutils/testdata/fuzz/
3 changes: 3 additions & 0 deletions changelog/v0.7.1/sanitize.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
changelog:
- type: NON_USER_FACING
description: Fix and make santize faster
105 changes: 105 additions & 0 deletions kubeutils/strings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package kubeutils

import (
"bytes"
"crypto/md5"
"encoding/hex"
"hash"
"sync"
)

// This is an extrememly important number for these shortened names.
// It signifies the spot of separation from the original name and the hash.
// It is used for many string building and parsing operations below.
const magicNumber = 31
const totalSize = magicNumber*2 + 1
const encodedMd5 = 2 * md5.Size

const separator = '-'

// We can short-circuit the comparison if the first 31 characters are not equal.
// Otherweise we need to compare the shortened version of the strings.
func ShortenedEquals(shortened, standard string) bool {

// If the standard string is less than 63 characters, we can just compare the strings.
if len(standard) <= totalSize {
return shortened == standard
}

// If the shortened string is less than or equal to 32 characters, we can just compare the strings.
// Also if it's less than 32 the below checks may crash.
if len(shortened) <= magicNumber+1 {
return shortened == standard
}

// Check the first 31 characters, if they're not equal we can exit early.
if shortened[:magicNumber] != standard[:magicNumber] {
return false
}

// If 32nd character of the shortened string is not a '-' or the 32nd character of the standard string is not a '-'
// we can exit early.
// In theory this shouldn't be necessary, but this label can technically be modified by the user,
// so it's safer to double check.
if shortened[magicNumber] != separator {
return false
}

// Check the last 32 characters of the shortened string against the hash of the standard string.
hashed := hashName(standard)
return shortened[magicNumber+1:] == string(hashed[:magicNumber])
}

// shortenName is extrememly inefficient with it's allocation of slices for hashing.
// We can re-use the arrays to avoid this allocation. However, this code may be called
// from multiple go-routines simultaneously so we must house these objects in sync.Pools

// Pool of MD5 hashers to avoid allocation.
var md5HasherPool = sync.Pool{
New: func() interface{} {
return md5.New()
},
}

// Pool of string builders to avoid allocation.
var byteBufferPool = sync.Pool{
New: func() interface{} {
b := &bytes.Buffer{}
b.Grow(totalSize)
return b
},
}

// hashName returns a hash of the input string in base 16 format
// This function is optimized for speed and memory usage.
// It should aboid nearly all allocations by re-using the same buffers whenever possible.
func hashName(name string) [encodedMd5]byte {
hasher := md5HasherPool.Get().(hash.Hash)
hasher.Reset()
hasher.Write([]byte(name))
hashArray := [md5.Size]byte{}
hash := hasher.Sum(hashArray[:0])
// Cannot use hex.EncodedLen() here because it's a func, but it just returns 2 * len(src)
hashBufferArray := [encodedMd5]byte{}
hex.Encode(hashBufferArray[:], hash)
md5HasherPool.Put(hasher)
return hashBufferArray
}

// shortenName returns a shortened version of the input string.
// It is based on the `kubeutils.SanitizeNameV2` function, but it
// just does the shortening part.
func ShortenName(name string) string {
if len(name) > totalSize {
hash := hashName(name)
builder := byteBufferPool.Get().(*bytes.Buffer)
builder.Reset()
builder.Grow(totalSize)
builder.WriteString(name[:magicNumber])
builder.WriteRune(separator)
builder.Write(hash[:magicNumber])
name = builder.String()
byteBufferPool.Put(builder)
}
return name
}
138 changes: 138 additions & 0 deletions kubeutils/strings_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package kubeutils

import (
"crypto/md5"
"fmt"
"testing"
)

// This function is a copy of the old version of this function.
// It is used to ensure parity with the old implementation.
// https://github.com/solo-io/gloo-mesh-enterprise/blob/3953b2582ec63bb3cbde567bbc69e19b45ea8f35/pkg/labels/labels.go#L402C1-L409C2
func shortenName(name string) string {
if len(name) > 63 {
hash := md5.Sum([]byte(name))
name = fmt.Sprintf("%s-%x", name[:31], hash)
name = name[:63]
}
return name
}

func BenchmarkShortenEqual(b *testing.B) {
b.Run("shorten name old", func(b *testing.B) {
for i := 0; i < b.N; i++ {
shortenName("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
}
})

b.Run("shortened equals--worst case", func(b *testing.B) {
shortened := "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})

b.Run("shortened equals--different prefix", func(b *testing.B) {
shortened := "jfdklanfkljasfhjhlxacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})

b.Run("shortened equals--less than 63 characters", func(b *testing.B) {
shortened := "hello"
standard := "hello"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})
}

func FuzzShortNameParity(f *testing.F) {
// Random string < 63
f.Add("hello")
// Random string > 63
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
f.Fuzz(func(t *testing.T, a string) {
oldName := shortenName(a)
newName := ShortenName(a)
if oldName != newName {
t.Fatalf("shortenName(%s) = %s, ShortenName(%s) = %s", a, oldName, a, newName)
}

equal := ShortenedEquals(newName, a)
if !equal {
t.Fatalf("ShortenedEquals(%s, %s) = %t", newName, a, equal)
}
})
}

func TestShortenName(t *testing.T) {
t.Run("shorten name < 63", func(t *testing.T) {
name := "hello"
shortened := ShortenName(name)
if shortened != name {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}
})

t.Run("shorten name > 63", func(t *testing.T) {
name := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
shortened := ShortenName(name)
if len(shortened) != 63 {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}

if shortened != "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d" {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}
})
}

func TestShortenedEquals(t *testing.T) {

testCases := []struct {
name string
shortened string
equal bool
}{
{
name: "hello",
shortened: "hello",
equal: true,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d",
equal: true,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848",
equal: false,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjsf1e0028d0fbfe9afbd1a8bb9b53848ds",
equal: false,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkjasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848ds",
equal: false,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
equal := ShortenedEquals(tc.shortened, tc.name)
if equal != tc.equal {
t.Fatalf("ShortenedEquals(%s, %s) = %t", tc.shortened, tc.name, equal)
}
})

}

}
24 changes: 22 additions & 2 deletions kubeutils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"crypto/md5"
"fmt"
"strings"
"unicode"
)

// use SanitizeNameV2
Expand Down Expand Up @@ -36,12 +35,33 @@ func SanitizeNameV2(name string) string {
case '[', ']', '\n', '"', '\'':
return -1
}
return unicode.ToLower(r)
return r
}, name)
if len(name) > 63 {
name = ShortenName(name)
}
return strings.ToLower(name)
}

// Here for fuzz tests..
func SanitizeNameV2Old(name string) string {
name = strings.Replace(name, "*", "-", -1)
name = strings.Replace(name, "/", "-", -1)
name = strings.Replace(name, ".", "-", -1)
name = strings.Replace(name, "[", "", -1)
name = strings.Replace(name, "]", "", -1)
name = strings.Replace(name, ":", "-", -1)
name = strings.Replace(name, "_", "-", -1)
name = strings.Replace(name, " ", "-", -1)
name = strings.Replace(name, "\n", "", -1)
name = strings.Replace(name, "\"", "", -1)
name = strings.Replace(name, "'", "", -1)
if len(name) > 63 {
hash := md5.Sum([]byte(name))
name = fmt.Sprintf("%s-%x", name[:31], hash)
name = name[:63]
}
name = strings.Replace(name, ".", "-", -1)
name = strings.ToLower(name)
return name
}
43 changes: 43 additions & 0 deletions kubeutils/util_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package kubeutils

import (
"testing"
"time"
"unicode/utf8"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand Down Expand Up @@ -63,3 +65,44 @@ var _ = Describe("sanitize name", func() {
}, gmeasure.SamplingConfig{N: 200, Duration: time.Minute})
})
})

func FuzzSanitizeNameParity(f *testing.F) {
// Random string < 63
f.Add("VirtualGateway-istio-ingressgateway-bookinfo-cluster-1-istio-ingressgateway-istio-gateway-ns-cluster-1-gloo-mesh-cluster-1-HTTPS.443-anything")
f.Add("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
f.Add("abc")
f.Add("abc123")
f.Add("bb*")
f.Add("bb*b")
f.Add("bb/")
f.Add("bb/b")
f.Add("bb.")
f.Add("bb.b")
f.Add("bb[")
f.Add("bb[b")
f.Add("bb]")
f.Add("bb]b")
f.Add("bb:")
f.Add("bb:b")
f.Add("bb ")
f.Add("bb b")
f.Add("bb\n")
f.Add("bb\nb")
f.Add("aa\"")
f.Add("bb\"b")
f.Add("aa'")
f.Add("bb'b")
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")

f.Fuzz(func(t *testing.T, a string) {
// we can only get a valid kube name that's alphanumeric
if !utf8.Valid([]byte(a)) {
t.Skip("Skipping non-valid utf8 input")
}
oldName := SanitizeNameV2(a)
newName := SanitizeNameV2Old(a)
if oldName != newName {
t.Fatalf("SanitizeNameV2(%s) = %s, SanitizeNameV2Old(%s) = %s", a, oldName, a, newName)
}
})
}

0 comments on commit 874c045

Please sign in to comment.