Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix edge case with upper case #48

Merged
merged 1 commit into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ test-report.json
junit*.xml
*cov

*.test
*.test
kubeutils/testdata/fuzz/
3 changes: 3 additions & 0 deletions changelog/v0.7.1/sanitize.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
changelog:
- type: NON_USER_FACING
description: Fix and make santize faster
105 changes: 105 additions & 0 deletions kubeutils/strings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package kubeutils

import (
"bytes"
"crypto/md5"
"encoding/hex"
"hash"
"sync"
)

// This is an extrememly important number for these shortened names.
// It signifies the spot of separation from the original name and the hash.
// It is used for many string building and parsing operations below.
const magicNumber = 31
const totalSize = magicNumber*2 + 1
const encodedMd5 = 2 * md5.Size

const separator = '-'

// We can short-circuit the comparison if the first 31 characters are not equal.
// Otherweise we need to compare the shortened version of the strings.
func ShortenedEquals(shortened, standard string) bool {

// If the standard string is less than 63 characters, we can just compare the strings.
if len(standard) <= totalSize {
return shortened == standard
}

// If the shortened string is less than or equal to 32 characters, we can just compare the strings.
// Also if it's less than 32 the below checks may crash.
if len(shortened) <= magicNumber+1 {
return shortened == standard
}

// Check the first 31 characters, if they're not equal we can exit early.
if shortened[:magicNumber] != standard[:magicNumber] {
return false
}

// If 32nd character of the shortened string is not a '-' or the 32nd character of the standard string is not a '-'
// we can exit early.
// In theory this shouldn't be necessary, but this label can technically be modified by the user,
// so it's safer to double check.
if shortened[magicNumber] != separator {
return false
}

// Check the last 32 characters of the shortened string against the hash of the standard string.
hashed := hashName(standard)
return shortened[magicNumber+1:] == string(hashed[:magicNumber])
}

// shortenName is extrememly inefficient with it's allocation of slices for hashing.
// We can re-use the arrays to avoid this allocation. However, this code may be called
// from multiple go-routines simultaneously so we must house these objects in sync.Pools

// Pool of MD5 hashers to avoid allocation.
var md5HasherPool = sync.Pool{
New: func() interface{} {
return md5.New()
},
}

// Pool of string builders to avoid allocation.
var byteBufferPool = sync.Pool{
New: func() interface{} {
b := &bytes.Buffer{}
b.Grow(totalSize)
return b
},
}

// hashName returns a hash of the input string in base 16 format
// This function is optimized for speed and memory usage.
// It should aboid nearly all allocations by re-using the same buffers whenever possible.
func hashName(name string) [encodedMd5]byte {
hasher := md5HasherPool.Get().(hash.Hash)
hasher.Reset()
hasher.Write([]byte(name))
hashArray := [md5.Size]byte{}
hash := hasher.Sum(hashArray[:0])
// Cannot use hex.EncodedLen() here because it's a func, but it just returns 2 * len(src)
hashBufferArray := [encodedMd5]byte{}
hex.Encode(hashBufferArray[:], hash)
md5HasherPool.Put(hasher)
return hashBufferArray
}

// shortenName returns a shortened version of the input string.
// It is based on the `kubeutils.SanitizeNameV2` function, but it
// just does the shortening part.
func ShortenName(name string) string {
if len(name) > totalSize {
hash := hashName(name)
builder := byteBufferPool.Get().(*bytes.Buffer)
builder.Reset()
builder.Grow(totalSize)
builder.WriteString(name[:magicNumber])
builder.WriteRune(separator)
builder.Write(hash[:magicNumber])
name = builder.String()
byteBufferPool.Put(builder)
}
return name
}
137 changes: 137 additions & 0 deletions kubeutils/strings_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package kubeutils

import (
"crypto/md5"
"fmt"
"testing"
)

// This function is a copy of the old version of this function.
// It is used to ensure parity with the old implementation.
func shortenName(name string) string {
if len(name) > 63 {
hash := md5.Sum([]byte(name))
name = fmt.Sprintf("%s-%x", name[:31], hash)
name = name[:63]
}
return name
}

func BenchmarkShortenEqual(b *testing.B) {
b.Run("shorten name old", func(b *testing.B) {
for i := 0; i < b.N; i++ {
shortenName("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
}
})

b.Run("shortened equals--worst case", func(b *testing.B) {
shortened := "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})

b.Run("shortened equals--different prefix", func(b *testing.B) {
shortened := "jfdklanfkljasfhjhlxacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d"
standard := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})

b.Run("shortened equals--less than 63 characters", func(b *testing.B) {
shortened := "hello"
standard := "hello"
for i := 0; i < b.N; i++ {
ShortenedEquals(shortened, standard)
}
})
}

func FuzzShortNameParity(f *testing.F) {
// Random string < 63
f.Add("hello")
// Random string > 63
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")
f.Fuzz(func(t *testing.T, a string) {
oldName := shortenName(a)
newName := ShortenName(a)
if oldName != newName {
t.Fatalf("shortenName(%s) = %s, ShortenName(%s) = %s", a, oldName, a, newName)
}

equal := ShortenedEquals(newName, a)
if !equal {
t.Fatalf("ShortenedEquals(%s, %s) = %t", newName, a, equal)
}
})
}

func TestShortenName(t *testing.T) {
t.Run("shorten name < 63", func(t *testing.T) {
name := "hello"
shortened := ShortenName(name)
if shortened != name {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}
})

t.Run("shorten name > 63", func(t *testing.T) {
name := "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf"
shortened := ShortenName(name)
if len(shortened) != 63 {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}

if shortened != "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d" {
t.Fatalf("ShortenName(%s) = %s", name, shortened)
}
})
}

func TestShortenedEquals(t *testing.T) {

testCases := []struct {
name string
shortened string
equal bool
}{
{
name: "hello",
shortened: "hello",
equal: true,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848d",
equal: true,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848",
equal: false,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkljasfhjhldacaslkhdfkjsf1e0028d0fbfe9afbd1a8bb9b53848ds",
equal: false,
},
{
name: "jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf",
shortened: "jfdklanfkjasfhjhldacaslkhdfkjs-f1e0028d0fbfe9afbd1a8bb9b53848ds",
equal: false,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
equal := ShortenedEquals(tc.shortened, tc.name)
if equal != tc.equal {
t.Fatalf("ShortenedEquals(%s, %s) = %t", tc.shortened, tc.name, equal)
}
})

}

}
9 changes: 3 additions & 6 deletions kubeutils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"crypto/md5"
"fmt"
"strings"
"unicode"
)

// use SanitizeNameV2
Expand Down Expand Up @@ -36,12 +35,10 @@ func SanitizeNameV2(name string) string {
case '[', ']', '\n', '"', '\'':
return -1
}
return unicode.ToLower(r)
return r
}, name)
if len(name) > 63 {
hash := md5.Sum([]byte(name))
name = fmt.Sprintf("%s-%x", name[:31], hash)
name = name[:63]
name = ShortenName(name)
}
return name
return strings.ToLower(name)
}
70 changes: 70 additions & 0 deletions kubeutils/util_test.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
package kubeutils

import (
"crypto/md5"
"fmt"
"strings"
"testing"
"time"
"unicode/utf8"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gmeasure"
)

// Here for fuzz tests..
func sanitizeNameV2Old(name string) string {
name = strings.Replace(name, "*", "-", -1)
name = strings.Replace(name, "/", "-", -1)
name = strings.Replace(name, ".", "-", -1)
name = strings.Replace(name, "[", "", -1)
name = strings.Replace(name, "]", "", -1)
name = strings.Replace(name, ":", "-", -1)
name = strings.Replace(name, "_", "-", -1)
name = strings.Replace(name, " ", "-", -1)
name = strings.Replace(name, "\n", "", -1)
name = strings.Replace(name, "\"", "", -1)
name = strings.Replace(name, "'", "", -1)
if len(name) > 63 {
hash := md5.Sum([]byte(name))
name = fmt.Sprintf("%s-%x", name[:31], hash)
name = name[:63]
}
name = strings.Replace(name, ".", "-", -1)
name = strings.ToLower(name)
return name
}

var _ = Describe("sanitize name", func() {

DescribeTable("sanitize short names", func(in, out string) {
Expect(SanitizeNameV2(in)).To(Equal(out))
},
Entry("basic a", "abc", "abc"),
Entry("basic A", "Abc", "abc"),
Entry("basic b", "abc123", "abc123"),
Entry("subX *", "bb*", "bb-"),
Entry("sub *", "bb*b", "bb-b"),
Expand Down Expand Up @@ -63,3 +92,44 @@ var _ = Describe("sanitize name", func() {
}, gmeasure.SamplingConfig{N: 200, Duration: time.Minute})
})
})

func FuzzSanitizeNameParity(f *testing.F) {
// Random string < 63
f.Add("VirtualGateway-istio-ingressgateway-bookinfo-cluster-1-istio-ingressgateway-istio-gateway-ns-cluster-1-gloo-mesh-cluster-1-HTTPS.443-anything")
f.Add("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
f.Add("abc")
f.Add("abc123")
f.Add("bb*")
f.Add("bb*b")
f.Add("bb/")
f.Add("bb/b")
f.Add("bb.")
f.Add("bb.b")
f.Add("bb[")
f.Add("bb[b")
f.Add("bb]")
f.Add("bb]b")
f.Add("bb:")
f.Add("bb:b")
f.Add("bb ")
f.Add("bb b")
f.Add("bb\n")
f.Add("bb\nb")
f.Add("aa\"")
f.Add("bb\"b")
f.Add("aa'")
f.Add("bb'b")
f.Add("jfdklanfkljasfhjhldacaslkhdfkjshfkjsadhfkjasdhgjadhgkdahfjkdahjfdsagdfhjdsagfhasjdfsdfasfsafsdf")

f.Fuzz(func(t *testing.T, a string) {
// we can only get a valid kube name that's alphanumeric
if !utf8.Valid([]byte(a)) {
t.Skip("Skipping non-valid utf8 input")
}
oldName := SanitizeNameV2(a)
newName := sanitizeNameV2Old(a)
if oldName != newName {
t.Fatalf("SanitizeNameV2(%s) = %s, SanitizeNameV2Old(%s) = %s", a, oldName, a, newName)
}
})
}
Loading