From 6fd8a07848d9ac72601205777714f7b86474bf06 Mon Sep 17 00:00:00 2001 From: Paul Leydier <75126792+paul-leydier@users.noreply.github.com> Date: Fri, 19 Nov 2021 19:44:12 +0100 Subject: [PATCH] feat: add Conversion/base64 algorithm (#437) * feat: base64 encoding * test: base64 encoding * docs: Added base64 declaration to README.md * feat: use string builder to drastically improve efficiency of Encode * docs: formatting * feat: base64 decoding * test: base64 decoding * docs: added base64 Decode function to README.md * test: test base64 Encode and Decode inverse functions * docs: base64 Decode docstring * docs: improve package documentation Co-authored-by: Taj * feat: remove usage of predefined return values * feat: base64 move to conversion package * fix: remove deleted line in README.md * Update conversion/base64.go * Update conversion/base64.go Co-authored-by: Taj Co-authored-by: Rak Laptudirm Co-authored-by: Andrii Siriak --- conversion/base64.go | 81 ++++++++++++++++++++++++++++ conversion/base64_test.go | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 conversion/base64.go create mode 100644 conversion/base64_test.go diff --git a/conversion/base64.go b/conversion/base64.go new file mode 100644 index 000000000..8828f6a7a --- /dev/null +++ b/conversion/base64.go @@ -0,0 +1,81 @@ +// base64.go +// description: The base64 encoding algorithm as defined in the RFC4648 standard. +// author: [Paul Leydier] (https://github.com/paul-leydier) +// ref: https://datatracker.ietf.org/doc/html/rfc4648#section-4 +// ref: https://en.wikipedia.org/wiki/Base64 +// see base64_test.go + +package conversion + +import ( + "strings" // Used for efficient string builder (more efficient than simply appending strings) +) + +const Alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + +// Base64Encode encodes the received input bytes slice into a base64 string. +// The implementation follows the RFC4648 standard, which is documented +// at https://datatracker.ietf.org/doc/html/rfc4648#section-4 +func Base64Encode(input []byte) string { + var sb strings.Builder + // If not 24 bits (3 bytes) multiple, pad with 0 value bytes, and with "=" for the output + var padding string + for i := len(input) % 3; i > 0 && i < 3; i++ { + var zeroByte byte + input = append(input, zeroByte) + padding += "=" + } + + // encode 24 bits per 24 bits (3 bytes per 3 bytes) + for i := 0; i < len(input); i += 3 { + // select 3 8-bit input groups, and re-arrange them into 4 6-bit groups + // the literal 0x3F corresponds to the byte "0011 1111" + // the operation "byte & 0x3F" masks the two left-most bits + group := [4]byte{ + input[i] >> 2, + (input[i]<<4)&0x3F + input[i+1]>>4, + (input[i+1]<<2)&0x3F + input[i+2]>>6, + input[i+2] & 0x3F, + } + + // translate each group into a char using the static map + for _, b := range group { + sb.WriteString(string(Alphabet[int(b)])) + } + } + encoded := sb.String() + + // Apply the output padding + encoded = encoded[:len(encoded)-len(padding)] + padding[:] + + return encoded +} + +// Base64Decode decodes the received input base64 string into a byte slice. +// The implementation follows the RFC4648 standard, which is documented +// at https://datatracker.ietf.org/doc/html/rfc4648#section-4 +func Base64Decode(input string) []byte { + padding := strings.Count(input, "=") // Number of bytes which will be ignored + var decoded []byte + + // select 4 6-bit input groups, and re-arrange them into 3 8-bit groups + for i := 0; i < len(input); i += 4 { + // translate each group into a byte using the static map + byteInput := [4]byte{ + byte(strings.IndexByte(Alphabet, input[i])), + byte(strings.IndexByte(Alphabet, input[i+1])), + byte(strings.IndexByte(Alphabet, input[i+2])), + byte(strings.IndexByte(Alphabet, input[i+3])), + } + + group := [3]byte{ + byteInput[0]<<2 + byteInput[1]>>4, + byteInput[1]<<4 + byteInput[2]>>2, + byteInput[2]<<6 + byteInput[3], + } + + decoded = append(decoded, group[:]...) + } + + return decoded[:len(decoded)-padding] +} diff --git a/conversion/base64_test.go b/conversion/base64_test.go new file mode 100644 index 000000000..ed882634c --- /dev/null +++ b/conversion/base64_test.go @@ -0,0 +1,111 @@ +package conversion + +import "testing" + +func TestBase64Encode(t *testing.T) { + testCases := []struct { + in string + expected string + }{ + {"Hello World!", "SGVsbG8gV29ybGQh"}, // multiple of 3 byte length (multiple of 24-bits) + {"Hello World!a", "SGVsbG8gV29ybGQhYQ=="}, // multiple of 3 byte length + 1 + {"Hello World!ab", "SGVsbG8gV29ybGQhYWI="}, // multiple of 3 byte length + 2 + {"", ""}, // empty byte slice + {"6", "Ng=="}, // short text + {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg=="}, // Long text + } + + for _, tc := range testCases { + result := Base64Encode([]byte(tc.in)) + if result != tc.expected { + t.Fatalf("Base64Encode(%s) = %s, want %s", tc.in, result, tc.expected) + } + } +} + +func BenchmarkBase64Encode(b *testing.B) { + benchmarks := []struct { + name string + in string + expected string + }{ + {"Hello World!", "Hello World!", "SGVsbG8gV29ybGQh"}, // multiple of 3 byte length (multiple of 24-bits) + {"Hello World!a", "Hello World!a", "SGVsbG8gV29ybGQhYQ=="}, // multiple of 3 byte length + 1 + {"Hello World!ab", "Hello World!ab", "SGVsbG8gV29ybGQhYWI="}, // multiple of 3 byte length + 2 + {"Empty", "", ""}, // empty byte slice + {"6", "6", "Ng=="}, // short text + {"Lorem ipsum", "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg=="}, // Long text + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Base64Encode([]byte(bm.in)) + } + }) + } +} + +func TestBase64Decode(t *testing.T) { + testCases := []struct { + expected string + in string + }{ + {"Hello World!", "SGVsbG8gV29ybGQh"}, // multiple of 3 byte length (multiple of 24-bits) + {"Hello World!a", "SGVsbG8gV29ybGQhYQ=="}, // multiple of 3 byte length + 1 + {"Hello World!ab", "SGVsbG8gV29ybGQhYWI="}, // multiple of 3 byte length + 2 + {"", ""}, // empty byte slice + {"6", "Ng=="}, // short text + {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg=="}, // Long text + } + + for _, tc := range testCases { + result := string(Base64Decode(tc.in)) + if result != tc.expected { + t.Fatalf("Base64Decode(%s) = %s, want %s", tc.in, result, tc.expected) + } + } +} + +func BenchmarkBase64Decode(b *testing.B) { + benchmarks := []struct { + name string + expected string + in string + }{ + {"Hello World!", "Hello World!", "SGVsbG8gV29ybGQh"}, // multiple of 3 byte length (multiple of 24-bits) + {"Hello World!a", "Hello World!a", "SGVsbG8gV29ybGQhYQ=="}, // multiple of 3 byte length + 1 + {"Hello World!ab", "Hello World!ab", "SGVsbG8gV29ybGQhYWI="}, // multiple of 3 byte length + 2 + {"Empty", "", ""}, // empty byte slice + {"6", "6", "Ng=="}, // short text + {"Lorem ipsum", "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg=="}, // Long text + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Base64Decode(bm.in) + } + }) + } +} + +func TestBase64EncodeDecodeInverse(t *testing.T) { + testCases := []struct { + in string + }{ + {"Hello World!"}, // multiple of 3 byte length (multiple of 24-bits) + {"Hello World!a"}, // multiple of 3 byte length + 1 + {"Hello World!ab"}, // multiple of 3 byte length + 2 + {""}, // empty byte slice + {"6"}, // short text + {"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."}, // Long text + } + + for _, tc := range testCases { + result := string(Base64Decode(Base64Encode([]byte(tc.in)))) + if result != tc.in { + t.Fatalf("Base64Decode(Base64Encode(%s)) = %s, want %s", tc.in, result, tc.in) + } + } +}