From 9a433926dc4825a395c8a59c9c30a1fbee16a714 Mon Sep 17 00:00:00 2001
From: Jian Xiao <99709935+jianoaix@users.noreply.github.com>
Date: Tue, 13 Aug 2024 14:02:47 -0700
Subject: [PATCH] [1/N][zero serialization] Create data structure for passing encode chunks (#690)

---
 core/data.go      | 126 ++++++++++++++++++++++++++++++++++++++++++++++
 core/data_test.go |  76 ++++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)

diff --git a/core/data.go b/core/data.go
index bfb7020fa8..7829ef3ad6 100644
--- a/core/data.go
+++ b/core/data.go
@@ -30,6 +30,7 @@ type SecurityParam struct {
 	QuorumRate common.RateParam
 }
 
+type ChunkEncodingFormat = uint8
 type BundleEncodingFormat = uint8
 
 const (
@@ -52,8 +53,133 @@ const (
 	// in protobuf, UNKNOWN as 0 is a convention).
 	GobBundleEncodingFormat   BundleEncodingFormat = 0
 	GnarkBundleEncodingFormat BundleEncodingFormat = 1
+
+	// Similar to bundle encoding format, this describes the encoding format of chunks.
+	// The difference is ChunkEncodingFormat is just about chunks, whereas BundleEncodingFormat
+	// is also about how multiple chunks of the same bundle are packed into a single byte array.
+	GobChunkEncodingFormat   ChunkEncodingFormat = 0
+	GnarkChunkEncodingFormat ChunkEncodingFormat = 1
 )
 
+// ChunksData carries the encoded bytes of a set of chunks together with the
+// format they are encoded in and the number of symbols per chunk.
+type ChunksData struct {
+	// Chunks is the encoded bytes of the chunks.
+	Chunks [][]byte
+	// Format describes how the bytes of the chunks are encoded.
+	Format ChunkEncodingFormat
+	// The number of symbols in each chunk.
+	// Note each chunk of the same blob will always have the same number of symbols.
+	ChunkLen int
+}
+
+// Size returns the total number of bytes across all chunks, or 0 when there are
+// no chunks. For the Gnark format it is computed as the chunk count times the
+// length of the first chunk, so it is only accurate if all chunks are equal-sized.
+func (cd *ChunksData) Size() uint64 {
+	if len(cd.Chunks) == 0 {
+		return 0
+	}
+	// GnarkChunkEncoding will create chunks of equal size.
+	if cd.Format == GnarkChunkEncodingFormat {
+		return uint64(len(cd.Chunks)) * uint64(len(cd.Chunks[0]))
+	}
+	// GobChunkEncoding can create chunks of different sizes.
+	size := uint64(0)
+	for _, c := range cd.Chunks {
+		size += uint64(len(c))
+	}
+	return size
+}
+
+// FlattenToBundle packs Gnark-encoded chunks into a single byte array: an 8-byte
+// little-endian header followed by the chunks concatenated in order. The header
+// holds the format, shifted left by NumBundleHeaderBits-NumBundleEncodingFormatBits,
+// ORed with ChunkLen. It returns an error if the format is not Gnark or if the
+// chunks are not all the same size.
+func (cd *ChunksData) FlattenToBundle() ([]byte, error) {
+	// Only Gnark coded chunks are dispersed as a byte array.
+	// Gob coded chunks are not flattened.
+	if cd.Format != GnarkChunkEncodingFormat {
+		return nil, fmt.Errorf("unsupported chunk encoding format to flatten: %v", cd.Format)
+	}
+	result := make([]byte, cd.Size()+8)
+	buf := result
+	metadata := (uint64(cd.Format) << (NumBundleHeaderBits - NumBundleEncodingFormatBits)) | uint64(cd.ChunkLen)
+	binary.LittleEndian.PutUint64(buf, metadata)
+	buf = buf[8:]
+	for _, c := range cd.Chunks {
+		if len(c) != len(cd.Chunks[0]) {
+			return nil, errors.New("all chunks must be of same size")
+		}
+		copy(buf, c)
+		buf = buf[len(c):]
+	}
+	return result, nil
+}
+
+// ToGobFormat returns the chunks in Gob encoding. If the data is already Gob
+// encoded the receiver itself is returned (not a copy); otherwise each chunk is
+// decoded from Gnark and re-serialized into a new ChunksData that keeps ChunkLen.
+// It returns an error for an unknown format or if any chunk fails to convert.
+func (cd *ChunksData) ToGobFormat() (*ChunksData, error) {
+	if cd.Format == GobChunkEncodingFormat {
+		return cd, nil
+	}
+	if cd.Format != GnarkChunkEncodingFormat {
+		return nil, fmt.Errorf("unsupported chunk encoding format: %d", cd.Format)
+	}
+	gobChunks := make([][]byte, 0, len(cd.Chunks))
+	for _, chunk := range cd.Chunks {
+		c, err := new(encoding.Frame).DeserializeGnark(chunk)
+		if err != nil {
+			return nil, err
+		}
+		gob, err := c.Serialize()
+		if err != nil {
+			return nil, err
+		}
+		gobChunks = append(gobChunks, gob)
+	}
+	return &ChunksData{
+		Chunks: gobChunks,
+		Format: GobChunkEncodingFormat,
+		// Re-encoding does not change the number of symbols per chunk.
+		ChunkLen: cd.ChunkLen,
+	}, nil
+}
+
+// ToGnarkFormat returns the chunks in Gnark encoding. If the data is already Gnark
+// encoded the receiver itself is returned (not a copy); otherwise each chunk is
+// decoded from Gob and re-serialized into a new ChunksData that keeps ChunkLen.
+// It returns an error for an unknown format or if any chunk fails to convert.
+func (cd *ChunksData) ToGnarkFormat() (*ChunksData, error) {
+	if cd.Format == GnarkChunkEncodingFormat {
+		return cd, nil
+	}
+	if cd.Format != GobChunkEncodingFormat {
+		return nil, fmt.Errorf("unsupported chunk encoding format: %d", cd.Format)
+	}
+	gnarkChunks := make([][]byte, 0, len(cd.Chunks))
+	for _, chunk := range cd.Chunks {
+		c, err := new(encoding.Frame).Deserialize(chunk)
+		if err != nil {
+			return nil, err
+		}
+		gnark, err := c.SerializeGnark()
+		if err != nil {
+			return nil, err
+		}
+		gnarkChunks = append(gnarkChunks, gnark)
+	}
+	return &ChunksData{
+		Chunks: gnarkChunks,
+		Format: GnarkChunkEncodingFormat,
+		// Re-encoding does not change the number of symbols per chunk.
+		ChunkLen: cd.ChunkLen,
+	}, nil
+}
+
 func (s *SecurityParam) String() string {
 	return fmt.Sprintf("QuorumID: %d, AdversaryThreshold: %d, ConfirmationThreshold: %d", s.QuorumID, s.AdversaryThreshold, s.ConfirmationThreshold)
 }
diff --git a/core/data_test.go b/core/data_test.go
index 9cbb9da8a7..c062c72442 100644
--- a/core/data_test.go
+++ b/core/data_test.go
@@ -1,6 +1,7 @@
 package core_test
 
 import (
+	"bytes"
 	"math/rand"
 	"testing"
 
@@ -95,3 +96,78 @@ func TestBundleEncoding(t *testing.T) {
 		}
 	}
 }
+
+func createChunksData(t *testing.T, seed int) (core.Bundle, *core.ChunksData, *core.ChunksData) {
+	bundle := createBundle(t, 64, 64, seed)
+	gobChunks := make([][]byte, len(bundle))
+	gnarkChunks := make([][]byte, len(bundle))
+	for i, frame := range bundle {
+		gobChunk, err := frame.Serialize()
+		assert.Nil(t, err)
+		gobChunks[i] = gobChunk
+
+		gnarkChunk, err := frame.SerializeGnark()
+		assert.Nil(t, err)
+		gnarkChunks[i] = gnarkChunk
+	}
+	gob := &core.ChunksData{
+		Chunks:   gobChunks,
+		Format:   core.GobChunkEncodingFormat,
+		ChunkLen: 64,
+	}
+	gnark := &core.ChunksData{
+		Chunks:   gnarkChunks,
+		Format:   core.GnarkChunkEncodingFormat,
+		ChunkLen: 64,
+	}
+	return bundle, gob, gnark
+}
+
+func TestChunksData(t *testing.T) {
+	numTrials := 16
+	for i := 0; i < numTrials; i++ {
+		bundle, gob, gnark := createChunksData(t, i)
+		assert.Equal(t, len(gob.Chunks), 64)
+		assert.Equal(t, len(gnark.Chunks), 64)
+		assert.Equal(t, gnark.Size(), uint64(64*(32+64*encoding.BYTES_PER_SYMBOL)))
+		// ToGobFormat
+		convertedGob, err := gob.ToGobFormat()
+		assert.Nil(t, err)
+		assert.Equal(t, convertedGob, gob)
+		convertedGob, err = gnark.ToGobFormat()
+		assert.Nil(t, err)
+		assert.Equal(t, len(gob.Chunks), len(convertedGob.Chunks))
+		assert.Equal(t, gob.ChunkLen, convertedGob.ChunkLen)
+		for j := 0; j < len(gob.Chunks); j++ {
+			assert.True(t, bytes.Equal(gob.Chunks[j], convertedGob.Chunks[j]))
+		}
+		// ToGnarkFormat
+		convertedGnark, err := gnark.ToGnarkFormat()
+		assert.Nil(t, err)
+		assert.Equal(t, convertedGnark, gnark)
+		convertedGnark, err = gob.ToGnarkFormat()
+		assert.Nil(t, err)
+		assert.Equal(t, len(gnark.Chunks), len(convertedGnark.Chunks))
+		assert.Equal(t, gnark.ChunkLen, convertedGnark.ChunkLen)
+		for j := 0; j < len(gnark.Chunks); j++ {
+			assert.True(t, bytes.Equal(gnark.Chunks[j], convertedGnark.Chunks[j]))
+		}
+		// FlattenToBundle
+		bytesFromChunksData, err := gnark.FlattenToBundle()
+		assert.Nil(t, err)
+		bytesFromBundle, err := bundle.Serialize()
+		assert.Nil(t, err)
+		assert.True(t, bytes.Equal(bytesFromChunksData, bytesFromBundle))
+		// Invalid cases
+		gnark.Chunks[0] = gnark.Chunks[0][1:]
+		_, err = gnark.FlattenToBundle()
+		assert.EqualError(t, err, "all chunks must be of same size")
+		_, err = gob.FlattenToBundle()
+		assert.EqualError(t, err, "unsupported chunk encoding format to flatten: 0")
+		gob.Format = core.ChunkEncodingFormat(3)
+		_, err = gob.ToGobFormat()
+		assert.EqualError(t, err, "unsupported chunk encoding format: 3")
+		_, err = gob.ToGnarkFormat()
+		assert.EqualError(t, err, "unsupported chunk encoding format: 3")
+	}
+}