From 0e81887b44d96c0751b231a115dc41101498f7e1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 7 Jul 2024 04:11:57 +0000 Subject: [PATCH] working parallel kzg proof compute --- encoding/kzg/prover/encode.go | 2 +- encoding/kzg/prover/gpu/ecntt.go | 2 +- encoding/kzg/prover/gpu/msm.go | 2 +- encoding/kzg/prover/gpu/multiframe_proof.go | 22 ++- encoding/kzg/prover/gpu/ntt.go | 8 +- encoding/kzg/prover/parametrized_prover.go | 159 ++++++++++++++------ encoding/kzg/prover/prover.go | 16 +- encoding/rs/encode.go | 24 +-- encoding/rs/encode_test.go | 6 +- encoding/rs/encoder.go | 4 +- encoding/rs/encoder_fuzz_test.go | 2 +- encoding/rs/frame_test.go | 2 +- encoding/rs/gpu/extend_poly.go | 26 ++-- encoding/test/main.go | 44 ++++-- 14 files changed, 203 insertions(+), 116 deletions(-) diff --git a/encoding/kzg/prover/encode.go b/encoding/kzg/prover/encode.go index 680b9ab09f..8117979bab 100644 --- a/encoding/kzg/prover/encode.go +++ b/encoding/kzg/prover/encode.go @@ -5,7 +5,7 @@ import ( "github.com/consensys/gnark-crypto/ecc/bn254/fr" ) -type ProofComputer interface { +type ProofComputeDevice interface { // blobFr are coefficients ComputeCommitment(blobFr []fr.Element) (*bn254.G1Affine, error) ComputeMultiFrameProof(blobFr []fr.Element, numChunks, chunkLen, numWorker uint64) ([]bn254.G1Affine, error) diff --git a/encoding/kzg/prover/gpu/ecntt.go b/encoding/kzg/prover/gpu/ecntt.go index 992e1082bd..668ffb65ce 100644 --- a/encoding/kzg/prover/gpu/ecntt.go +++ b/encoding/kzg/prover/gpu/ecntt.go @@ -12,7 +12,7 @@ import ( "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" ) -func (c *GpuComputer) ECNtt(batchPoints []bn254.G1Affine, isInverse bool) ([]bn254.G1Affine, error) { +func (c *GpuComputeDevice) ECNtt(batchPoints []bn254.G1Affine, isInverse bool) ([]bn254.G1Affine, error) { totalNumSym := len(batchPoints) // convert gnark affine to icicle projective on slice diff --git a/encoding/kzg/prover/gpu/msm.go b/encoding/kzg/prover/gpu/msm.go index 39d620187e..cbb8c7bf1d 100644 --- a/encoding/kzg/prover/gpu/msm.go +++ b/encoding/kzg/prover/gpu/msm.go @@ -13,7 +13,7 @@ import ( ) // MsmBatch function supports batch across blobs -func (c *GpuComputer) MsmBatch(rowsFr [][]fr.Element, rowsG1 [][]bn254.G1Affine) ([]bn254.G1Affine, error) { +func (c *GpuComputeDevice) MsmBatch(rowsFr [][]fr.Element, rowsG1 [][]bn254.G1Affine) ([]bn254.G1Affine, error) { msmCfg := icicle_bn254_msm.GetDefaultMSMConfig() rowsSfIcicle := make([]icicle_bn254.ScalarField, 0) rowsAffineIcicle := make([]icicle_bn254.Affine, 0) diff --git a/encoding/kzg/prover/gpu/multiframe_proof.go b/encoding/kzg/prover/gpu/multiframe_proof.go index 584abce158..9069faaafb 100644 --- a/encoding/kzg/prover/gpu/multiframe_proof.go +++ b/encoding/kzg/prover/gpu/multiframe_proof.go @@ -2,6 +2,7 @@ package gpu import ( "fmt" + "sync" "time" "github.com/Layr-Labs/eigenda/encoding/fft" @@ -19,18 +20,19 @@ type WorkerResult struct { err error } -type GpuComputer struct { +type GpuComputeDevice struct { *kzg.KzgConfig Fs *fft.FFTSettings FFTPointsT [][]bn254.G1Affine // transpose of FFTPoints SFs *fft.FFTSettings Srs *kzg.SRS G2Trailing []bn254.G2Affine - NttCfg core.NTTConfig[[bn254_icicle.SCALAR_LIMBS]uint32] + NttCfg core.NTTConfig[[bn254_icicle.SCALAR_LIMBS]uint32] + GpuLock *sync.Mutex // lock whenever gpu is needed, } // benchmarks shows cpu commit on 2MB blob only takes 24.165562ms. For now, use cpu -func (p *GpuComputer) ComputeLengthProof(coeffs []fr.Element) (*bn254.G2Affine, error) { +func (p *GpuComputeDevice) ComputeLengthProof(coeffs []fr.Element) (*bn254.G2Affine, error) { inputLength := uint64(len(coeffs)) shiftedSecret := p.G2Trailing[p.KzgConfig.SRSNumberToLoad-inputLength:] config := ecc.MultiExpConfig{} @@ -44,7 +46,7 @@ func (p *GpuComputer) ComputeLengthProof(coeffs []fr.Element) (*bn254.G2Affine, } // benchmarks shows cpu commit on 2MB blob only takes 11.673738ms. For now, use cpu -func (p *GpuComputer) ComputeCommitment(coeffs []fr.Element) (*bn254.G1Affine, error) { +func (p *GpuComputeDevice) ComputeCommitment(coeffs []fr.Element) (*bn254.G1Affine, error) { // compute commit for the full poly config := ecc.MultiExpConfig{} var commitment bn254.G1Affine @@ -56,7 +58,7 @@ func (p *GpuComputer) ComputeCommitment(coeffs []fr.Element) (*bn254.G1Affine, e } // benchmarks shows cpu commit on 2MB blob only takes 31.318661ms. For now, use cpu -func (p *GpuComputer) ComputeLengthCommitment(coeffs []fr.Element) (*bn254.G2Affine, error) { +func (p *GpuComputeDevice) ComputeLengthCommitment(coeffs []fr.Element) (*bn254.G2Affine, error) { config := ecc.MultiExpConfig{} var lengthCommitment bn254.G2Affine @@ -69,7 +71,7 @@ func (p *GpuComputer) ComputeLengthCommitment(coeffs []fr.Element) (*bn254.G2Aff // This function supports batching over multiple blobs. // All blobs must have same size and concatenated passed as polyFr -func (p *GpuComputer) ComputeMultiFrameProof(polyFr []fr.Element, numChunks, chunkLen, numWorker uint64) ([]bn254.G1Affine, error) { +func (p *GpuComputeDevice) ComputeMultiFrameProof(polyFr []fr.Element, numChunks, chunkLen, numWorker uint64) ([]bn254.G1Affine, error) { // Robert: Standardizing this to use the same math used in precomputeSRS dimE := numChunks l := chunkLen @@ -110,6 +112,10 @@ func (p *GpuComputer) ComputeMultiFrameProof(polyFr []fr.Element, numChunks, chu } preprocessDone := time.Now() + // Start using GPU + p.GpuLock.Lock() + defer p.GpuLock.Unlock() + // Compute NTT on the coeff matrix p.NttCfg.BatchSize = int32(l) coeffStoreFFT, e := p.NTT(coeffStore) @@ -180,7 +186,7 @@ func (p *GpuComputer) ComputeMultiFrameProof(polyFr []fr.Element, numChunks, chu return flatProofsBatch, nil } -func (p *GpuComputer) proofWorkerGPU( +func (p *GpuComputeDevice) proofWorkerGPU( polyFr []fr.Element, jobChan <-chan uint64, l uint64, @@ -208,7 +214,7 @@ func (p *GpuComputer) proofWorkerGPU( } // capable of batching blobs -func (p *GpuComputer) GetSlicesCoeffWithoutFFT(polyFr []fr.Element, dimE, j, l uint64) ([]fr.Element, error) { +func (p *GpuComputeDevice) GetSlicesCoeffWithoutFFT(polyFr []fr.Element, dimE, j, l uint64) ([]fr.Element, error) { // there is a constant term m := uint64(dimE*l) - 1 dim := (m - j%l) / l diff --git a/encoding/kzg/prover/gpu/ntt.go b/encoding/kzg/prover/gpu/ntt.go index 214b407fd0..d55474181f 100644 --- a/encoding/kzg/prover/gpu/ntt.go +++ b/encoding/kzg/prover/gpu/ntt.go @@ -1,6 +1,8 @@ package gpu import ( + "fmt" + "github.com/Layr-Labs/eigenda/encoding/utils/gpu_utils" "github.com/consensys/gnark-crypto/ecc/bn254/fr" "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" @@ -8,7 +10,11 @@ import ( bn254_icicle_ntt "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/ntt" ) -func (c *GpuComputer) NTT(batchFr [][]fr.Element) ([][]fr.Element, error) { +func (c *GpuComputeDevice) NTT(batchFr [][]fr.Element) ([][]fr.Element, error) { + if len(batchFr) == 0 { + return nil, fmt.Errorf("input to NTT contains no blob") + } + numSymbol := len(batchFr[0]) batchSize := len(batchFr) diff --git a/encoding/kzg/prover/parametrized_prover.go b/encoding/kzg/prover/parametrized_prover.go index f7fd919b12..9f3d61d620 100644 --- a/encoding/kzg/prover/parametrized_prover.go +++ b/encoding/kzg/prover/parametrized_prover.go @@ -6,6 +6,7 @@ import ( "time" "github.com/Layr-Labs/eigenda/encoding" + "github.com/hashicorp/go-multierror" "github.com/Layr-Labs/eigenda/encoding/fft" "github.com/Layr-Labs/eigenda/encoding/kzg" @@ -27,7 +28,34 @@ type ParametrizedProver struct { FFTPointsT [][]bn254.G1Affine // transpose of FFTPoints UseGpu bool - Computer ProofComputer + Computer ProofComputeDevice +} + +type RsEncodeResult struct { + Frames []rs.Frame + Indices []uint32 + Err error + Duration time.Duration +} +type LengthCommitmentResult struct { + LengthCommitment bn254.G2Affine + Err error + Duration time.Duration +} +type LengthProofResult struct { + LengthProof bn254.G2Affine + Err error + Duration time.Duration +} +type CommitmentResult struct { + Commitment bn254.G1Affine + Err error + Duration time.Duration +} +type ProofsResult struct { + Proofs []bn254.G1Affine + Err error + Duration time.Duration } // just a wrapper to take bytes not Fr Element @@ -45,66 +73,107 @@ func (g *ParametrizedProver) Encode(inputFr []fr.Element) (*bn254.G1Affine, *bn2 return nil, nil, nil, nil, nil, fmt.Errorf("poly Coeff length %v is greater than Loaded SRS points %v", len(inputFr), int(g.KzgConfig.SRSNumberToLoad)) } - startTime := time.Now() - // compute chunks - poly, frames, indices, err := g.Encoder.Encode(inputFr) - if err != nil { - return nil, nil, nil, nil, nil, err - } - rsEncodeDone := time.Now() - - // compute commit for the full poly - commit, err := g.Computer.ComputeCommitment(poly.Coeffs) - if err != nil { - return nil, nil, nil, nil, nil, err - } - commitDone := time.Now() + encodeStart := time.Now() - lengthCommitment, err := g.Computer.ComputeLengthCommitment(poly.Coeffs) - if err != nil { - return nil, nil, nil, nil, nil, err - } - lengthCommitDone := time.Now() + rsChan := make(chan RsEncodeResult, 1) + lengthCommitmentChan := make(chan LengthCommitmentResult, 1) + lengthProofChan := make(chan LengthProofResult, 1) + commitmentChan := make(chan CommitmentResult, 1) + proofChan := make(chan ProofsResult, 1) - lengthProof, err := g.Computer.ComputeLengthProof(poly.Coeffs) - if err != nil { - return nil, nil, nil, nil, nil, err - } - lengthProofDone := time.Now() + // inputFr is untouched + // compute chunks + go func() { + start := time.Now() + frames, indices, err := g.Encoder.Encode(inputFr) + rsChan <- RsEncodeResult{ + Frames: frames, + Indices: indices, + Err: err, + Duration: time.Since(start), + } + }() - // compute proofs - paddedCoeffs := make([]fr.Element, g.NumEvaluations()) - // polyCoeffs has less points than paddedCoeffs in general due to erasure redundancy - copy(paddedCoeffs, poly.Coeffs) - proofs, err := g.Computer.ComputeMultiFrameProof(paddedCoeffs, g.NumChunks, g.ChunkLength, g.NumWorker) - if err != nil { - return nil, nil, nil, nil, nil, fmt.Errorf("could not generate proofs: %v", err) + // compute commit for the full poly + go func() { + start := time.Now() + commit, err := g.Computer.ComputeCommitment(inputFr) + commitmentChan <- CommitmentResult{ + Commitment: *commit, + Err: err, + Duration: time.Since(start), + } + }() + + go func() { + start := time.Now() + lengthCommitment, err := g.Computer.ComputeLengthCommitment(inputFr) + lengthCommitmentChan <- LengthCommitmentResult{ + LengthCommitment: *lengthCommitment, + Err: err, + Duration: time.Since(start), + } + }() + + go func() { + start := time.Now() + lengthProof, err := g.Computer.ComputeLengthProof(inputFr) + lengthProofChan <- LengthProofResult{ + LengthProof: *lengthProof, + Err: err, + Duration: time.Since(start), + } + }() + + go func() { + start := time.Now() + // compute proofs + paddedCoeffs := make([]fr.Element, g.NumEvaluations()) + // polyCoeffs has less points than paddedCoeffs in general due to erasure redundancy + copy(paddedCoeffs, inputFr) + proofs, err := g.Computer.ComputeMultiFrameProof(paddedCoeffs, g.NumChunks, g.ChunkLength, g.NumWorker) + proofChan <- ProofsResult{ + Proofs: proofs, + Err: err, + Duration: time.Since(start), + } + }() + + lengthProofResult := <-lengthProofChan + lengthCommitmentResult := <-lengthCommitmentChan + commitmentResult := <-commitmentChan + rsResult := <-rsChan + proofsResult := <-proofChan + + if lengthProofResult.Err != nil || lengthCommitmentResult.Err != nil || + commitmentResult.Err != nil || rsResult.Err != nil || + proofsResult.Err != nil { + return nil, nil, nil, nil, nil, multierror.Append(lengthProofResult.Err, lengthCommitmentResult.Err, commitmentResult.Err, rsResult.Err, proofsResult.Err) } - multiProofDone := time.Now() - + totalProcessingTime := time.Since(encodeStart) if g.Verbose { log.Printf("\n\t\tRS encode %-v\n\t\tCommiting %-v\n\t\tLengthCommit %-v\n\t\tlengthProof %-v\n\t\tmultiProof %-v\n\t\tMetaInfo. order %-v shift %v\n", - rsEncodeDone.Sub(startTime), - commitDone.Sub(rsEncodeDone), - lengthCommitDone.Sub(commitDone), - lengthProofDone.Sub(lengthCommitDone), - multiProofDone.Sub(lengthProofDone), + rsResult.Duration, + commitmentResult.Duration, + lengthCommitmentResult.Duration, + lengthProofResult.Duration, + proofsResult.Duration, len(g.Srs.G2), g.SRSOrder-uint64(len(inputFr)), ) } // assemble frames - kzgFrames := make([]encoding.Frame, len(frames)) - for i, index := range indices { + kzgFrames := make([]encoding.Frame, len(rsResult.Frames)) + for i, index := range rsResult.Indices { kzgFrames[i] = encoding.Frame{ - Proof: proofs[index], - Coeffs: frames[i].Coeffs, + Proof: proofsResult.Proofs[index], + Coeffs: rsResult.Frames[i].Coeffs, } } if g.Verbose { - log.Printf("Total encoding took %v\n", time.Since(startTime)) + log.Printf("Total encoding took %v\n", totalProcessingTime) } - return commit, lengthCommitment, lengthProof, kzgFrames, indices, nil + return &commitmentResult.Commitment, &lengthCommitmentResult.LengthCommitment, &lengthProofResult.LengthProof, kzgFrames, rsResult.Indices, nil } diff --git a/encoding/kzg/prover/prover.go b/encoding/kzg/prover/prover.go index 7459e3b2cb..7c83725dc1 100644 --- a/encoding/kzg/prover/prover.go +++ b/encoding/kzg/prover/prover.go @@ -243,10 +243,10 @@ func (g *Prover) newProver(params encoding.EncodingParams) (*ParametrizedProver, sfs := fft.NewFFTSettings(t) // Set RS computer - var rsComputer rs.RSComputer + var RsComputeDevice rs.RsComputeDevice // Set KZG Prover computer - var computer ProofComputer + var computer ProofComputeDevice if !g.UseGpu { computer = &cpu.CpuComputer{ Fs: fs, @@ -256,14 +256,14 @@ func (g *Prover) newProver(params encoding.EncodingParams) (*ParametrizedProver, G2Trailing: g.G2Trailing, KzgConfig: g.KzgConfig, } - rsComputer = &rs_cpu.CpuComputer{ + RsComputeDevice = &rs_cpu.CpuComputer{ Fs: fs, EncodingParams: params, } } else { nttCfg := gpu_utils.SetupNTT() - - computer = &gpu.GpuComputer{ + GpuLock := sync.Mutex{} + computer = &gpu.GpuComputeDevice{ Fs: fs, FFTPointsT: fftPointsT, SFs: sfs, @@ -271,15 +271,17 @@ func (g *Prover) newProver(params encoding.EncodingParams) (*ParametrizedProver, G2Trailing: g.G2Trailing, KzgConfig: g.KzgConfig, NttCfg: nttCfg, + GpuLock: &GpuLock, } - rsComputer = &rs_gpu.GpuComputer{ + RsComputeDevice = &rs_gpu.GpuComputeDevice{ Fs: fs, EncodingParams: params, NttCfg: nttCfg, + GpuLock: &GpuLock, } } - encoder.Computer = rsComputer + encoder.Computer = RsComputeDevice return &ParametrizedProver{ Encoder: encoder, diff --git a/encoding/rs/encode.go b/encoding/rs/encode.go index 01c24a2958..9a6e6460ff 100644 --- a/encoding/rs/encode.go +++ b/encoding/rs/encode.go @@ -11,16 +11,11 @@ import ( "github.com/consensys/gnark-crypto/ecc/bn254/fr" ) -type GlobalPoly struct { - Coeffs []fr.Element - Values []fr.Element -} - // just a wrapper to take bytes not Fr Element -func (g *Encoder) EncodeBytes(inputBytes []byte) (*GlobalPoly, []Frame, []uint32, error) { +func (g *Encoder) EncodeBytes(inputBytes []byte) ([]Frame, []uint32, error) { inputFr, err := ToFrArray(inputBytes) if err != nil { - return nil, nil, nil, fmt.Errorf("cannot convert bytes to field elements, %w", err) + return nil, nil, fmt.Errorf("cannot convert bytes to field elements, %w", err) } return g.Encode(inputFr) } @@ -32,23 +27,18 @@ func (g *Encoder) EncodeBytes(inputBytes []byte) (*GlobalPoly, []Frame, []uint32 // frame, the multireveal interpolating coefficients are identical to the part of input bytes // in the form of field element. The extra returned integer list corresponds to which leading // coset root of unity, the frame is proving against, which can be deduced from a frame's index -func (g *Encoder) Encode(inputFr []fr.Element) (*GlobalPoly, []Frame, []uint32, error) { +func (g *Encoder) Encode(inputFr []fr.Element) ([]Frame, []uint32, error) { start := time.Now() intermediate := time.Now() pdCoeffs, err := g.PadPolyEval(inputFr) if err != nil { - return nil, nil, nil, err + return nil, nil, err } polyEvals, err := g.Computer.ExtendPolyEval(pdCoeffs) if err != nil { - return nil, nil, nil, err - } - - poly := &GlobalPoly{ - Values: polyEvals, - Coeffs: inputFr, + return nil, nil, err } log.Printf(" Extending evaluation takes %v\n", time.Since(intermediate)) @@ -56,13 +46,13 @@ func (g *Encoder) Encode(inputFr []fr.Element) (*GlobalPoly, []Frame, []uint32, // create frames to group relevant info frames, indices, err := g.MakeFrames(polyEvals) if err != nil { - return nil, nil, nil, err + return nil, nil, err } log.Printf(" SUMMARY: RSEncode %v byte among %v numChunks with chunkLength %v takes %v\n", len(inputFr)*encoding.BYTES_PER_SYMBOL, g.NumChunks, g.ChunkLength, time.Since(start)) - return poly, frames, indices, nil + return frames, indices, nil } // MakeFrames function takes extended evaluation data and bundles relevant information into Frame. diff --git a/encoding/rs/encode_test.go b/encoding/rs/encode_test.go index 832e369b50..fd43fec672 100644 --- a/encoding/rs/encode_test.go +++ b/encoding/rs/encode_test.go @@ -22,7 +22,7 @@ func TestEncodeDecode_InvertsWhenSamplingAllFrames(t *testing.T) { inputFr, err := rs.ToFrArray(GETTYSBURG_ADDRESS_BYTES) assert.Nil(t, err) - _, frames, _, err := enc.Encode(inputFr) + frames, _, err := enc.Encode(inputFr) assert.Nil(t, err) // sample some frames @@ -45,7 +45,7 @@ func TestEncodeDecode_InvertsWhenSamplingMissingFrame(t *testing.T) { inputFr, err := rs.ToFrArray(GETTYSBURG_ADDRESS_BYTES) assert.Nil(t, err) - _, frames, _, err := enc.Encode(inputFr) + frames, _, err := enc.Encode(inputFr) assert.Nil(t, err) // sample some frames @@ -70,7 +70,7 @@ func TestEncodeDecode_ErrorsWhenNotEnoughSampledFrames(t *testing.T) { inputFr, err := rs.ToFrArray(GETTYSBURG_ADDRESS_BYTES) assert.Nil(t, err) - _, frames, _, err := enc.Encode(inputFr) + frames, _, err := enc.Encode(inputFr) assert.Nil(t, err) // sample some frames diff --git a/encoding/rs/encoder.go b/encoding/rs/encoder.go index 5e13f6f658..28ef5846b2 100644 --- a/encoding/rs/encoder.go +++ b/encoding/rs/encoder.go @@ -18,10 +18,10 @@ type Encoder struct { NumRSWorker int - Computer RSComputer + Computer RsComputeDevice } -type RSComputer interface { +type RsComputeDevice interface { ExtendPolyEval(coeffs []fr.Element) ([]fr.Element, error) } diff --git a/encoding/rs/encoder_fuzz_test.go b/encoding/rs/encoder_fuzz_test.go index 0c70d54644..b9eb1a7bcb 100644 --- a/encoding/rs/encoder_fuzz_test.go +++ b/encoding/rs/encoder_fuzz_test.go @@ -20,7 +20,7 @@ func FuzzOnlySystematic(f *testing.F) { } //encode the data - _, frames, _, err := enc.EncodeBytes(input) + frames, _, err := enc.EncodeBytes(input) if err != nil { t.Errorf("Error Encoding:\n Data:\n %q \n Err: %q", input, err) } diff --git a/encoding/rs/frame_test.go b/encoding/rs/frame_test.go index dc230664e5..717b64ff75 100644 --- a/encoding/rs/frame_test.go +++ b/encoding/rs/frame_test.go @@ -17,7 +17,7 @@ func TestEncodeDecodeFrame_AreInverses(t *testing.T) { enc, _ := rs.NewEncoder(params, true) require.NotNil(t, enc) - _, frames, _, err := enc.EncodeBytes(GETTYSBURG_ADDRESS_BYTES) + frames, _, err := enc.EncodeBytes(GETTYSBURG_ADDRESS_BYTES) require.Nil(t, err) require.NotNil(t, frames, err) diff --git a/encoding/rs/gpu/extend_poly.go b/encoding/rs/gpu/extend_poly.go index 4dc7b33157..e1cb73fe7e 100644 --- a/encoding/rs/gpu/extend_poly.go +++ b/encoding/rs/gpu/extend_poly.go @@ -1,7 +1,7 @@ package gpu import ( - "errors" + "sync" "github.com/Layr-Labs/eigenda/encoding" "github.com/Layr-Labs/eigenda/encoding/fft" @@ -12,32 +12,24 @@ import ( "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/ntt" ) -type GpuComputer struct { +type GpuComputeDevice struct { Fs *fft.FFTSettings encoding.EncodingParams - NttCfg core.NTTConfig[[bn254_icicle.SCALAR_LIMBS]uint32] + NttCfg core.NTTConfig[[bn254_icicle.SCALAR_LIMBS]uint32] + GpuLock *sync.Mutex } // Encoding Reed Solomon using FFT -func (g *GpuComputer) ExtendPolyEval(coeffs []fr.Element) ([]fr.Element, error) { +func (g *GpuComputeDevice) ExtendPolyEval(coeffs []fr.Element) ([]fr.Element, error) { - if len(coeffs) > int(g.NumEvaluations()) { - return nil, errors.New("the provided encoding parameters are not sufficient for the size of the data input") - } + g.GpuLock.Lock() + defer g.GpuLock.Unlock() - pdCoeffs := make([]fr.Element, g.NumEvaluations()) - for i := 0; i < len(coeffs); i++ { - pdCoeffs[i].Set(&coeffs[i]) - } - for i := len(coeffs); i < len(pdCoeffs); i++ { - pdCoeffs[i].SetZero() - } - - scalarsSF := gpu_utils.ConvertFrToScalarFieldsBytes(pdCoeffs) + scalarsSF := gpu_utils.ConvertFrToScalarFieldsBytes(coeffs) scalars := core.HostSliceFromElements[bn254_icicle.ScalarField](scalarsSF) - outputDevice := make(core.HostSlice[bn254_icicle.ScalarField], len(pdCoeffs)) + outputDevice := make(core.HostSlice[bn254_icicle.ScalarField], len(coeffs)) ntt.Ntt(scalars, core.KForward, &g.NttCfg, outputDevice) diff --git a/encoding/test/main.go b/encoding/test/main.go index 25c1014893..321dc0cba6 100644 --- a/encoding/test/main.go +++ b/encoding/test/main.go @@ -15,6 +15,7 @@ import ( "github.com/Layr-Labs/eigenda/encoding/rs" "github.com/consensys/gnark-crypto/ecc/bn254/fr" + cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" ) func main() { @@ -49,27 +50,49 @@ func readpoints() { } func TestKzgRs() { - numSymbols := 4 + isSmallTest := false + + numSymbols := 4096 * 8 // encode parameters - numNode := uint64(4) // 200 - numSys := uint64(2) // 180 + numNode := uint64(4096) // 200 + numSys := uint64(512) // 180 numPar := numNode - numSys - // Prepare data - fmt.Printf("* Task Starts\n") - fmt.Printf(" Num Sys: %v\n", numSys) - fmt.Printf(" Num Par: %v\n", numPar) - //fmt.Printf(" Data size(byte): %v\n", len(inputBytes)) + + numDevices, _ := cr.GetDeviceCount() + fmt.Println("num device ", numDevices) kzgConfig := &kzg.KzgConfig{ G1Path: "../../inabox/resources/kzg/g1.point.300000", G2Path: "../../inabox/resources/kzg/g2.point.300000", CacheDir: "SRSTables", - SRSOrder: 3000, - SRSNumberToLoad: 3000, + SRSOrder: 300000, + SRSNumberToLoad: 300000, NumWorker: uint64(runtime.GOMAXPROCS(0)), Verbose: true, } + if isSmallTest { + numSymbols = 4 + numNode = 4 + numSys = 2 + numPar = numNode - numSys + kzgConfig = &kzg.KzgConfig{ + G1Path: "../../inabox/resources/kzg/g1.point.300000", + G2Path: "../../inabox/resources/kzg/g2.point.300000", + CacheDir: "SRSTables", + SRSOrder: 3000, + SRSNumberToLoad: 3000, + NumWorker: uint64(runtime.GOMAXPROCS(0)), + Verbose: true, + } + } + + // Prepare data + fmt.Printf("* Task Starts\n") + fmt.Printf(" Num Sys: %v\n", numSys) + fmt.Printf(" Num Par: %v\n", numPar) + //fmt.Printf(" Data size(byte): %v\n", len(inputBytes)) + // create encoding object p, _ := prover.NewProver(kzgConfig, true) @@ -112,7 +135,6 @@ func TestKzgRs() { log.Fatal("leading coset inconsistency") } - fmt.Printf("frame %v leading coset %v\n", i, j) lc := enc.Fs.ExpandedRootsOfUnity[uint64(j)] g2Atn, err := kzg.ReadG2Point(uint64(len(f.Coeffs)), kzgConfig)