-
Notifications
You must be signed in to change notification settings - Fork 198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GPU accelerated encoder #895
Changes from all commits
d58f035
7d90a7c
0cac49c
c66dc7a
65fdb61
d5ff7af
0c3fb33
4946b5b
ef3df8f
5fac9d7
421df30
0533c36
82ca409
47859b6
c3200d4
d80b454
c9a932b
4df720e
34e7ddc
7e37a48
7a6e38a
3a8bf73
46d7eb8
4b7413e
d3e64bb
5956a2d
eaac4ca
b6bef99
a6d7115
5be521f
6e006dd
b6604ee
8bb640b
bec398b
01decb9
5a61412
324f6b9
e806df4
e9bf815
c294985
cf7cba5
303050b
d3586df
5494c90
b2421e0
15ad390
3235a3f
eaa093d
f3ff995
6906988
327d26f
851ed2b
914d94a
523980e
180cdf7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,3 +16,5 @@ lightnode/docker/args.sh | |
.idea | ||
.env | ||
.vscode | ||
|
||
icicle/* |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS builder | ||
|
||
# Install Go | ||
ENV GOLANG_VERSION=1.21.1 | ||
ENV GOLANG_SHA256=b3075ae1ce5dab85f89bc7905d1632de23ca196bd8336afd93fa97434cfa55ae | ||
|
||
ADD https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz /tmp/go.tar.gz | ||
RUN echo "${GOLANG_SHA256} /tmp/go.tar.gz" | sha256sum -c - && \ | ||
tar -C /usr/local -xzf /tmp/go.tar.gz && \ | ||
rm /tmp/go.tar.gz | ||
ENV PATH="/usr/local/go/bin:${PATH}" | ||
|
||
# Set up the working directory | ||
WORKDIR /app | ||
|
||
# Copy go.mod and go.sum first to leverage Docker cache | ||
COPY go.mod go.sum ./ | ||
|
||
# Download dependencies | ||
RUN go mod download | ||
|
||
# Copy the source code | ||
COPY ./disperser /app/disperser | ||
COPY common /app/common | ||
COPY contracts /app/contracts | ||
COPY core /app/core | ||
COPY api /app/api | ||
COPY indexer /app/indexer | ||
COPY encoding /app/encoding | ||
COPY relay /app/relay | ||
|
||
# Define Icicle versions and checksums | ||
ENV ICICLE_VERSION=3.1.0 | ||
ENV ICICLE_BASE_SHA256=2e4e33b8bc3e335b2dd33dcfb10a9aaa18717885509614a24f492f47a2e4f4b1 | ||
ENV ICICLE_CUDA_SHA256=cdba907eac6297445a6c128081ebba5c711d352003f69310145406a8fd781647 | ||
|
||
# Download Icicle tarballs | ||
ADD https://github.com/ingonyama-zk/icicle/releases/download/v${ICICLE_VERSION}/icicle_${ICICLE_VERSION//./_}-ubuntu22.tar.gz /tmp/icicle.tar.gz | ||
ADD https://github.com/ingonyama-zk/icicle/releases/download/v${ICICLE_VERSION}/icicle_${ICICLE_VERSION//./_}-ubuntu22-cuda122.tar.gz /tmp/icicle-cuda.tar.gz | ||
|
||
# Verify checksums and install Icicle | ||
RUN echo "${ICICLE_BASE_SHA256} /tmp/icicle.tar.gz" | sha256sum -c - && \ | ||
echo "${ICICLE_CUDA_SHA256} /tmp/icicle-cuda.tar.gz" | sha256sum -c - && \ | ||
tar xzf /tmp/icicle.tar.gz && \ | ||
cp -r ./icicle/lib/* /usr/lib/ && \ | ||
cp -r ./icicle/include/icicle/ /usr/local/include/ && \ | ||
tar xzf /tmp/icicle-cuda.tar.gz -C /opt && \ | ||
rm /tmp/icicle.tar.gz /tmp/icicle-cuda.tar.gz | ||
|
||
# Build the server with icicle backend | ||
WORKDIR /app/disperser | ||
RUN go build -tags=icicle -o ./bin/server ./cmd/encoder | ||
|
||
# Start a new stage for the base image | ||
FROM nvidia/cuda:12.2.2-base-ubuntu22.04 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Image size comparison: ghcr.io/layr-labs/eigenda/encoder latest 781c3866c5dd 6 hours ago 41MB There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What makes it so large? Nearly 20x is a lot. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't look into it, but my guess right now is the CUDA base image, which contains the CUDA runtime. |
||
|
||
COPY --from=builder /app/disperser/bin/server /usr/local/bin/server | ||
COPY --from=builder /usr/lib/libicicle* /usr/lib/ | ||
COPY --from=builder /usr/local/include/icicle /usr/local/include/icicle | ||
COPY --from=builder /opt/icicle /opt/icicle | ||
|
||
ENTRYPOINT ["server"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,11 +10,12 @@ import ( | |
"github.com/Layr-Labs/eigenda/common/aws/s3" | ||
"github.com/Layr-Labs/eigenda/disperser/cmd/encoder/flags" | ||
blobstorev2 "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" | ||
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" | ||
"github.com/Layr-Labs/eigenda/disperser/encoder" | ||
"github.com/Layr-Labs/eigenda/encoding" | ||
"github.com/Layr-Labs/eigenda/encoding/kzg/prover" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/Layr-Labs/eigenda/relay/chunkstore" | ||
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/urfave/cli" | ||
) | ||
|
||
|
@@ -69,9 +70,23 @@ func RunEncoderServer(ctx *cli.Context) error { | |
reg.MustRegister(grpcMetrics) | ||
} | ||
|
||
backendType, err := encoding.ParseBackendType(config.ServerConfig.Backend) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Set the encoding config | ||
encodingConfig := &encoding.Config{ | ||
BackendType: backendType, | ||
GPUEnable: config.ServerConfig.GPUEnable, | ||
NumWorker: config.EncoderConfig.NumWorker, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have an understanding of how this parallelism interacts with the blob-level parallelism at the server? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. From my understanding, the interaction is that it can introduce CPU contention if we are handling more than one blob concurrently. In the worst case, we set NumWorker equal to the number of CPUs on the machine and two blobs are each doing computations that require all workers. |
||
} | ||
|
||
if config.EncoderVersion == V2 { | ||
// We no longer compute the commitments in the encoder, so we don't need to load the G2 points | ||
prover, err := prover.NewProver(&config.EncoderConfig, false) | ||
// We no longer load the G2 points in V2 because the KZG commitments are computed | ||
// on the API server side. | ||
config.EncoderConfig.LoadG2Points = false | ||
prover, err := prover.NewProver(&config.EncoderConfig, encodingConfig) | ||
if err != nil { | ||
return fmt.Errorf("failed to create encoder: %w", err) | ||
} | ||
|
@@ -82,6 +97,10 @@ func RunEncoderServer(ctx *cli.Context) error { | |
} | ||
|
||
blobStoreBucketName := config.BlobStoreConfig.BucketName | ||
if blobStoreBucketName == "" { | ||
return fmt.Errorf("blob store bucket name is required") | ||
} | ||
|
||
blobStore := blobstorev2.NewBlobStore(blobStoreBucketName, s3Client, logger) | ||
logger.Info("Blob store", "bucket", blobStoreBucketName) | ||
|
||
|
@@ -101,13 +120,13 @@ func RunEncoderServer(ctx *cli.Context) error { | |
return server.Start() | ||
} | ||
|
||
prover, err := prover.NewProver(&config.EncoderConfig, true) | ||
config.EncoderConfig.LoadG2Points = true | ||
prover, err := prover.NewProver(&config.EncoderConfig, encodingConfig) | ||
if err != nil { | ||
return fmt.Errorf("failed to create encoder: %w", err) | ||
} | ||
|
||
server := encoder.NewEncoderServer(*config.ServerConfig, logger, prover, metrics, grpcMetrics) | ||
|
||
return server.Start() | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first argument is named kzgConfig in the function signature. Is this a naming issue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's acceptable