Skip to content

Commit

Permalink
Init gptj benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: yankai14 <[email protected]>
  • Loading branch information
yankai14 committed Nov 1, 2023
1 parent fb0da55 commit 44e155d
Show file tree
Hide file tree
Showing 19 changed files with 1,040 additions and 2,461 deletions.
196 changes: 196 additions & 0 deletions .github/workflows/e2e-gptj.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# End-to-end CI for the GPT-J benchmark.
# Triggers: manual dispatch, a weekly schedule, and pushes / pull requests
# against `main` that touch the benchmark or the shared tooling directories.
name: GPTJ End-to-End Tests

on:
  workflow_dispatch:
  schedule:
    # Every Monday at 09:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 9 * * 1"
  push:
    branches: [main]
    paths:
      - "benchmarks/gptj/**"
      - "utils/**"
      - "tools/**"
      - "runner/**"

  pull_request:
    branches: [main]
    paths:
      - "benchmarks/gptj/**"
      - "utils/**"
      - "tools/**"
      - "runner/**"

# Workflow-wide environment variables. Values are quoted wherever a YAML
# parser could infer a non-string type: a bare `on` is a boolean under
# YAML 1.1 rules, and GO111MODULE must be the literal string "on".
env:
  GOOS: linux
  GO111MODULE: "on"
  PORT: "50051"
  PLATFORMS: linux/amd64,linux/arm64

jobs:
# Job (belongs under the top-level `jobs:` mapping).
# Builds the benchmark image from benchmarks/gptj/Dockerfile for every
# platform listed in env.PLATFORMS and pushes it to Docker Hub as
# vhiveease/<service>:latest.
build-and-push:
  name: Build and push all images
  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      service:
        - gptj-python
  steps:
    - name: Check out code into the Go module directory
      uses: actions/checkout@v4
      with:
        lfs: "true"

    - uses: actions/setup-go@v4
      with:
        go-version: '1.21'

    - name: Login to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKER_HUB_USERNAME }}
        password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

    # QEMU emulation is what allows Buildx to build the non-native
    # architectures listed in env.PLATFORMS.
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      id: buildx
      uses: docker/setup-buildx-action@v3

    - name: Build and push
      # NOTE(review): GOPRIVATE_KEY is exported to the step but is not passed
      # to the build as a build-arg or secret here - confirm it is needed.
      env:
        GOPRIVATE_KEY: ${{ secrets.XDT_REPO_ACCESS_KEY }}
      uses: docker/build-push-action@v5
      with:
        push: true
        file: benchmarks/gptj/Dockerfile
        platforms: ${{ env.PLATFORMS }}
        # No `target:` input: the matrix defines no `target` key, so the
        # previous `${{ matrix.target }}` always expanded to an empty string.
        tags: vhiveease/${{ matrix.service }}:latest
        build-args: SERVICE=${{ matrix.service }}
        context: .



# Job (belongs under the top-level `jobs:` mapping).
# Pulls the image pushed by `build-and-push`, starts it with docker-compose,
# and invokes it once with grpcurl and once with the Go test client.
test-compose:
  name: Test Docker Compose
  needs: build-and-push
  env:
    YAML_DIR: benchmarks/gptj/yamls/docker-compose/
  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      service:
        - gptj-python

  steps:
    - name: Check out code into the Go module directory
      uses: actions/checkout@v4
      with:
        lfs: "true"

    - name: start docker-compose benchmark
      # The stack runs in the background with its output captured in
      # log_file; the fixed sleep gives the service time to come up.
      run: |
        docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml pull
        docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml up &> log_file &
        sleep 60s
        cat log_file
    - name: invoke the chain
      run: |
        ./tools/bin/grpcurl -plaintext -d '{"regenerate": "false"}' localhost:50051 gptj.GptJBenchmark.GetBenchmark
    - name: invoke the relay
      # NOTE(review): this targets port 50000 while the grpcurl call above
      # uses 50051 - confirm the compose file exposes a relay on 50000.
      # NOTE(review): no setup-go step in this job; `go build` relies on the
      # Go toolchain preinstalled on the runner image - confirm intended.
      working-directory: tools/test-client
      run: |
        go build ./test-client.go
        ./test-client --addr localhost:50000 --name "Example text for CI"
    - name: show docker-compose log
      # Run even when an earlier step failed: the service log is most useful
      # precisely when an invocation did not succeed.
      if: ${{ always() }}
      run: cat log_file

# Job (belongs under the top-level `jobs:` mapping).
# Creates a cluster via the runner scripts, installs Knative Serving and
# Kourier, deploys the benchmark's Knative manifest, and invokes it once
# with the Go test client.
test-knative:
  name: Test Knative Deployment
  needs: build-and-push
  env:
    # NOTE(review): KIND_VERSION / K8S_VERSION are not referenced in this
    # workflow; presumably read by the runner scripts - confirm.
    KIND_VERSION: v0.14.0
    K8S_VERSION: v1.23
    YAML_DIR: benchmarks/gptj/yamls/knative/

  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      include:
        - service: gptj-python
          file: kn-gptj-python.yaml
  steps:
    - uses: actions/checkout@v4
      with:
        lfs: "true"
    - name: Checkout LFS objects
      run: git lfs checkout

    - uses: actions/setup-go@v4
      with:
        go-version: '1.21'

    ## Setup a Knative cluster to test the service
    - name: Create k8s Kind Cluster
      run: bash ./runner/scripts/01-kind.sh

    - name: Install Serving
      run: bash ./runner/scripts/02-serving.sh

    - name: Install Kourier
      run: bash ./runner/scripts/02-kourier.sh

    - name: Setup domain
      # Registers 127.0.0.1.sslip.io as a key in Knative's config-domain
      # ConfigMap.
      run: |
        INGRESS_HOST="127.0.0.1"
        KNATIVE_DOMAIN=$INGRESS_HOST.sslip.io
        kubectl patch configmap -n knative-serving config-domain -p "{\"data\": {\"$KNATIVE_DOMAIN\": \"\"}}"
    - name: Deploy knative
      run: |
        kubectl apply -f ${{ env.YAML_DIR }}/${{ matrix.file }}
    - name: Check if service is ready
      run: |
        kubectl wait --for=condition=Ready -f ${{ env.YAML_DIR }}/${{ matrix.file }} --timeout 900s
        kubectl get service
        kubectl get -f ${{ env.YAML_DIR }}/${{ matrix.file }}
    - name: Test invoking once
      working-directory: tools/test-client
      # The awk expression strips the "http://" scheme from the kservice URL
      # column so that only the host is passed to the test client.
      run: |
        set -x
        go build ./test-client.go
        NODEPORT=80
        url=$(kubectl get kservice ${{ matrix.service }} | awk '$2 ~ /http/ {sub(/http\:\/\//,""); print $2}')
        ./test-client --addr $url:$NODEPORT --name "Example text for CI"
    - name: Print logs
      if: ${{ always() }}
      # A Pod's name lives under `.metadata.name`; the previous
      # `.items[*].name` path matched nothing, so the loop never ran and no
      # logs were printed.
      run: |
        set -x
        pod_list=$(kubectl get pods -n default -o jsonpath="{.items[*].metadata.name}")
        for pod in $pod_list
        do
          kubectl logs $pod
        done
    - name: Down
      if: ${{ always() }}
      run: |
        kubectl delete -f ${{ env.YAML_DIR }}/${{ matrix.file }} --namespace default --wait
1 change: 1 addition & 0 deletions benchmarks/gptj/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Exclude directories whose path ends in "venv/" from the Docker build context
# (presumably local Python virtual environments - confirm).
**venv/
38 changes: 38 additions & 0 deletions benchmarks/gptj/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Image for the gptj-python benchmark: installs LoadGen, copies the benchmark
# sources, adds the gptj proto files from vSwarm-proto, installs the Python
# requirements, and starts the server on port 50051.

# Use an official Python runtime as the base image
FROM python:3.10-slim

# System packages needed to clone and build the dependencies below.
# The apt package lists are removed in the same layer so they do not end up
# in the final image.
RUN apt-get update \
    && apt-get install -y pbzip2 pv bzip2 libcurl4 curl python3.10 git build-essential \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Install third_party library (pybind11 pinned to a fixed commit) and LoadGen.
# Both are handled in a single RUN so that deleting the checkouts at the end
# actually keeps them out of the image; removing files in a later layer does
# not shrink the layer that created them.
RUN mkdir /tmp/third_party \
    && cd /tmp/third_party \
    && git clone https://github.com/pybind/pybind11.git \
    && mv pybind11 pybind \
    && cd /tmp/third_party/pybind \
    && git reset --hard 25abf7efba \
    && cd /tmp/ \
    && git clone https://github.com/lrq619/loadgen.git \
    && cd /tmp/loadgen \
    && python3 setup.py install \
    && cd /tmp \
    && rm -rf /tmp/loadgen \
    && rm -rf /tmp/third_party

COPY benchmarks/gptj/python /workspace/python

# Add the gptj proto files next to the server sources, then drop the clone.
# NOTE(review): `feature/gptj` is a branch, not a pinned commit, so the files
# pulled in here can change between builds - consider pinning.
RUN cd /tmp/ \
    && git clone https://github.com/vhive-serverless/vSwarm-proto.git \
    && cd /tmp/vSwarm-proto \
    && git checkout feature/gptj \
    && mv /tmp/vSwarm-proto/proto/gptj/* /workspace/python \
    && cd /tmp \
    && rm -rf /tmp/vSwarm-proto

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir pip==23.3
RUN cd /workspace/python && pip3 install --no-cache-dir -r /workspace/python/requirements.txt

EXPOSE 50051

# Paths are relative to WORKDIR (/workspace).
ENTRYPOINT [ "python3", "python/server.py", "--dataset-path=python/data/cnn_eval.json", "--mlperf_conf=python/config/mlperf.conf", "--user_conf=python/config/user.conf"]
22 changes: 22 additions & 0 deletions benchmarks/gptj/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Gptj Benchmark

The `Gptj` benchmark runs inference tasks with the GPT-J large language model.

The function is currently implemented in only one runtime: Python.


## Running this benchmark locally (using docker)

A detailed, general description of how to run benchmarks locally can be found [here](../../docs/running_locally.md). The following steps show how to do it for the `gptj-python` function.

### Invoke once
1. Start the function with docker-compose
```bash
docker-compose -f ./yamls/docker-compose/dc-gptj-python.yaml up
```
2. In a new terminal, invoke the interface function with grpcurl.
```bash
./tools/bin/grpcurl -plaintext -d '{"regenerate": "false"}' localhost:50051 gptj.GptJBenchmark.GetBenchmark
```
This outputs the minimum, maximum, and mean inference time for one inference; it may take a few seconds.
Since inference with the GPT-J model may take a while, the latency information is cached in a text file. To regenerate the text file by running another inference, change "false" to "true".
Loading

0 comments on commit 44e155d

Please sign in to comment.