Skip to content

Commit

Permalink
Merge pull request #254 from coreweave/amercurio/sd-inference-fix
Browse files Browse the repository at this point in the history
Stable Diffusion Inference Overhaul
  • Loading branch information
harubaru authored Sep 26, 2023
2 parents 53d4d00 + eef9c62 commit f4a5946
Show file tree
Hide file tree
Showing 19 changed files with 287 additions and 576 deletions.
11 changes: 0 additions & 11 deletions online-inference/stable-diffusion/00-model-pvc.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# Secret holding the S3 access key used by the Stable Diffusion workloads.
apiVersion: v1
kind: Secret
metadata:
  name: s3-access-key
type: Opaque
# `stringData` accepts plain text and is base64-encoded by the API server.
# The placeholder is not valid base64, so it must not be placed under `data`.
stringData:
  access_key: 'Replace_this_with_your_access_key'
---
# Secret holding the S3 secret key used by the Stable Diffusion workloads.
apiVersion: v1
kind: Secret
metadata:
  name: s3-secret-key
type: Opaque
# `stringData` accepts plain text and is base64-encoded by the API server.
# The placeholder is not valid base64, so it must not be placed under `data`.
stringData:
  secret_key: 'Replace_this_with_your_secret_key'
---
# Secret holding the S3 endpoint host URL used by the Stable Diffusion workloads.
apiVersion: v1
kind: Secret
# `stringData` accepts plain text and is base64-encoded by the API server.
# The placeholder is not valid base64, so it must not be placed under `data`.
stringData:
  url: 'Replace_this_with_your_host_url'
metadata:
  name: s3-host-url
Expand Down
7 changes: 0 additions & 7 deletions online-inference/stable-diffusion/01-huggingface-secret.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
apiVersion: batch/v1
kind: Job
metadata:
name: stable-diffusion-uploader
name: stable-diffusion-serializer
spec:
template:
spec:
containers:
- name: model-uploader
image: harubaru1/s3cmd:4
- name: model-serializer
image: ghcr.io/coreweave/ml-containers/sd-inference:amercurio-sd-overhaul-7d29c61
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- >
/usr/bin/s3cmd
--access_key=${AWS_KEY}
--secret_key=${AWS_SECRET}
put --recursive --acl-public
/mnt/models/CompVis/stable-diffusion-v1-4
s3://<BUCKET URL>/
command:
- "python3"
- "/app/serialize.py"
- "--hf-model-id=runwayml/stable-diffusion-v1-5"
- "--precision=float16"
- "--dest-bucket=your-bucket-here"
env:
- name: AWS_KEY
valueFrom:
Expand All @@ -34,20 +31,13 @@ spec:
secretKeyRef:
name: s3-host-url
key: url
volumeMounts:
- name: model-cache
mountPath: /mnt/models
resources:
requests:
cpu: 1
memory: 2Gi
cpu: 2
memory: 16Gi
limits:
cpu: 1
memory: 2Gi
volumes:
- name: model-cache
persistentVolumeClaim:
claimName: stable-diffusion-model-cache
cpu: 2
memory: 16Gi
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
Expand Down
80 changes: 80 additions & 0 deletions online-inference/stable-diffusion/02-inference-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Knative Service that runs /app/service.py from the sd-inference image on
# port 80, on a single GPU.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: sd
  labels:
    knative.coreweave.cloud/ingress: kourier.ingress.networking.knative.dev
  annotations:
    networking.knative.dev/ingress-class: kourier.ingress.networking.knative.dev
spec:
  template:
    metadata:
      annotations:
        # minScale == maxScale pins the revision to exactly one replica.
        autoscaling.knative.dev/minScale: '1'
        autoscaling.knative.dev/maxScale: '1'
    spec:
      # Hard scheduling constraint: Quadro_RTX_5000 nodes in the ORD1 region.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: gpu.nvidia.com/class
                    operator: In
                    values:
                      - Quadro_RTX_5000
                  - key: topology.kubernetes.io/region
                    operator: In
                    values:
                      - ORD1
      containers:
        - name: kfserving-container
          image: ghcr.io/coreweave/ml-containers/sd-inference:amercurio-sd-overhaul-7d29c61
          command:
            - 'python3'
            - '/app/service.py'
            - '--model-uri=s3://tensorized/runwayml/stable-diffusion-v1-5'
            - '--precision=float16'
            - '--port=80'
          ports:
            - containerPort: 80
              protocol: TCP
          # S3 settings come from the s3-* Secrets. Every reference is
          # optional, so the pod still starts when a Secret is absent.
          env:
            - name: AWS_KEY
              valueFrom:
                secretKeyRef:
                  name: s3-access-key
                  key: access_key
                  optional: true
            - name: AWS_SECRET
              valueFrom:
                secretKeyRef:
                  name: s3-secret-key
                  key: secret_key
                  optional: true
            - name: AWS_HOST
              valueFrom:
                secretKeyRef:
                  name: s3-host-url
                  key: url
                  optional: true
          # Both probes issue GET / on the serving port with identical timing.
          readinessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 30
            periodSeconds: 30
          livenessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 30
            periodSeconds: 30
          # Requests equal limits for every resource.
          resources:
            limits:
              cpu: 4
              memory: 16Gi
              nvidia.com/gpu: 1
            requests:
              cpu: 4
              memory: 16Gi
              nvidia.com/gpu: 1
49 changes: 0 additions & 49 deletions online-inference/stable-diffusion/02-model-download-job.yaml

This file was deleted.

49 changes: 0 additions & 49 deletions online-inference/stable-diffusion/02-optional-serialize-job.yaml

This file was deleted.

47 changes: 0 additions & 47 deletions online-inference/stable-diffusion/03-inference-service.yaml

This file was deleted.

6 changes: 2 additions & 4 deletions online-inference/stable-diffusion/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
# Image for the Stable Diffusion inference service and the model serializer.
#
# Single-stage build on the CoreWeave torch base image. The earlier
# `nvidia/cuda` stage was never referenced by a later stage, so it is dropped.
FROM ghcr.io/coreweave/ml-containers/torch:afecfe9-base-cuda11.8.0-torch2.0.0-vision0.15.1
ENV DEBIAN_FRONTEND=noninteractive

# Use apt-get (stable CLI for scripts), refresh the package index once, and
# remove the index files in the same layer so they do not bloat the image.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y python3 python3-pip git curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# COPY is enough for local files; ADD's URL/archive handling is not needed.
COPY service/ /app/
COPY serializer/serialize.py /app/serialize.py
WORKDIR /app

# NOTE(review): requirements.txt is presumably shipped inside service/ - confirm.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Default entry point; the Kubernetes manifests may override it via `command`.
CMD ["python3", "/app/service.py"]
7 changes: 0 additions & 7 deletions online-inference/stable-diffusion/Dockerfile.downloader

This file was deleted.

9 changes: 0 additions & 9 deletions online-inference/stable-diffusion/Dockerfile.s3

This file was deleted.

12 changes: 0 additions & 12 deletions online-inference/stable-diffusion/Dockerfile.serializer

This file was deleted.

6 changes: 0 additions & 6 deletions online-inference/stable-diffusion/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,2 @@
# Stable Diffusion
Please refer to [CoreWeave Docs](https://docs.coreweave.com/machine-learning-and-ai/inference/examples/pytorch-jax/hugging-face/pytorch-hugging-face-diffusers-stable-diffusion-text-to-image) for a deployment tutorial.

## Example:
![stable diffusion, digital art, trending, HD](stable-diffusion.png)
```
{"prompt": "stable diffusion, digital art, trending, HD", "parameters": {"seed": 42424242}}
```
Loading

0 comments on commit f4a5946

Please sign in to comment.