Merge pull request #254 from coreweave/amercurio/sd-inference-fix

Stable Diffusion Inference Overhaul
coreweave · Sep 26, 2023 · f4a5946
2 parents 53d4d00 + eef9c62
commit f4a5946
Show file tree

Hide file tree

Showing 19 changed files with 287 additions and 576 deletions.
diff --git a/online-inference/stable-diffusion/00-model-pvc.yaml b/online-inference/stable-diffusion/00-model-pvc.yaml
diff --git a/...able-diffusion/01-optional-s3-secret.yaml → ...able-diffusion/00-optional-s3-secret.yaml b/...able-diffusion/01-optional-s3-secret.yaml → ...able-diffusion/00-optional-s3-secret.yaml
@@ -1,22 +1,23 @@
+# Values under stringData are plain text; Kubernetes base64-encodes them into data on write.
 apiVersion: v1
-data:
-  access_key: Q0hBTkdFTUU=
+stringData:
+  access_key: Replace_this_with_your_access_key
 kind: Secret
 metadata:
   name: s3-access-key
 type: Opaque
 ---
 apiVersion: v1
-data:
-  secret_key: Q0hBTkdFTUU=
+stringData:
+  secret_key: Replace_this_with_your_secret_key
 kind: Secret
 metadata:
   name: s3-secret-key
 type: Opaque
 ---
 apiVersion: v1
-data:
-  url: Q0hBTkdFTUU=
+stringData:
+  url: Replace_this_with_your_host_url
 kind: Secret
 metadata:
   name: s3-host-url

diff --git a/online-inference/stable-diffusion/01-huggingface-secret.yaml b/online-inference/stable-diffusion/01-huggingface-secret.yaml
diff --git a/...-diffusion/03-optional-s3-upload-job.yaml → ...ffusion/01-optional-s3-serialize-job.yaml b/...-diffusion/03-optional-s3-upload-job.yaml → ...ffusion/01-optional-s3-serialize-job.yaml
@@ -1,23 +1,20 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: stable-diffusion-uploader
+  name: stable-diffusion-serializer
 spec:
   template:
     spec:
       containers:
-      - name: model-uploader
-        image: harubaru1/s3cmd:4
+      - name: model-serializer
+        image: ghcr.io/coreweave/ml-containers/sd-inference:amercurio-sd-overhaul-7d29c61
         imagePullPolicy: IfNotPresent
-        command: ["/bin/sh", "-c"]
-        args:
-          - >
-            /usr/bin/s3cmd
-            --access_key=${AWS_KEY}
-            --secret_key=${AWS_SECRET}
-            put --recursive --acl-public
-            /mnt/models/CompVis/stable-diffusion-v1-4
-            s3://<BUCKET URL>/
+        command:
+          - "python3"
+          - "/app/serialize.py"
+          - "--hf-model-id=runwayml/stable-diffusion-v1-5"
+          - "--precision=float16"
+          - "--dest-bucket=your-bucket-here"
         env:
         - name: AWS_KEY
           valueFrom:
@@ -34,20 +31,13 @@ spec:
             secretKeyRef:
               name: s3-host-url
               key: url
-        volumeMounts:
-          - name: model-cache
-            mountPath: /mnt/models
         resources:
           requests:
-            cpu: 1
-            memory: 2Gi 
+            cpu: 2
+            memory: 16Gi
           limits:
-            cpu: 1
-            memory: 2Gi 
-      volumes:
-        - name: model-cache
-          persistentVolumeClaim:
-            claimName: stable-diffusion-model-cache
+            cpu: 2
+            memory: 16Gi
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:

diff --git a/online-inference/stable-diffusion/02-inference-service.yaml b/online-inference/stable-diffusion/02-inference-service.yaml
@@ -0,0 +1,86 @@
+# Knative Service that serves Stable Diffusion v1.5 from a single GPU-backed pod.
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: sd
+  annotations:
+    networking.knative.dev/ingress-class: kourier.ingress.networking.knative.dev
+  labels:
+    knative.coreweave.cloud/ingress: kourier.ingress.networking.knative.dev
+spec:
+  template:
+    metadata:
+      annotations:
+        # minScale == maxScale pins the revision to exactly one replica.
+        autoscaling.knative.dev/minScale: "1"
+        autoscaling.knative.dev/maxScale: "1"
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            # Expressions within one term are ANDed: GPU class and region must both match.
+            - matchExpressions:
+              - key: gpu.nvidia.com/class
+                operator: In
+                values:
+                - Quadro_RTX_5000
+              - key: topology.kubernetes.io/region
+                operator: In
+                values:
+                - ORD1
+      containers:
+      - name: kfserving-container
+        image: ghcr.io/coreweave/ml-containers/sd-inference:amercurio-sd-overhaul-7d29c61
+        command:
+        - "python3"
+        - "/app/service.py"
+        - "--model-uri=s3://tensorized/runwayml/stable-diffusion-v1-5"
+        - "--precision=float16"
+        - "--port=80"
+        env:
+        # Each secretKeyRef is optional, so the pod still starts when the S3 Secrets are absent.
+        - name: AWS_KEY
+          valueFrom:
+            secretKeyRef:
+              name: s3-access-key
+              key: access_key
+              optional: true
+        - name: AWS_SECRET
+          valueFrom:
+            secretKeyRef:
+              name: s3-secret-key
+              key: secret_key
+              optional: true
+        - name: AWS_HOST
+          valueFrom:
+            secretKeyRef:
+              name: s3-host-url
+              key: url
+              optional: true
+        ports:
+        # Same port as the --port flag above and as both probes below.
+        - protocol: TCP
+          containerPort: 80
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 80
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 80
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        resources:
+          # Requests and limits are set to identical values for every resource.
+          requests:
+            cpu: 4
+            memory: 16Gi
+            nvidia.com/gpu: 1
+          limits:
+            cpu: 4
+            memory: 16Gi
+            nvidia.com/gpu: 1
diff --git a/online-inference/stable-diffusion/02-model-download-job.yaml b/online-inference/stable-diffusion/02-model-download-job.yaml
diff --git a/online-inference/stable-diffusion/02-optional-serialize-job.yaml b/online-inference/stable-diffusion/02-optional-serialize-job.yaml
diff --git a/online-inference/stable-diffusion/03-inference-service.yaml b/online-inference/stable-diffusion/03-inference-service.yaml
diff --git a/online-inference/stable-diffusion/Dockerfile b/online-inference/stable-diffusion/Dockerfile
@@ -1,15 +1,13 @@
-ARG CUDA_RELEASE=11.6.2-cudnn8-devel-ubuntu20.04
-FROM nvidia/cuda:${CUDA_RELEASE} AS base
+FROM ghcr.io/coreweave/ml-containers/torch:afecfe9-base-cuda11.8.0-torch2.0.0-vision0.15.1
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt update && apt upgrade -y && \
     apt update && apt install -y python3 python3-pip git curl && \
     apt clean
 
 ADD service/ /app/
+COPY serializer/serialize.py /app/serialize.py
 WORKDIR /app
 
 RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir -r requirements.txt
-
-CMD ["python3", "/app/service.py"]
diff --git a/online-inference/stable-diffusion/Dockerfile.downloader b/online-inference/stable-diffusion/Dockerfile.downloader
diff --git a/online-inference/stable-diffusion/Dockerfile.s3 b/online-inference/stable-diffusion/Dockerfile.s3
diff --git a/online-inference/stable-diffusion/Dockerfile.serializer b/online-inference/stable-diffusion/Dockerfile.serializer
diff --git a/online-inference/stable-diffusion/README.md b/online-inference/stable-diffusion/README.md
@@ -1,8 +1,2 @@
 # Stable Diffusion
 Please refer to [CoreWeave Docs](https://docs.coreweave.com/machine-learning-and-ai/inference/examples/pytorch-jax/hugging-face/pytorch-hugging-face-diffusers-stable-diffusion-text-to-image) for a deployment tutorial. 
-
-## Example:
-![stable diffusion, digital art, trending, HD](stable-diffusion.png)
-```
-{"prompt": "stable diffusion, digital art, trending, HD", "parameters": {"seed": 42424242}}
-```