Add runtime template of vLLM ROCM (#280)

* Add runtime template of vLLM ROCM

  Signed-off-by: Vaibhav Jain <[email protected]>

* Fix runtime image reference

  Signed-off-by: Vaibhav Jain <[email protected]>

* Update template description to highlight ROCm

  Signed-off-by: Vaibhav Jain <[email protected]>

---------

Signed-off-by: Vaibhav Jain <[email protected]>
opendatahub-io · Nov 4, 2024 · 18a5e80 · 18a5e80
1 parent fe9e14b
commit 18a5e80
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 0 deletions.
diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml
@@ -8,6 +8,7 @@ namespace: opendatahub
 configMapGenerator:
   - envs:
       - params.env
+      - params-vllm-rocm.env
     name: odh-model-controller-parameters
 generatorOptions:
   disableNameSuffixHash: true
@@ -84,6 +85,17 @@ replacements:
           name: vllm-runtime-template
         fieldPaths:
           - objects.0.spec.containers.0.image
+  - source:  # copies the ROCm vLLM image reference from the generated ConfigMap into the runtime template
+      kind: ConfigMap
+      version: v1
+      name: odh-model-controller-parameters
+      fieldPath: data.vllm-rocm-image  # key supplied by params-vllm-rocm.env
+    targets:
+      - select:
+          kind: Template
+          name: vllm-rocm-runtime-template
+        fieldPaths:
+          - objects.0.spec.containers.0.image  # image of the first container in the template's first object
   - source:
       kind: ConfigMap
       version: v1

diff --git a/config/base/params-vllm-rocm.env b/config/base/params-vllm-rocm.env
@@ -0,0 +1 @@
+vllm-rocm-image=quay.io/opendatahub/vllm:fast-rocm
diff --git a/config/runtimes/kustomization.yaml b/config/runtimes/kustomization.yaml
@@ -11,4 +11,5 @@ resources:
   - tgis-template.yaml
   - ovms-kserve-template.yaml
   - vllm-template.yaml
+  - vllm-rocm-template.yaml
   - caikit-standalone-template.yaml
diff --git a/config/runtimes/vllm-rocm-template.yaml b/config/runtimes/vllm-rocm-template.yaml
@@ -0,0 +1,51 @@
+apiVersion: template.openshift.io/v1
+kind: Template  # OpenShift Template wrapping one KServe ServingRuntime that runs vLLM on ROCm
+metadata:
+  labels:
+    opendatahub.io/dashboard: 'true'
+    opendatahub.io/ootb: 'true'
+  annotations:
+    description: vLLM ServingRuntime to support ROCm (for AMD GPUs)
+    openshift.io/display-name: vLLM ROCm ServingRuntime for KServe
+    openshift.io/provider-display-name: Red Hat, Inc.
+    tags: rhods,rhoai,kserve,servingruntime
+    template.openshift.io/documentation-url: https://github.com/opendatahub-io/vllm
+    template.openshift.io/long-description: This template defines resources needed to deploy vLLM ServingRuntime with KServe in Red Hat OpenShift AI
+    opendatahub.io/modelServingSupport: '["single"]'  # JSON list kept as a string; presumably single-model serving, in line with multiModel: false below
+    opendatahub.io/apiProtocol: 'REST'
+  name: vllm-rocm-runtime-template  # name selected by the image replacement in config/base/kustomization.yaml
+objects:
+  - apiVersion: serving.kserve.io/v1alpha1
+    kind: ServingRuntime
+    metadata:
+      name: vllm-rocm-runtime
+      annotations:
+        openshift.io/display-name: vLLM ROCm ServingRuntime for KServe
+        opendatahub.io/recommended-accelerators: '["amd.com/gpu"]'  # AMD GPU resource, matching the ROCm focus of the description
+      labels:
+        opendatahub.io/dashboard: 'true'
+    spec:
+      annotations:
+        prometheus.io/port: '8080'  # same port as --port and containerPort below
+        prometheus.io/path: '/metrics'
+      multiModel: false
+      supportedModelFormats:
+        - autoSelect: true
+          name: vLLM
+      containers:
+        - name: kserve-container
+          image: $(vllm-rocm-image)  # placeholder; overwritten by the kustomize replacement sourced from data.vllm-rocm-image
+          command:  # starts vLLM's OpenAI-compatible API server module
+            - python
+            - -m
+            - vllm.entrypoints.openai.api_server
+          args:
+            - "--port=8080"
+            - "--model=/mnt/models"  # NOTE(review): presumably the path where the model is mounted; confirm
+            - "--served-model-name={{.Name}}"  # {{.Name}} is a template placeholder; presumably resolved at deploy time, confirm
+          env:
+            - name: HF_HOME
+              value: /tmp/hf_home  # NOTE(review): presumably a writable Hugging Face cache location; confirm
+          ports:
+            - containerPort: 8080
+              protocol: TCP
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		vllm-rocm-image=quay.io/opendatahub/vllm:fast-rocm