add simple chatbot definition
Signed-off-by: sallyom <[email protected]>
sallyom committed Nov 6, 2024
1 parent b4cb233 commit 2ebed09
Showing 7 changed files with 150 additions and 0 deletions.
38 changes: 38 additions & 0 deletions kubernetes_yaml/chatbot/README.md
@@ -0,0 +1,38 @@
## Simple chatbot

This folder holds the resource definitions to launch a chatbot.
The environment variable `MODEL_ENDPOINT` must be provided.
Optionally, `MODEL_ENDPOINT_BEARER` and `MODEL_NAME` can also be provided.
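
The deployment reads `MODEL_ENDPOINT_BEARER` from a Secret named `judge-server` under the key `api_key`
(see [chatbot-deployment.yaml](./chatbot-deployment.yaml)). A minimal sketch for creating that Secret,
with a placeholder token value:

```bash
# Create the Secret referenced by the MODEL_ENDPOINT_BEARER secretKeyRef
oc create secret generic judge-server \
  --from-literal=api_key=<your-bearer-token>
```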

Update the deployment as necessary, then run the following from the root of the repository:

```bash
oc apply --kustomize ./chatbot
```
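
Once applied, a quick way to verify the rollout and find the chatbot URL (resource names follow the manifests in this folder):

```bash
# Check the deployment and its pods, then print the route hostname
oc get deployment chatbot
oc get pods -l app=chatbot
oc get route chatbot -o jsonpath='{.spec.host}{"\n"}'
```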

TODO:
- Get `MODEL_ENDPOINT` from a ConfigMap or Secret; currently it must be updated directly in `chatbot-deployment.yaml` (one possible shape is sketched below).
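
One possible shape for that change, using a hypothetical ConfigMap named `chatbot-config`:

```yaml
# Hypothetical ConfigMap holding the model endpoint
apiVersion: v1
kind: ConfigMap
metadata:
  name: chatbot-config
data:
  MODEL_ENDPOINT: https://mixtral-sallyom.apps.ocp-beta-test.nerc.mghpcc.org
```

The `MODEL_ENDPOINT` entry in `chatbot-deployment.yaml` could then use a `configMapKeyRef` instead of a hard-coded value:

```yaml
- name: MODEL_ENDPOINT
  valueFrom:
    configMapKeyRef:
      name: chatbot-config
      key: MODEL_ENDPOINT
```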

### Chatbot

The chatbot image is built from
[ai-lab-recipes repository chatbot](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/Containerfile)
with the system prompt line below from
[chatbot_ui.py](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/chatbot_ui.py)
commented out, since it is not compatible with vLLM:

```python
prompt = ChatPromptTemplate.from_messages([
    # ("system", "You are world class technical advisor."),
    MessagesPlaceholder(variable_name="history"),
    ("user", "{input}")
])
```


## Candidate model inference service

This folder also contains an example InferenceService definition. Modify [candidate-server.yaml](./candidate-server.yaml) as needed to launch a model
from `S3` with `vLLM`.
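
To create the InferenceService and confirm it comes up, run the following (paths follow the kustomize command above; the name and namespace match [candidate-server.yaml](./candidate-server.yaml)):

```bash
# Apply the candidate model InferenceService and check its status
oc apply -f ./chatbot/candidate-server.yaml
oc get inferenceservice candidatemodel -n ilab
```
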
37 changes: 37 additions & 0 deletions kubernetes_yaml/chatbot/candidate-server.yaml
@@ -0,0 +1,37 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    openshift.io/display-name: candidate
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
  finalizers:
    - inferenceservice.finalizers
  generation: 2
  labels:
    opendatahub.io/dashboard: "true"
  name: candidatemodel
  namespace: ilab
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        name: vLLM
      name: ""
      resources:
        limits:
          cpu: "2"
          memory: 8Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "1"
          memory: 4Gi
          nvidia.com/gpu: "1"
      runtime: candidatemodel
      storage:
        key: aws-connection-ilab-s3
        # Update this path to the candidate model's location in the S3 bucket
        path: xxxxxxxx/xxxxxxxxx/pvc-to-model-op/model/phase_2/model/hf_format/candidate_model/
36 changes: 36 additions & 0 deletions kubernetes_yaml/chatbot/chatbot-deployment.yaml
@@ -0,0 +1,36 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatbot
  labels:
    app: chatbot
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatbot
  template:
    metadata:
      labels:
        app: chatbot
    spec:
      serviceAccountName: chatbot-sa
      containers:
        - name: chatbot-inference
          image: quay.io/sallyom/chatbot:vllm
          env:
            - name: MODEL_NAME
              # Update this value to the name of the model being served
              value: mixtral
            - name: MODEL_ENDPOINT
              # Update this value to the endpoint of a running model server
              value: https://mixtral-sallyom.apps.ocp-beta-test.nerc.mghpcc.org
            - name: MODEL_ENDPOINT_BEARER
              valueFrom:
                secretKeyRef:
                  name: judge-server
                  key: api_key
          ports:
            - containerPort: 8501
          securityContext:
            runAsNonRoot: true
8 changes: 8 additions & 0 deletions kubernetes_yaml/chatbot/kustomization.yaml
@@ -0,0 +1,8 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
- chatbot-deployment.yaml
- service.yaml
- route.yaml
- sa.yaml
14 changes: 14 additions & 0 deletions kubernetes_yaml/chatbot/route.yaml
@@ -0,0 +1,14 @@
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: chatbot
  labels:
    app: chatbot
spec:
  to:
    kind: Service
    name: chatbot-service
  port:
    targetPort: 8501
  tls:
    termination: edge
4 changes: 4 additions & 0 deletions kubernetes_yaml/chatbot/sa.yaml
@@ -0,0 +1,4 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: chatbot-sa
13 changes: 13 additions & 0 deletions kubernetes_yaml/chatbot/service.yaml
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: chatbot-service
  labels:
    app: chatbot
spec:
  selector:
    app: chatbot
  ports:
    - protocol: TCP
      port: 8501
      targetPort: 8501
