diff --git a/kubernetes_yaml/chatbot/README.md b/kubernetes_yaml/chatbot/README.md
new file mode 100644
index 00000000..065e2d65
--- /dev/null
+++ b/kubernetes_yaml/chatbot/README.md
@@ -0,0 +1,36 @@
+## Simple chatbot
+
+This folder holds the resource definitions to launch a chatbot.
+Before deploying, update the values in [configmap.yaml](./configmap.yaml) and [secret-token.yaml](./secret-token.yaml).
+Specifically, the `model_endpoint` value must be provided.
+Optionally, `model_name` and `api_key` can be provided.
+
+Update the deployment as necessary and
+run this from the root of the repository:
+
+
+```bash
+oc apply --kustomize ./kubernetes_yaml/chatbot
+```
+
+### Chatbot
+
+The chatbot image is built from
+[ai-lab-recipes repository chatbot](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/Containerfile)
+with the below system prompt line from
+[chatbot_ui.py](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/chatbot_ui.py)
+commented out, since it's not compatible with vLLM:
+
+```python
+prompt = ChatPromptTemplate.from_messages([
+    #("system", "You are world class technical advisor."),
+    MessagesPlaceholder(variable_name="history"),
+    ("user", "{input}")
+])
+```
+
+
+## Candidate model inference service
+
+This folder also contains an example InferenceService definition. Modify [candidate-server.yaml](./candidate-server.yaml) as needed to launch a model
+from `S3` with `vLLM`.
diff --git a/kubernetes_yaml/chatbot/candidate-server.yaml b/kubernetes_yaml/chatbot/candidate-server.yaml new file mode 100644 index 00000000..ca891d8a --- /dev/null +++ b/kubernetes_yaml/chatbot/candidate-server.yaml @@ -0,0 +1,36 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: candidate + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + finalizers: + - inferenceservice.finalizers + labels: + opendatahub.io/dashboard: "true" + name: candidatemodel +spec: + predictor: + maxReplicas: 1 + minReplicas: 1 + model: + modelFormat: + name: vLLM + name: "" + resources: + limits: + cpu: "2" + memory: 8Gi + nvidia.com/gpu: "1" + requests: + cpu: "1" + memory: 4Gi + nvidia.com/gpu: "1" + runtime: candidatemodel + storage: + # Update to match project s3 storage + key: storage-s3 + # Update path to match candidate-server location + path: xxxxxxxx/xxxxxxxxx/pvc-to-model-op/model/phase_2/model/hf_format/candidate_model/ diff --git a/kubernetes_yaml/chatbot/configmap.yaml b/kubernetes_yaml/chatbot/configmap.yaml new file mode 100644 index 00000000..02713769 --- /dev/null +++ b/kubernetes_yaml/chatbot/configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatbot-config +data: + model_name: UPDATE + model_endpoint: UPDATE diff --git a/kubernetes_yaml/chatbot/deployment.yaml b/kubernetes_yaml/chatbot/deployment.yaml new file mode 100644 index 00000000..88f18482 --- /dev/null +++ b/kubernetes_yaml/chatbot/deployment.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatbot + labels: + app: chatbot +spec: + replicas: 1 + selector: + matchLabels: + app: chatbot + template: + metadata: + labels: + app: chatbot + spec: + serviceAccountName: chatbot-sa + containers: + - name: chatbot-inference + image: quay.io/sallyom/chatbot:vllm + env: + - name: MODEL_NAME + 
valueFrom: + configMapKeyRef: + name: chatbot-config + key: model_name + - name: MODEL_ENDPOINT + valueFrom: + configMapKeyRef: + name: chatbot-config + key: model_endpoint + - name: MODEL_ENDPOINT_BEARER + valueFrom: + secretKeyRef: + name: model-token + key: api_key + ports: + - containerPort: 8501 + securityContext: + runAsNonRoot: true diff --git a/kubernetes_yaml/chatbot/kustomization.yaml b/kubernetes_yaml/chatbot/kustomization.yaml new file mode 100644 index 00000000..d4d42ad9 --- /dev/null +++ b/kubernetes_yaml/chatbot/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - deployment.yaml + - service.yaml + - route.yaml + - sa.yaml + - configmap.yaml + - secret-token.yaml diff --git a/kubernetes_yaml/chatbot/route.yaml b/kubernetes_yaml/chatbot/route.yaml new file mode 100644 index 00000000..678b55db --- /dev/null +++ b/kubernetes_yaml/chatbot/route.yaml @@ -0,0 +1,14 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: chatbot + labels: + app: chatbot +spec: + to: + kind: Service + name: chatbot-service + port: + targetPort: 8501 + tls: + termination: edge diff --git a/kubernetes_yaml/chatbot/sa.yaml b/kubernetes_yaml/chatbot/sa.yaml new file mode 100644 index 00000000..23b19f60 --- /dev/null +++ b/kubernetes_yaml/chatbot/sa.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chatbot-sa diff --git a/kubernetes_yaml/chatbot/secret-token.yaml b/kubernetes_yaml/chatbot/secret-token.yaml new file mode 100644 index 00000000..40fa9e43 --- /dev/null +++ b/kubernetes_yaml/chatbot/secret-token.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: model-token +type: Opaque +stringData: + api_key: "xxx" diff --git a/kubernetes_yaml/chatbot/service.yaml b/kubernetes_yaml/chatbot/service.yaml new file mode 100644 index 00000000..ba5d78c5 --- /dev/null +++ b/kubernetes_yaml/chatbot/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service 
+metadata: + name: chatbot-service + labels: + app: chatbot +spec: + selector: + app: chatbot + ports: + - protocol: TCP + port: 8501 + targetPort: 8501