Commit db95458: 510

Houssem Dellai committed Sep 18, 2024 (1 parent: dccf03a)

Showing 20 changed files with 401 additions and 16 deletions.
2 changes: 1 addition & 1 deletion 260_private_aks_bastion/Readme.md
@@ -53,7 +53,7 @@ Once you are connected to the Azure VM, run the following command to connect to
az login --identity

# get the credentials of the AKS cluster
-az aks get-credentials -g rg-private-aks-bastion-260 -n aks-private-260
+az aks get-credentials -g rg-private-aks-bastion-260 -n aks-cluster

# verify the connection
kubectl get nodes
12 changes: 1 addition & 11 deletions 260_private_aks_bastion/aks.tf
@@ -30,14 +30,4 @@ resource "azurerm_kubernetes_cluster" "aks" {
      default_node_pool.0.upgrade_settings
    ]
  }
-}
-
-resource "terraform_data" "aks-get-credentials" {
-  triggers_replace = [
-    azurerm_kubernetes_cluster.aks.id
-  ]
-
-  provisioner "local-exec" {
-    command = "az aks get-credentials -n ${azurerm_kubernetes_cluster.aks.name} -g ${azurerm_kubernetes_cluster.aks.resource_group_name} --overwrite-existing"
-  }
-}
+}
2 changes: 1 addition & 1 deletion 260_private_aks_bastion/install-tools.sh
@@ -13,6 +13,6 @@ snap install kubectl --classic

# az aks list -o table

-# az aks get-credentials -n aks-cluster -g rg-spoke-202 --overwrite-existing
+# az aks get-credentials -g rg-private-aks-bastion-260 -n aks-private-260

# kubectl get nodes
51 changes: 51 additions & 0 deletions 510_ai_ollama_k8s/Readme.md
@@ -0,0 +1,51 @@
# Ollama AI model deployment on Azure Kubernetes Service (AKS)

Reference: the Open WebUI Kubernetes manifests at https://github.com/open-webui/open-webui/tree/main/kubernetes/manifest/base

```sh
AKS_RG="rg-aks-ollama-llm"
AKS_NAME="aks-cluster"

# create resource group
az group create -n $AKS_RG -l swedencentral

# create an AKS cluster
az aks create -n $AKS_NAME -g $AKS_RG --network-plugin azure --network-plugin-mode overlay -k 1.30.3 --node-vm-size Standard_D4s_v5

# get credentials
az aks get-credentials -n $AKS_NAME -g $AKS_RG --overwrite-existing

# deploy Ollama server and client app (Open-WebUI) into AKS
kubectl apply -f .

# check the install
kubectl get all -n ollama

# install an LLM model like phi3 or llama3.1 into the Ollama server
kubectl exec ollama-0 -n ollama -it -- ollama run phi3

# get the public IP of the client service
kubectl get svc -n ollama
```
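
Once the `open-webui-service` LoadBalancer has an external IP, the UI is reachable directly in a browser. A small sketch for grabbing the address (assumes the EXTERNAL-IP has already been provisioned):

```sh
# wait until EXTERNAL-IP is populated
kubectl get svc open-webui-service -n ollama -w

# then extract it and open it in a browser
WEBUI_IP=$(kubectl get svc open-webui-service -n ollama -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
echo "Open WebUI: http://$WEBUI_IP"
```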

Here are some example models that can be run with `ollama` (from the [model library](https://github.com/ollama/ollama/blob/main/README.md#model-library)):

| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
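
Models can also be pulled non-interactively and queried over Ollama's REST API. A sketch using `kubectl port-forward` (the port matches `ollama-service.yaml`; the `/api/generate` request shape follows the Ollama API docs):

```sh
# pull a model without an interactive session
kubectl exec ollama-0 -n ollama -- ollama pull phi3

# forward the Ollama service to localhost
kubectl port-forward svc/ollama-service 11434:11434 -n ollama &

# ask for a completion; "stream": false returns a single JSON response
curl http://localhost:11434/api/generate -d '{
  "model": "phi3",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```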

4 changes: 4 additions & 0 deletions 510_ai_ollama_k8s/namespace.yaml
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: ollama
13 changes: 13 additions & 0 deletions 510_ai_ollama_k8s/ollama-service.yaml
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: ollama-service
  namespace: ollama
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
41 changes: 41 additions & 0 deletions 510_ai_ollama_k8s/ollama-statefulset.yaml
@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ollama
  namespace: ollama
spec:
  serviceName: ollama
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          resources:
            requests:
              cpu: "2000m"
              memory: "2Gi"
            limits:
              cpu: "4000m"
              memory: "16Gi"
              nvidia.com/gpu: "0"
          volumeMounts:
            - name: ollama-volume
              mountPath: /root/.ollama
          tty: true
  volumeClaimTemplates:
    - metadata:
        name: ollama-volume
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 90Gi
40 changes: 40 additions & 0 deletions 510_ai_ollama_k8s/webui-deployment.yaml
@@ -0,0 +1,40 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: open-webui-deployment
  namespace: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: open-webui
  template:
    metadata:
      labels:
        app: open-webui
    spec:
      containers:
        - name: open-webui
          image: ghcr.io/open-webui/open-webui:main
          ports:
            - containerPort: 8080
          resources:
            requests:
              cpu: "500m"
              memory: "500Mi"
            limits:
              cpu: "1000m"
              memory: "1Gi"
          env:
            - name: OLLAMA_BASE_URL
              value: "http://ollama-service.ollama.svc.cluster.local:11434"
            - name: WEBUI_AUTH
              value: "False"
          tty: true
          volumeMounts:
            - name: webui-volume
              mountPath: /app/backend/data
      volumes:
        - name: webui-volume
          persistentVolumeClaim:
            claimName: open-webui-pvc
20 changes: 20 additions & 0 deletions 510_ai_ollama_k8s/webui-ingress.yaml
@@ -0,0 +1,20 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: open-webui-ingress
  namespace: ollama
  # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
  # annotations:
  #   nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  rules:
    - host: open-webui.minikube.local
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: open-webui-service
                port:
                  number: 80  # matches the port exposed by open-webui-service
12 changes: 12 additions & 0 deletions 510_ai_ollama_k8s/webui-pvc.yaml
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  labels:
    app: open-webui
  name: open-webui-pvc
  namespace: ollama
spec:
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 2Gi
15 changes: 15 additions & 0 deletions 510_ai_ollama_k8s/webui-service.yaml
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: open-webui-service
  namespace: ollama
spec:
  type: LoadBalancer  # or NodePort; use LoadBalancer if you're on a cloud that supports it
  selector:
    app: open-webui
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
      # If using NodePort, you can optionally specify the nodePort:
      # nodePort: 30000
57 changes: 57 additions & 0 deletions _kaito/app/app.py
@@ -0,0 +1,57 @@
from openai import AzureOpenAI
# from openai import OpenAI
import streamlit as st

with st.sidebar:
    openai_api_key = st.text_input(
        "OpenAI API Key", key="chatbot_api_key", type="password"
    )
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"

st.title("💬 Chatbot")
st.caption("🚀 A Streamlit chatbot powered by OpenAI")

if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How can I help you?"}
    ]

for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    openai_client = AzureOpenAI(
        azure_endpoint="https://swedencentral.api.cognitive.microsoft.com",
        api_key=openai_api_key,
        api_version="2024-06-01",
    )
    # client = OpenAI(api_key=openai_api_key)

    st.session_state.messages.append({"role": "user", "content": prompt})

    st.chat_message("user").write(prompt)

    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=st.session_state.messages
        # messages=[
        #     {"role": "system", "content": "You are a helpful assistant."},
        #     {"role": "user", "content": "Who are you ?"},
        # ],
    )

    # response = client.chat.completions.create(
    #     model="gpt-3.5-turbo", messages=st.session_state.messages
    # )

    msg = response.choices[0].message.content

    st.session_state.messages.append({"role": "assistant", "content": msg})

    st.chat_message("assistant").write(msg)
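
A minimal way to try the Streamlit app locally, assuming the dependencies from `requirements.txt` are installed:

```sh
pip install -r requirements.txt
streamlit run app.py  # serves the chatbot on http://localhost:8501 by default
```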
23 changes: 23 additions & 0 deletions _kaito/app/flaskapp.py
@@ -0,0 +1,23 @@
from flask import Flask, request, jsonify
from openai import AzureOpenAI

app = Flask(__name__)

# Set your Azure OpenAI endpoint and API key
client = AzureOpenAI(
    azure_endpoint="YOUR_AZURE_OPENAI_ENDPOINT",
    api_key="YOUR_AZURE_OPENAI_API_KEY",
    api_version="2024-06-01",
)

@app.route('/chat', methods=['POST'])
def chat():
    user_input = request.json.get('message')
    response = client.chat.completions.create(
        model="gpt-4o",  # Updated to use GPT-4o
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_input}
        ],
        max_tokens=150
    )
    return jsonify(response.choices[0].message.content.strip())

if __name__ == '__main__':
    app.run(debug=True)
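
A quick smoke test of the `/chat` route, assuming the Flask dev server is running on its default port 5000:

```sh
python flaskapp.py &

curl -X POST http://localhost:5000/chat \
  -H "Content-Type: application/json" \
  -d '{"message": "Hello, who are you?"}'
```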
8 changes: 8 additions & 0 deletions _kaito/app/requirements.txt
@@ -0,0 +1,8 @@
streamlit>=1.28
langchain>=0.0.217
openai>=1.2
duckduckgo-search
anthropic>=0.3.0
trubrics>=1.4.3
streamlit-feedback
langchain-community
8 changes: 5 additions & 3 deletions _kaito/commands.ps1
@@ -1,6 +1,8 @@
# https://learn.microsoft.com/en-us/azure/aks/ai-toolchain-operator

+$AZURE_SUBSCRIPTION_ID=$(az account show --query id -o tsv)
-$AZURE_RESOURCE_GROUP="rg-kaito"
-$AZURE_LOCATION="swedencentral"
+$AZURE_RESOURCE_GROUP="rg-aks-kaito-frc"
+$AZURE_LOCATION="francecentral" # "swedencentral"
$CLUSTER_NAME="aks-cluster"

az group create --name $AZURE_RESOURCE_GROUP --location $AZURE_LOCATION
@@ -34,6 +36,6 @@ kubectl rollout restart deployment/kaito-gpu-provisioner -n kube-system
kubectl get deployment -n kube-system | grep kaito

# Deploy the Falcon 7B-instruct model from the KAITO model repository using the kubectl apply command.
-kubectl apply -f https://raw.githubusercontent.com/Azure/kaito/main/examples/kaito_workspace_falcon_7b-instruct.yaml
+kubectl apply -f https://raw.githubusercontent.com/Azure/kaito/main/examples/inference/kaito_workspace_falcon_7b-instruct.yaml

kubectl get workspace workspace-falcon-7b-instruct -w
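
Once the workspace reports ready, the inference endpoint can be smoke-tested from inside the cluster. A sketch assuming the workspace exposes a `workspace-falcon-7b-instruct` service with a `/chat` route, as in the KAITO inference examples:

```sh
# ClusterIP of the inference service created by the workspace
SERVICE_IP=$(kubectl get svc workspace-falcon-7b-instruct -o jsonpath='{.spec.clusterIP}')

# send a test prompt from a throwaway pod inside the cluster
kubectl run -it --rm --restart=Never curl --image=curlimages/curl -- \
  curl -X POST "http://$SERVICE_IP/chat" \
    -H "accept: application/json" -H "Content-Type: application/json" \
    -d '{"prompt": "What is Kubernetes?"}'
```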
Binary file modified _kaito/kaito_workspace_falcon_7b-instruct.yaml
