Commit db95458: 510

Houssem Dellai committed Sep 18, 2024 (1 parent: dccf03a)

Showing 20 changed files with 401 additions and 16 deletions.
2 changes: 1 addition & 1 deletion 260_private_aks_bastion/Readme.md
@@ -53,7 +53,7 @@ Once you are connected to the Azure VM, run the following command to connect to
az login --identity

# get the credentials of the AKS cluster
-az aks get-credentials -g rg-private-aks-bastion-260 -n aks-private-260
+az aks get-credentials -g rg-private-aks-bastion-260 -n aks-cluster

# verify the connection
kubectl get nodes
12 changes: 1 addition & 11 deletions 260_private_aks_bastion/aks.tf
@@ -30,14 +30,4 @@ resource "azurerm_kubernetes_cluster" "aks" {
      default_node_pool.0.upgrade_settings
    ]
  }
-}
-
-resource "terraform_data" "aks-get-credentials" {
-  triggers_replace = [
-    azurerm_kubernetes_cluster.aks.id
-  ]
-
-  provisioner "local-exec" {
-    command = "az aks get-credentials -n ${azurerm_kubernetes_cluster.aks.name} -g ${azurerm_kubernetes_cluster.aks.resource_group_name} --overwrite-existing"
-  }
-}
+}
2 changes: 1 addition & 1 deletion 260_private_aks_bastion/install-tools.sh
@@ -13,6 +13,6 @@ snap install kubectl --classic

# az aks list -o table

-# az aks get-credentials -n aks-cluster -g rg-spoke-202 --overwrite-existing
+# az aks get-credentials -g rg-private-aks-bastion-260 -n aks-private-260

# kubectl get nodes
51 changes: 51 additions & 0 deletions 510_ai_ollama_k8s/Readme.md
@@ -0,0 +1,51 @@
# Ollama AI model deployment on Azure Kubernetes Service (AKS)

Reference: the Open WebUI Kubernetes manifests at https://github.com/open-webui/open-webui/tree/main/kubernetes/manifest/base

```sh
AKS_RG="rg-aks-ollama-llm"
AKS_NAME="aks-cluster"

# create resource group
az group create -n $AKS_RG -l swedencentral

# create an AKS cluster
az aks create -n $AKS_NAME -g $AKS_RG --network-plugin azure --network-plugin-mode overlay -k 1.30.3 --node-vm-size Standard_D4s_v5

# get credentials
az aks get-credentials -n $AKS_NAME -g $AKS_RG --overwrite-existing

# deploy Ollama server and client app (Open-WebUI) into AKS
kubectl apply -f .

# check the install
kubectl get all -n ollama

# install an LLM model like phi3 or llama3.1 into the Ollama server
kubectl exec ollama-0 -n ollama -it -- ollama run phi3

# get the public IP of the client service
kubectl get svc -n ollama
```
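
Once the `open-webui-service` LoadBalancer has an external IP, the UI is reachable directly in a browser. A small sketch for grabbing the address (assumes the EXTERNAL-IP has already been provisioned):

```sh
# wait until EXTERNAL-IP is populated
kubectl get svc open-webui-service -n ollama -w

# then extract it and open it in a browser
WEBUI_IP=$(kubectl get svc open-webui-service -n ollama -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
echo "Open WebUI: http://$WEBUI_IP"
```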

Here are some example models that can be run with `ollama` (from the [model library](https://github.com/ollama/ollama/blob/main/README.md#model-library)):

| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
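
Models can also be pulled non-interactively and queried over Ollama's REST API. A sketch using `kubectl port-forward` (the port matches `ollama-service.yaml`; the `/api/generate` request shape follows the Ollama API docs):

```sh
# pull a model without an interactive session
kubectl exec ollama-0 -n ollama -- ollama pull phi3

# forward the Ollama service to localhost
kubectl port-forward svc/ollama-service 11434:11434 -n ollama &

# ask for a completion; "stream": false returns a single JSON response
curl http://localhost:11434/api/generate -d '{
  "model": "phi3",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```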

4 changes: 4 additions & 0 deletions 510_ai_ollama_k8s/namespace.yaml
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: ollama
13 changes: 13 additions & 0 deletions 510_ai_ollama_k8s/ollama-service.yaml
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: ollama-service
  namespace: ollama
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
41 changes: 41 additions & 0 deletions 510_ai_ollama_k8s/ollama-statefulset.yaml
@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ollama
  namespace: ollama
spec:
  serviceName: ollama
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          resources:
            requests:
              cpu: "2000m"
              memory: "2Gi"
            limits:
              cpu: "4000m"
              memory: "16Gi"
              nvidia.com/gpu: "0"
          volumeMounts:
            - name: ollama-volume
              mountPath: /root/.ollama
          tty: true
  volumeClaimTemplates:
    - metadata:
        name: ollama-volume
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 90Gi
40 changes: 40 additions & 0 deletions 510_ai_ollama_k8s/webui-deployment.yaml
@@ -0,0 +1,40 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: open-webui-deployment
  namespace: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: open-webui
  template:
    metadata:
      labels:
        app: open-webui
    spec:
      containers:
        - name: open-webui
          image: ghcr.io/open-webui/open-webui:main
          ports:
            - containerPort: 8080
          resources:
            requests:
              cpu: "500m"
              memory: "500Mi"
            limits:
              cpu: "1000m"
              memory: "1Gi"
          env:
            - name: OLLAMA_BASE_URL
              value: "http://ollama-service.ollama.svc.cluster.local:11434"
            - name: WEBUI_AUTH
              value: "False"
          tty: true
          volumeMounts:
            - name: webui-volume
              mountPath: /app/backend/data
      volumes:
        - name: webui-volume
          persistentVolumeClaim:
            claimName: open-webui-pvc
20 changes: 20 additions & 0 deletions 510_ai_ollama_k8s/webui-ingress.yaml
@@ -0,0 +1,20 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: open-webui-ingress
  namespace: ollama
  # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
  # annotations:
  #   nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  rules:
    - host: open-webui.minikube.local
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: open-webui-service
                port:
                  number: 80  # matches the port exposed by open-webui-service
12 changes: 12 additions & 0 deletions 510_ai_ollama_k8s/webui-pvc.yaml
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  labels:
    app: open-webui
  name: open-webui-pvc
  namespace: ollama
spec:
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 2Gi
15 changes: 15 additions & 0 deletions 510_ai_ollama_k8s/webui-service.yaml
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: open-webui-service
  namespace: ollama
spec:
  type: LoadBalancer  # or NodePort; use LoadBalancer if you're on a cloud that supports it
  selector:
    app: open-webui
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
      # If using NodePort, you can optionally specify the nodePort:
      # nodePort: 30000
57 changes: 57 additions & 0 deletions _kaito/app/app.py
@@ -0,0 +1,57 @@
from openai import AzureOpenAI
# from openai import OpenAI
import streamlit as st

with st.sidebar:
    openai_api_key = st.text_input(
        "OpenAI API Key", key="chatbot_api_key", type="password"
    )
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"

st.title("💬 Chatbot")
st.caption("🚀 A Streamlit chatbot powered by OpenAI")

if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How can I help you?"}
    ]

for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    openai_client = AzureOpenAI(
        azure_endpoint="https://swedencentral.api.cognitive.microsoft.com",
        api_key=openai_api_key,
        api_version="2024-06-01",
    )
    # client = OpenAI(api_key=openai_api_key)

    st.session_state.messages.append({"role": "user", "content": prompt})

    st.chat_message("user").write(prompt)

    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=st.session_state.messages
        # messages=[
        #     {"role": "system", "content": "You are a helpful assistant."},
        #     {"role": "user", "content": "Who are you ?"},
        # ],
    )

    # response = client.chat.completions.create(
    #     model="gpt-3.5-turbo", messages=st.session_state.messages
    # )

    msg = response.choices[0].message.content

    st.session_state.messages.append({"role": "assistant", "content": msg})

    st.chat_message("assistant").write(msg)
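
A minimal way to try the Streamlit app locally, assuming the dependencies from `requirements.txt` are installed:

```sh
pip install -r requirements.txt
streamlit run app.py  # serves the chatbot on http://localhost:8501 by default
```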
23 changes: 23 additions & 0 deletions _kaito/app/flaskapp.py
@@ -0,0 +1,23 @@
from flask import Flask, request, jsonify
from openai import AzureOpenAI

app = Flask(__name__)

# Set your Azure OpenAI endpoint and API key
client = AzureOpenAI(
    azure_endpoint="YOUR_AZURE_OPENAI_ENDPOINT",
    api_key="YOUR_AZURE_OPENAI_API_KEY",
    api_version="2024-06-01",
)

@app.route('/chat', methods=['POST'])
def chat():
    user_input = request.json.get('message')
    response = client.chat.completions.create(
        model="gpt-4o",  # Updated to use GPT-4o
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_input}
        ],
        max_tokens=150
    )
    return jsonify(response.choices[0].message.content.strip())

if __name__ == '__main__':
    app.run(debug=True)
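
A quick smoke test of the `/chat` route, assuming the Flask dev server is running on its default port 5000:

```sh
python flaskapp.py &

curl -X POST http://localhost:5000/chat \
  -H "Content-Type: application/json" \
  -d '{"message": "Hello, who are you?"}'
```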
8 changes: 8 additions & 0 deletions _kaito/app/requirements.txt
@@ -0,0 +1,8 @@
streamlit>=1.28
langchain>=0.0.217
openai>=1.2
duckduckgo-search
anthropic>=0.3.0
trubrics>=1.4.3
streamlit-feedback
langchain-community
8 changes: 5 additions & 3 deletions _kaito/commands.ps1
@@ -1,6 +1,8 @@
# https://learn.microsoft.com/en-us/azure/aks/ai-toolchain-operator

+$AZURE_SUBSCRIPTION_ID=$(az account show --query id -o tsv)
-$AZURE_RESOURCE_GROUP="rg-kaito"
-$AZURE_LOCATION="swedencentral"
+$AZURE_RESOURCE_GROUP="rg-aks-kaito-frc"
+$AZURE_LOCATION="francecentral" # "swedencentral"
$CLUSTER_NAME="aks-cluster"

az group create --name $AZURE_RESOURCE_GROUP --location $AZURE_LOCATION
@@ -34,6 +36,6 @@ kubectl rollout restart deployment/kaito-gpu-provisioner -n kube-system
kubectl get deployment -n kube-system | grep kaito

# Deploy the Falcon 7B-instruct model from the KAITO model repository using the kubectl apply command.
-kubectl apply -f https://raw.githubusercontent.com/Azure/kaito/main/examples/kaito_workspace_falcon_7b-instruct.yaml
+kubectl apply -f https://raw.githubusercontent.com/Azure/kaito/main/examples/inference/kaito_workspace_falcon_7b-instruct.yaml

kubectl get workspace workspace-falcon-7b-instruct -w
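
Once the workspace reports ready, the inference endpoint can be smoke-tested from inside the cluster. A sketch assuming the workspace exposes a `workspace-falcon-7b-instruct` service with a `/chat` route, as in the KAITO inference examples:

```sh
# ClusterIP of the inference service created by the workspace
SERVICE_IP=$(kubectl get svc workspace-falcon-7b-instruct -o jsonpath='{.spec.clusterIP}')

# send a test prompt from a throwaway pod inside the cluster
kubectl run -it --rm --restart=Never curl --image=curlimages/curl -- \
  curl -X POST "http://$SERVICE_IP/chat" \
    -H "accept: application/json" -H "Content-Type: application/json" \
    -d '{"prompt": "What is Kubernetes?"}'
```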
Binary file modified _kaito/kaito_workspace_falcon_7b-instruct.yaml
