DO NOT MERGE: PoC for OPEA offline installation #550

Open · wants to merge 15 commits into base: main

Changes from 14 commits
8 changes: 8 additions & 0 deletions offline-install/00_online_pkg.sh
@@ -0,0 +1,8 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

sudo apt-get install -y docker.io git
sudo snap install helm --classic
wget https://arxiv.org/pdf/2212.04088 -O 2212.pdf  # -O (capital) saves the download; lowercase -o would only redirect wget's log
45 changes: 45 additions & 0 deletions offline-install/01_online_images.sh
@@ -0,0 +1,45 @@
#!/bin/bash


# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry=registry:5000

# Define the images to pull and push
images=(
"docker.io/nginx:1.27.1"
"docker.io/opea/chatqna-conversation-ui:latest"
"docker.io/opea/chatqna:latest"
"docker.io/opea/chatqna-ui:latest"
"docker.io/opea/dataprep-redis:latest"
"docker.io/opea/retriever-redis:latest"
"docker.io/redis/redis-stack:7.2.0-v9"
"gcr.io/google_containers/pause:3.1"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
)

# Loop through each image, pull it, tag it, and push it to the registry
for image in "${images[@]}"; do
  # Split the image reference into name and tag; the second awk drops the
  # source registry host (docker.io, gcr.io, ghcr.io, ...) so the image is
  # re-rooted under the local registry.
  image_name=$(echo "$image" | awk -F ':' '{print $1}' | awk -F '/' ' { for (i=2; i<=NF-1; i++) printf $i "/"; print $(NF) }')
  image_tag=$(echo "$image" | awk -F: '{print $2}')

# If no tag is specified, default to latest
if [ -z "$image_tag" ]; then
image_tag="latest"
fi

echo "Pulling image: $image"
docker pull "$image"

# Tag the image for the new registry with the correct tag
new_image="$registry/$image_name:$image_tag"
echo "Tagging image: $image as $new_image"
docker tag "$image" "$new_image"

# Push the image to the new registry
echo "Pushing image: $new_image"
docker push "$new_image"
done
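
Note: a quick way to confirm the pushes landed is the registry's standard v2 HTTP API (a sketch, assuming the registry at registry:5000 is reachable without authentication):

# List the repositories in the local registry, then the tags of one image.
curl -s http://registry:5000/v2/_catalog
curl -s http://registry:5000/v2/nginx/tags/list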
21 changes: 21 additions & 0 deletions offline-install/02.a_online_models.sh
@@ -0,0 +1,21 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry=registry:5000
hf_token="hf_xxx" # placeholder: replace with your own Hugging Face token
Contributor comment: Security: don't hard-code secrets; take them as arguments, e.g. as the name of a file containing the token.
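
A minimal sketch of the reviewer's suggestion, assuming the token is stored in a file whose path is passed as the script's first argument (the argument handling is illustrative, not part of this PR):

# Read the Hugging Face token from a file given as the first argument
# instead of hard-coding it in the script.
token_file="${1:?usage: $0 <path-to-hf-token-file>}"
hf_token="$(<"$token_file")"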

http_proxy=""
https_proxy=""

python3 -m venv venv
source venv/bin/activate
pip install -U "huggingface_hub[cli]"
export HF_ENDPOINT=https://hf-mirror.com
Contributor comment: This should be a variable too.
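
One way to make the endpoint a variable, keeping the mirror used in this PoC as the default (a sketch; the environment-variable override is an assumption, not part of this PR):

# Allow the Hugging Face endpoint to be overridden from the environment.
export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}"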

huggingface-cli login --token "$hf_token"
huggingface-cli download BAAI/bge-base-en-v1.5 --local-dir cli.data/models--BAAI--bge-base-en-v1.5
huggingface-cli download BAAI/bge-reranker-base --local-dir cli.data/models--BAAI--bge-reranker-base
huggingface-cli download Intel/neural-chat-7b-v3-3 --local-dir cli.data/models--Intel--neural-chat-7b-v3-3

#docker build . -t $registry/opea/models:latest
#docker push $registry/opea/models:latest
11 changes: 11 additions & 0 deletions offline-install/10_offline_install_models.sh
@@ -0,0 +1,11 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry="registry:5000"

sed -i "s|image:.*models.*|image: $registry/opea/models:latest|" models.yaml
sed -i "s|image:.*pause.*|image: $registry/google_containers/pause:3.1|" models.yaml
kubectl delete -f models.yaml || true
kubectl apply -f models.yaml
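
Note: once the DaemonSet is applied, a quick check that the copy finished on every node (a sketch; the label matches the name: copy-model selector in models.yaml, and CopyDone is echoed by the init container):

# Each pod's init container logs "CopyDone" after the models are in place.
kubectl get pods -l name=copy-model -o wide
kubectl logs -l name=copy-model -c copy-model --tail=1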
32 changes: 32 additions & 0 deletions offline-install/11.a_offline_install.sh
@@ -0,0 +1,32 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry="registry:5000"

cd ../helm-charts/ || exit 1
helm uninstall chatqna || true

sed -i "s|image: .*nginx:1.27.1|image: $registry/nginx:1.27.1|g" chatqna/templates/nginx-deployment.yaml
sed -i "/ - name: milvus/,+4d" common/data-prep/Chart.yaml
sed -i "/ - name: milvus/,+4d" common/retriever-usvc/Chart.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/tgi/templates/configmap.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/tei/templates/configmap.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/teirerank/templates/configmap.yaml

./update_dependency.sh
helm dependency update chatqna
export MODELDIR="/mnt/opea-models"
helm install chatqna chatqna --set global.modelUseHostPath=${MODELDIR} \
--set image.repository=$registry/opea/chatqna,image.tag=latest \
--set tgi.image.repository=$registry/huggingface/text-generation-inference,tgi.image.tag=2.4.0-intel-cpu \
--set tei.image.repository=$registry/huggingface/text-embeddings-inference,tei.image.tag=cpu-1.5 \
--set teirerank.image.repository=$registry/huggingface/text-embeddings-inference,teirerank.image.tag=cpu-1.5 \
--set redis-vector-db.image.repository=$registry/redis/redis-stack,redis-vector-db.image.tag=7.2.0-v9 \
--set retriever-usvc.image.repository=$registry/opea/retriever-redis,retriever-usvc.image.tag=latest \
--set chatqna-ui.image.repository=$registry/opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173 \
--set data-prep.image.repository=$registry/opea/dataprep-redis,data-prep.image.tag=latest \
--set tgi.LLM_MODEL_ID=/data/models--Intel--neural-chat-7b-v3-3 \
--set tei.EMBEDDING_MODEL_ID=/data/models--BAAI--bge-base-en-v1.5 \
--set teirerank.RERANK_MODEL_ID=/data/models--BAAI--bge-reranker-base
2 changes: 2 additions & 0 deletions offline-install/Dockerfile
@@ -0,0 +1,2 @@
FROM busybox:1.36.1
ADD data /data
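
Note: this Dockerfile packs the downloaded models into a busybox-based image so they can travel through the local registry to the copy-model DaemonSet. Building and pushing it corresponds to the commands commented out at the end of 02.a_online_models.sh (a sketch; note that 02.a downloads into cli.data/ while the Dockerfile ADDs data/, so the files may need to be moved first):

# Build the model-carrier image and push it to the local registry.
docker build . -t registry:5000/opea/models:latest
docker push registry:5000/opea/models:latest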
33 changes: 33 additions & 0 deletions offline-install/README
@@ -0,0 +1,33 @@
Note: These scripts are for DEMO purposes only, not production; running them in a production environment is risky.
The scripts were tested on Ubuntu 22.04.

1. 00_online_pkg.sh

This script installs the prerequisite packages (docker.io, git, helm) used to clone the OPEA code and run the remaining steps.


2. 01_online_images.sh

This script pulls the OPEA container images and pushes them to your local registry. It needs access to docker.io, gcr.io, and ghcr.io; change the registry variable to match your local registry.
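
The scripts assume a local registry reachable as registry:5000 but do not start one. A minimal sketch for the demo (assuming Docker is installed and the hostname registry resolves to the machine running it):

# Run a plain Docker registry on port 5000.
# If it is not TLS-secured, add it to "insecure-registries" in
# /etc/docker/daemon.json on every node that pulls from it.
docker run -d --restart=always --name local-registry -p 5000:5000 registry:2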


3. 02.a_online_models.sh

This script downloads the model files. Change the proxy settings and the Hugging Face token before running it.

4. Copy the models to the k8s nodes

4.a Manual copy (fast)

i. Create the model directory on each k8s node:
   sudo rm -rf /mnt/opea-models/; sudo mkdir /mnt/opea-models/; sudo chmod 777 /mnt/opea-models/

ii. scp the models to each k8s node, for example:
   scp -r cli.data/* [email protected]:/mnt/opea-models/

4.b k8s DaemonSet (slow)

Run 10_offline_install_models.sh.

5. 11.a_offline_install.sh

This script installs OPEA ChatQnA offline on k8s. Change the docker registry before running it.
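
To verify the install afterwards, a quick check (the release name follows the helm install command in the script):

helm status chatqna
kubectl get pods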
63 changes: 63 additions & 0 deletions offline-install/backup/02_online_models.sh
@@ -0,0 +1,63 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry=registry:5000
hf_token="hf_xxx" # placeholder: replace with your own Hugging Face token
http_proxy=""
https_proxy=""

# Function to run a Docker container
run_docker_container() {
local name=$1
local port=$2
local volume=$3
local image=$4
local model_id=$5

  # Stop and remove any stale container with the same name, then run a fresh one
  docker stop "$name" 2>/dev/null
  docker rm "$name" 2>/dev/null
  docker run --name "$name" -d -p "$port":80 \
    -e HF_TOKEN="$hf_token" -e http_proxy="$http_proxy" -e https_proxy="$https_proxy" \
    -v "$volume" "$image" --model-id "$model_id"
}

# Function to check if the service is up
check_service() {
local name=$1
local check_url=$2
local check_data=$3

# Check if the service is up
until curl -s -o /dev/null -w "%{http_code}" -X POST -d "$check_data" -H 'Content-Type: application/json' "$check_url" | grep -q "200"; do
echo "curl -X POST -d '$check_data' -H 'Content-Type: application/json' '$check_url'"
echo "Waiting for $name service to be up..."
docker logs --tail 5 "$name"
sleep 5
done

echo "$name service is up"

# Destroy the Docker container after a successful check
docker stop "$name"
docker rm "$name"
}

# Define the ports to use
port1=9001
port2=9002
port3=9003

# Run the Docker containers
run_docker_container "tei" "$port1" "$PWD/data:/data" "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" "BAAI/bge-base-en-v1.5"
run_docker_container "teirank" "$port2" "$PWD/data:/data" "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" "BAAI/bge-reranker-base"
run_docker_container "tgi" "$port3" "$PWD/data:/data" "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" "Intel/neural-chat-7b-v3-3"

# Check the services
check_service "tei" "http://localhost:$port1/v1/embeddings" '{"input": "This is a test"}'
check_service "teirank" "http://localhost:$port2/rerank" '{"query": "What is Deep Learning?", "texts": ["Deep Learning is " ]}'
check_service "tgi" "http://localhost:$port3/v1/chat/completions" '{"messages": [{"role": "user", "content": "Say this is a test!"}]}'


#docker build . -t $registry/opea/models:latest
#docker push $registry/opea/models:latest
30 changes: 30 additions & 0 deletions offline-install/backup/11_offline_install.sh
@@ -0,0 +1,30 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

registry="registry:5000"

cd ../helm-charts/ || exit 1
helm uninstall chatqna || true

sed -i "s|image: .*nginx:1.27.1|image: $registry/nginx:1.27.1|g" chatqna/templates/nginx-deployment.yaml
sed -i "/ - name: milvus/,+4d" common/data-prep/Chart.yaml
sed -i "/ - name: milvus/,+4d" common/retriever-usvc/Chart.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/tgi/templates/configmap.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/tei/templates/configmap.yaml
sed -i "s/^data:/data:\n HF_HUB_OFFLINE: 'True'/" common/teirerank/templates/configmap.yaml

./update_dependency.sh
helm dependency update chatqna
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
helm install chatqna chatqna --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} \
--set image.repository=$registry/opea/chatqna,image.tag=latest \
--set tgi.image.repository=$registry/huggingface/text-generation-inference,tgi.image.tag=2.4.0-intel-cpu \
--set tei.image.repository=$registry/huggingface/text-embeddings-inference,tei.image.tag=cpu-1.5 \
--set teirerank.image.repository=$registry/huggingface/text-embeddings-inference,teirerank.image.tag=cpu-1.5 \
--set redis-vector-db.image.repository=$registry/redis/redis-stack,redis-vector-db.image.tag=7.2.0-v9 \
--set retriever-usvc.image.repository=$registry/opea/retriever-redis,retriever-usvc.image.tag=latest \
--set chatqna-ui.image.repository=$registry/opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173 \
--set data-prep.image.repository=$registry/opea/dataprep-redis,data-prep.image.tag=latest
44 changes: 44 additions & 0 deletions offline-install/models.yaml
@@ -0,0 +1,44 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: copy-model-daemonset
spec:
  selector:
    matchLabels:
      name: copy-model
  template:
    metadata:
      labels:
        name: copy-model
    spec:
      initContainers:
        - name: copy-model
          image: registry:5000/opea/models:latest
          volumeMounts:
            - name: local-model
              mountPath: /mnt/opea-models
          command: ["/bin/sh", "-c", "cp -r /data/* /mnt/opea-models/ && echo CopyDone"]
      containers:
        - name: pause
          image: registry:5000/google_containers/pause:3.1
          resources:
            requests:
              cpu: "0"
              memory: "0"
            limits:
              cpu: "0"
              memory: "0"
          volumeMounts:
            - name: local-model
              mountPath: /mnt/opea-models
      restartPolicy: Always
      volumes:
        - name: local-model
          hostPath:
            path: /mnt/opea-models
            type: DirectoryOrCreate
  updateStrategy:
    type: RollingUpdate