From 8325526dd35e8f2ffb439f276614a54065c5d6df Mon Sep 17 00:00:00 2001
From: Ualas
Date: Tue, 23 Apr 2024 13:41:22 -0500
Subject: [PATCH] feat: add Llama 3 8B support

---
 docker-compose-gguf.yml | 10 +++++-----
 run.sh                  |  7 +++++++
 ui/types/openai.ts      |  8 ++++++++
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/docker-compose-gguf.yml b/docker-compose-gguf.yml
index 1d21353..5a99b37 100644
--- a/docker-compose-gguf.yml
+++ b/docker-compose-gguf.yml
@@ -3,7 +3,7 @@ version: '3.6'
 services:
   llama-gpt-api:
-    # Pin to llama-cpp-python 0.1.80 with GGUF support
-    image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
+    # Pin to llama-cpp-python 0.2.63 with GGUF support
+    image: ghcr.io/abetlen/llama-cpp-python:v0.2.63
     restart: on-failure
     volumes:
       - './models:/models'
@@ -11,8 +11,8 @@ services:
     ports:
       - 3001:8000
     environment:
-      MODEL: '/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}'
-      MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf}'
+      MODEL: '/models/${MODEL_NAME:-Meta-Llama-3-8B-Instruct.Q4_K_M.gguf}'
+      MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf}'
       N_GQA: '${N_GQA:-1}'
       USE_MLOCK: 1
     cap_add:
@@ -31,7 +31,7 @@ services:
     environment:
       - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
-      - 'DEFAULT_MODEL=/models/${MODEL_NAME:-llama-2-7b-chat.bin}'
+      - 'DEFAULT_MODEL=/models/${MODEL_NAME:-Meta-Llama-3-8B-Instruct.Q4_K_M.gguf}'
       - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. Respond very concisely."}'
       - 'WAIT_HOSTS=llama-gpt-api:8000'
       - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}'
diff --git a/run.sh b/run.sh
index bd5b32e..b21dcda 100755
--- a/run.sh
+++ b/run.sh
@@ -30,6 +30,13 @@
 model_type="gguf"
 # Export the model value as an environment variable
 case $model in
+llama3-8b)
+  export MODEL_NAME="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"
+  export MODEL_DOWNLOAD_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"
+  export WAIT_TIMEOUT=3600
+  export N_GQA=1
+  model_type="gguf"
+  ;;
 7b)
   export MODEL_NAME="llama-2-7b-chat.bin"
   export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin"
diff --git a/ui/types/openai.ts b/ui/types/openai.ts
index af351dc..00dc8ff 100644
--- a/ui/types/openai.ts
+++ b/ui/types/openai.ts
@@ -13,6 +13,8 @@ export enum OpenAIModelID {
   GPT_4 = 'gpt-4',
   GPT_4_32K = 'gpt-4-32k',
 
+  LLAMA_3_8b_Q4_K_M = '/models/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf',
+
   LLAMA_7B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-7b-chat.bin',
   LLAMA_13B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-13b-chat.bin',
   LLAMA_70B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-70b-chat.bin',
@@ -34,6 +36,12 @@
 export const fallbackModelID = OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0;
 
 export const OpenAIModels: Record<OpenAIModelID, OpenAIModel> = {
+  [OpenAIModelID.LLAMA_3_8b_Q4_K_M]: {
+    id: OpenAIModelID.LLAMA_3_8b_Q4_K_M,
+    name: 'LLAMA 3 8B',
+    maxLength: 12000,
+    tokenLimit: 4000,
+  },
   [OpenAIModelID.GPT_3_5]: {
     id: OpenAIModelID.GPT_3_5,
     name: 'GPT-3.5',
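
Usage sketch (illustration only, assuming the repository's existing
"./run.sh --model <name>" entry point, which the case statement above
dispatches on):

    # Download the Llama 3 8B Instruct GGUF and start the stack
    ./run.sh --model llama3-8b

    # Or bring up the GGUF compose stack directly; MODEL_NAME and
    # MODEL_DOWNLOAD_URL fall back to the Llama 3 defaults this patch sets
    docker compose -f docker-compose-gguf.yml up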