
Commit

Minor last min fixes
frikky committed Jan 29, 2025
1 parent b7f7d94 commit cd5062a
Showing 3 changed files with 80 additions and 32 deletions.
12 changes: 8 additions & 4 deletions shuffle-ai/1.0.0/Dockerfile
@@ -17,7 +17,14 @@ RUN apt install -y file openssl bash tini libpng-dev aspell-en
RUN apt install -y git clang g++ make automake autoconf libtool cmake
RUN apt install -y autoconf-archive wget
RUN mkdir -p /models
RUN wget https://huggingface.co/QuantFactory/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B.Q8_0.gguf?download=true -O /models/Llama-3.2-3B.Q8_0.gguf

# Larger model
RUN wget https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf -O /models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf
ENV MODEL_PATH="/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf"

# https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf
#RUN wget https://huggingface.co/QuantFactory/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B.Q2_K.gguf?download=true -O /models/Llama-3.2-3B.Q8_0.gguf
#RUN wget https://huggingface.co/QuantFactory/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B.Q2_K.gguf?download=true -O /models/Llama-3.2-3B.Q8_0.gguf

# Install all of our pip packages in a single directory that we can copy to our base image later
RUN mkdir /install
@@ -72,9 +79,6 @@ ENV SHUFFLE_APP_SDK_TIMEOUT=300
#ENV LD_LIBRARY_PATH=/usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so
#RUN chmod 755 /usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so

#RUN apt install -y libffi-dev


COPY src /app
WORKDIR /app
CMD ["python", "app.py", "--log-level", "DEBUG"]
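A quick way to sanity-check the built image is to confirm that the GGUF file actually sits where MODEL_PATH points before the app starts. A minimal sketch in Python, assuming the image was built from the Dockerfile above (the path below is the Dockerfile default; MODEL_PATH overrides it at runtime):

import os

# Default path taken from the Dockerfile above; MODEL_PATH overrides it at runtime.
model_path = os.getenv("MODEL_PATH", "/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf")

if not os.path.exists(model_path):
    raise SystemExit("GGUF model missing at %s - check the wget -O target in the Dockerfile" % model_path)

print("Model found: %s (%.1f GB)" % (model_path, os.path.getsize(model_path) / 1e9))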
8 changes: 4 additions & 4 deletions shuffle-ai/1.0.0/api.yaml
@@ -12,7 +12,7 @@ contact_info:
  email: [email protected]
actions:
  - name: run_llm
    description: "Runs a local LLM based on ollama with any of their models from https://github.com/ollama/ollama?tab=readme-ov-file#model-library"
    description: "Runs a local LLM on GPU or CPU (slow). The default model is set in the Dockerfile."
    parameters:
      - name: question
        description: "The input question to the model"
@@ -21,11 +21,11 @@ actions:
        example: ""
        schema:
          type: string
      - name: model
        description: "The model to run"
      - name: system_message
        description: "The system message to use, if any"
        required: false
        multiline: false
        example: "deepseek-v3"
        example: ""
        schema:
          type: string

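The two parameters above map directly onto the chat messages that run_llm builds in app.py below. A minimal sketch of the resulting payload, assuming the OpenAI-style message format that llama-cpp-python's create_chat_completion accepts (the example values are hypothetical):

# Hypothetical parameter values as they would arrive from a workflow action.
question = "Summarize this alert in one sentence."
system_message = ""  # optional; app.py falls back to a generic assistant prompt

messages = [
    {"role": "system", "content": system_message or "Be a friendly assistant"},
    {"role": "user", "content": question},
]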
92 changes: 68 additions & 24 deletions shuffle-ai/1.0.0/src/app.py
@@ -25,6 +25,50 @@

from shuffle_sdk import AppBase

#model = "/models/Llama-3.2-3B.Q8_0.gguf" # Larger
#model = "/models/Llama-3.2-3B.Q2_K.gguf" # Smol

#model = "/models/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf" # Larger 8-bit
model = "/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf" # Smaller
if os.getenv("MODEL_PATH"):
    model = os.getenv("MODEL_PATH")

def load_llm_model(model):
    if not os.path.exists(model):
        model_name = model.split("/")[-1]
        # Check $HOME/downloads/{model}

        home_path = os.path.expanduser("~")
        print(home_path)

        if os.path.exists(f"{home_path}/downloads/{model_name}"):
            model = f"{home_path}/downloads/{model_name}"
        else:
            return {
                "success": False,
                "reason": "Model not found at path %s" % model,
                "details": "Ensure the model path is correct"
            }

    # Check for GPU layers
    llm = None
    gpu_layers = os.getenv("GPU_LAYERS")
    if gpu_layers:
        gpu_layers = int(gpu_layers)
        if gpu_layers > 0:
            print("GPU Layers: %s" % gpu_layers)
            llm = llama_cpp.Llama(model_path=model, n_gpu_layers=gpu_layers)
        else:
            llm = llama_cpp.Llama(model_path=model)
    else:
        # Check if GPU available
        #print("No GPU layers set.")
        llm = llama_cpp.Llama(model_path=model)

    return llm

llm = load_llm_model(model)

class Tools(AppBase):
    __version__ = "1.0.0"
    app_name = "Shuffle AI"
@@ -34,47 +78,47 @@ def __init__(self, redis, logger, console_logger=None):

    #def run_llm(self, question, model="llama3.2"):
    #def run_llm(self, question, model="deepseek-v3"):
    def run_llm(self, question, model="/models/Llama-3.2-3B.Q8_0.gguf"):
        self.logger.info("[DEBUG] Running LLM with model '%s'" % model)
    def run_llm(self, question, system_message=""):
        global llm
        global model

        if not os.path.exists(model):
            return {
                "success": False,
                "reason": "Model not found at path %s" % model,
                "details": "Ensure the model path is correct"
            }
        if not system_message:
            system_message = "Be a friendly assistant"

        llm = llama_cpp.Llama(model_path=model)
        self.logger.info("[DEBUG] Running LLM with model '%s'. To overwrite path, use environment variable MODEL_PATH=<path>" % model)

        # https://github.com/abetlen/llama-cpp-python
        output = llm.create_chat_completion(
            max_tokens=100,
            messages = [
                {"role": "system", "content": "You are an assistant who outputs in JSON format.."},
                {
                    "role": "system",
                    "content": system_message,
                },
                {
                    "role": "user",
                    "content": question,
                }
            ]
        )

        return output

        self.logger.info("[DEBUG] LLM output: %s" % output)

        #model = ctransformers.AutoModelForCausalLM.from_pretrained(
        #    model_path_or_repo_id=model,
        #    #model_type="deepseek-v3"
        #)
        new_message = ""
        if "choices" in output and len(output["choices"]) > 0:
            new_message = output["choices"][0]["message"]["content"]

        #resp = model(full_question)
        #return resp
        parsed_output = {
            "success": True,
            "model": output["model"],
            "tokens": output["tokens"],
            "output": new_message,
        }

        #response = ollama.chat(model=model, messages=[
        #    {
        #    "role": "user", "content": question,
        #    }
        #])
        if not os.getenv("GPU_LAYERS"):
            parsed_output["debug"] = "GPU_LAYERS not set. Running on CPU. Set GPU_LAYERS to the number of GPU layers to use (e.g. 8)."

        #return response["message"]["content"]
        return output

    def security_assistant(self):
        # Currently testing outside the Shuffle environment
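The same flow can be exercised outside the Shuffle app with llama-cpp-python directly. A minimal sketch, assuming llama-cpp-python is installed and a GGUF model exists at MODEL_PATH; GPU_LAYERS is optional and mirrors the logic in load_llm_model above:

import os
import llama_cpp

model_path = os.getenv("MODEL_PATH", "/models/DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf")
gpu_layers = int(os.getenv("GPU_LAYERS", "0"))  # 0 keeps everything on the CPU

# n_gpu_layers > 0 offloads that many transformer layers to the GPU.
llm = llama_cpp.Llama(model_path=model_path, n_gpu_layers=gpu_layers)

output = llm.create_chat_completion(
    max_tokens=100,
    messages=[
        {"role": "system", "content": "Be a friendly assistant"},
        {"role": "user", "content": "What does this PowerShell command do?"},
    ],
)

# create_chat_completion returns an OpenAI-style dict; the reply text sits in choices[0].
print(output["choices"][0]["message"]["content"])

Whether offloading actually happens also depends on how llama-cpp-python was built (CUDA, Metal, or CPU-only), not just the layer count.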
