diff --git a/application/llm/llama_cpp.py b/application/llm/llama_cpp.py
new file mode 100644
index 000000000..d54d6f1b2
--- /dev/null
+++ b/application/llm/llama_cpp.py
@@ -0,0 +1,35 @@
+from application.llm.base import BaseLLM
+
+class LlamaCpp(BaseLLM):
+
+    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+        global llama
+        from llama_cpp import Llama
+
+        llama = Llama(model_path=llm_name)
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False)
+
+        # import sys
+        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+        return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+        # import sys
+        # print(list(result), file=sys.stderr)
+
+        for item in result:
+            for choice in item['choices']:
+                yield choice['text']
diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py
index a7ffc0f65..6a60f1b69 100644
--- a/application/llm/llm_creator.py
+++ b/application/llm/llm_creator.py
@@ -1,6 +1,7 @@
 from application.llm.openai import OpenAILLM, AzureOpenAILLM
 from application.llm.sagemaker import SagemakerAPILLM
 from application.llm.huggingface import HuggingFaceLLM
+from application.llm.llama_cpp import LlamaCpp
 
 
 
@@ -9,7 +10,8 @@ class LLMCreator:
         'openai': OpenAILLM,
         'azure_openai': AzureOpenAILLM,
         'sagemaker': SagemakerAPILLM,
-        'huggingface': HuggingFaceLLM
+        'huggingface': HuggingFaceLLM,
+        'llama.cpp': LlamaCpp
     }
 
     @classmethod
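
For reviewers, here is a minimal usage sketch of the new backend (not part of the diff). It assumes `llama-cpp-python` is installed and that `./models/orca-test.bin`, a placeholder path, points to a valid local model file; the message contents are likewise made up for illustration.

```python
# Hypothetical usage sketch; the model path and messages are placeholders.
from application.llm.llama_cpp import LlamaCpp

# api_key is required positionally by __init__ but unused by this backend.
llm = LlamaCpp(api_key=None, llm_name='./models/orca-test.bin')

# gen() takes the retrieved context from the first message and the user's
# question from the last one, matching how the prompt template in the diff
# assembles the Instruction / Context / Answer sections.
messages = [
    {'role': 'system', 'content': 'DocsGPT is an open-source documentation assistant.'},
    {'role': 'user', 'content': 'What is DocsGPT?'},
]
print(llm.gen(model=None, engine=None, messages=messages))

# gen_stream() yields text chunks incrementally as llama.cpp produces them.
for chunk in llm.gen_stream(model=None, engine=None, messages=messages):
    print(chunk, end='', flush=True)
```

One design note: because `__init__` stores the model handle in a module-level `llama` global rather than on `self`, constructing a second `LlamaCpp` with a different `llm_name` silently replaces the model used by every existing instance.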