add: local llama recipe
AlexisVLRT committed Jan 4, 2024
1 parent 8bf54c6 commit 3b72f39
Showing 4 changed files with 27 additions and 11 deletions.
2 changes: 1 addition & 1 deletion backend/config.py
@@ -65,7 +65,7 @@ class RagConfig:
     database: DatabaseConfig = field(default_factory=DatabaseConfig)
     chat_history_window_size: int = 5
     max_tokens_limit: int = 3000
-    response_mode: str = "normal"
+    response_mode: str = None
 
     @classmethod
     def from_yaml(cls, yaml_path: Path, env: dict = None):
8 changes: 2 additions & 6 deletions backend/config.yaml
@@ -1,11 +1,7 @@
 LLMConfig: &LLMConfig
-  source: AzureChatOpenAI
+  source: ChatOllama
   source_config:
-    openai_api_type: azure
-    openai_api_key: {{ OPENAI_API_KEY }}
-    openai_api_base: https://genai-ds.openai.azure.com/
-    openai_api_version: 2023-07-01-preview
-    deployment_name: gpt4
+    model: llama2
 
 VectorStoreConfig: &VectorStoreConfig
   source: Chroma
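
Note: the config swaps the `LLMConfig` block from Azure OpenAI to a locally served llama2 via Ollama. Below is a minimal sketch of how a `source`/`source_config` pair like the one above could be resolved into a LangChain chat model; the `build_llm` helper and the `getattr`-based lookup are assumptions for illustration only, not the repo's actual loader.

```python
# Minimal sketch: resolve a `source`/`source_config` pair into a LangChain chat model.
# The dynamic lookup below is an assumption for illustration; the backend's own
# config loader may resolve the class differently.
from langchain_community import chat_models


def build_llm(source: str, source_config: dict):
    llm_cls = getattr(chat_models, source)  # e.g. ChatOllama or AzureChatOpenAI
    return llm_cls(**source_config)


llm = build_llm("ChatOllama", {"model": "llama2"})
print(llm.invoke("Say hello in one short sentence.").content)
```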
8 changes: 5 additions & 3 deletions backend/main.py
@@ -11,7 +11,7 @@
 from fastapi.responses import StreamingResponse
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from jose import JWTError, jwt
-from langchain_core.messages.ai import AIMessage
+from langchain_core.messages.ai import AIMessage, AIMessageChunk
 
 from backend.logger import get_logger
 from backend.model import Message
@@ -145,8 +145,10 @@ async def stream_response(chat_id: str, response):
             yield data.content.encode("utf-8")
         else:
             for part in response:
-                full_response += part.content
-                yield part.content.encode("utf-8")
+                if isinstance(part, AIMessageChunk):
+                    part = part.content
+                full_response += part
+                yield part.encode("utf-8")
                 await asyncio.sleep(0)
     except Exception as e:
         logger.error(f"Error generating response for chat {chat_id}: {e}", exc_info=True)
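
Note: the new `isinstance` guard reflects that streaming chat models such as `ChatOllama` yield `AIMessageChunk` objects rather than plain strings, so the text has to be read from `.content` before it can be concatenated and encoded. A minimal standalone sketch of that behaviour, assuming a local Ollama server is already serving llama2:

```python
from langchain_community.chat_models import ChatOllama
from langchain_core.messages.ai import AIMessageChunk

llm = ChatOllama(model="llama2")  # assumes `ollama run llama2` is serving locally

full_response = ""
for part in llm.stream("Why is the sky blue?"):
    # Each streamed part is an AIMessageChunk; the text lives in `.content`.
    if isinstance(part, AIMessageChunk):
        part = part.content
    full_response += part
    print(part, end="", flush=True)
```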
20 changes: 19 additions & 1 deletion docs/recipe_llms_configs.md
@@ -12,9 +12,27 @@ LLMConfig: &LLMConfig
     temperature: 0.1
 ```
+
+## Local llama2
+
+!!! info "You will first need to install and run Ollama"
+    [Download the Ollama application here](https://ollama.ai/download)
+
+    Ollama will automatically utilize the GPU on Apple devices.
+
+    ```shell
+    ollama run llama2
+    ```
+
+```yaml
+LLMConfig: &LLMConfig
+  source: ChatOllama
+  source_config:
+    model: llama2
+```
 
 ## Vertex AI gemini-pro
 
-!!! info "login to GCP"
+!!! info "You will first need to login to GCP"
 
 ```shell
 export PROJECT_ID=<gcp_project_id>
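
Note: as a quick way to confirm the recipe's prerequisites outside the backend, Ollama can be queried directly over its local HTTP API. A hedged sketch, assuming Ollama's default endpoint (`http://localhost:11434`) and that the llama2 model has already been pulled; the prompt and timeout are illustrative only.

```python
# Quick check that a local Ollama server can serve llama2 before pointing the
# backend at it. Assumes Ollama's default port (11434) and that `ollama run llama2`
# or `ollama pull llama2` has already been executed.
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama2", "prompt": "Say hello in five words.", "stream": False},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])
```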
