diff --git a/backend/config.py b/backend/config.py
index 9bb1d24..cebc134 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -65,7 +65,7 @@ class RagConfig:
     database: DatabaseConfig = field(default_factory=DatabaseConfig)
     chat_history_window_size: int = 5
     max_tokens_limit: int = 3000
-    response_mode: str = "normal"
+    response_mode: str = None
 
     @classmethod
     def from_yaml(cls, yaml_path: Path, env: dict = None):
diff --git a/backend/config.yaml b/backend/config.yaml
index fc78029..cbb606b 100644
--- a/backend/config.yaml
+++ b/backend/config.yaml
@@ -1,11 +1,7 @@
 LLMConfig: &LLMConfig
-  source: AzureChatOpenAI
+  source: ChatOllama
   source_config:
-    openai_api_type: azure
-    openai_api_key: {{ OPENAI_API_KEY }}
-    openai_api_base: https://genai-ds.openai.azure.com/
-    openai_api_version: 2023-07-01-preview
-    deployment_name: gpt4
+    model: llama2
 
 VectorStoreConfig: &VectorStoreConfig
   source: Chroma
diff --git a/backend/main.py b/backend/main.py
index f073318..1907a3d 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from jose import JWTError, jwt
 
-from langchain_core.messages.ai import AIMessage
+from langchain_core.messages.ai import AIMessage, AIMessageChunk
 
 from backend.logger import get_logger
 from backend.model import Message
@@ -145,8 +145,10 @@ async def stream_response(chat_id: str, response):
             yield data.content.encode("utf-8")
         else:
             for part in response:
-                full_response += part.content
-                yield part.content.encode("utf-8")
+                if isinstance(part, AIMessageChunk):
+                    part = part.content
+                full_response += part
+                yield part.encode("utf-8")
                 await asyncio.sleep(0)
     except Exception as e:
         logger.error(f"Error generating response for chat {chat_id}: {e}", exc_info=True)
diff --git a/docs/recipe_llms_configs.md b/docs/recipe_llms_configs.md
index 06e8140..518a3ea 100644
--- a/docs/recipe_llms_configs.md
+++ b/docs/recipe_llms_configs.md
@@ -12,9 +12,27 @@ LLMConfig: &LLMConfig
     temperature: 0.1
 ```
 
+## Local llama2
+!!! info "You will first need to install and run Ollama"
+
+    [Download the Ollama application here](https://ollama.ai/download)
+
+    Ollama will automatically utilize the GPU on Apple devices.
+
+    ```shell
+    ollama run llama2
+    ```
+
+```yaml
+LLMConfig: &LLMConfig
+  source: ChatOllama
+  source_config:
+    model: llama2
+```
+
 ## Vertex AI gemini-pro
-!!! info "login to GCP"
+!!! info "You will first need to login to GCP"
 
 ```shell
 export PROJECT_ID=
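
The `stream_response` change in `backend/main.py` appears driven by the fact that LangChain chat models such as `ChatOllama` yield `AIMessageChunk` objects when streaming, so the loop now unwraps `.content` before accumulating and encoding the text. A minimal sketch of that streaming behaviour, assuming `langchain-community` is installed and a local Ollama server is already serving the `llama2` model; the prompt string is illustrative and not part of the diff:

```python
# Sketch only, not part of the diff: shows the chunk type that the updated
# stream_response() loop in backend/main.py now handles.
from langchain_community.chat_models import ChatOllama
from langchain_core.messages.ai import AIMessageChunk

llm = ChatOllama(model="llama2")  # mirrors the new LLMConfig in backend/config.yaml

full_response = ""
for part in llm.stream("Why is the sky blue?"):  # illustrative prompt
    # .stream() yields AIMessageChunk objects; their .content carries the text,
    # which is what gets accumulated and encoded to bytes in stream_response().
    if isinstance(part, AIMessageChunk):
        part = part.content
    full_response += part
    print(part, end="", flush=True)
```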