add: local llama recipe
AlexisVLRT committed Jan 4, 2024
1 parent 8bf54c6 commit 3b72f39
Showing 4 changed files with 27 additions and 11 deletions.
2 changes: 1 addition & 1 deletion backend/config.py
@@ -65,7 +65,7 @@ class RagConfig:
     database: DatabaseConfig = field(default_factory=DatabaseConfig)
     chat_history_window_size: int = 5
     max_tokens_limit: int = 3000
-    response_mode: str = "normal"
+    response_mode: str = None
 
     @classmethod
     def from_yaml(cls, yaml_path: Path, env: dict = None):
8 changes: 2 additions & 6 deletions backend/config.yaml
@@ -1,11 +1,7 @@
 LLMConfig: &LLMConfig
-  source: AzureChatOpenAI
+  source: ChatOllama
   source_config:
-    openai_api_type: azure
-    openai_api_key: {{ OPENAI_API_KEY }}
-    openai_api_base: https://genai-ds.openai.azure.com/
-    openai_api_version: 2023-07-01-preview
-    deployment_name: gpt4
+    model: llama2
 
 VectorStoreConfig: &VectorStoreConfig
   source: Chroma
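
Note: the config swaps the `LLMConfig` block from Azure OpenAI to a locally served llama2 via Ollama. Below is a minimal sketch of how a `source`/`source_config` pair like the one above could be resolved into a LangChain chat model; the `build_llm` helper and the `getattr`-based lookup are assumptions for illustration only, not the repo's actual loader.

```python
# Minimal sketch: resolve a `source`/`source_config` pair into a LangChain chat model.
# The dynamic lookup below is an assumption for illustration; the backend's own
# config loader may resolve the class differently.
from langchain_community import chat_models


def build_llm(source: str, source_config: dict):
    llm_cls = getattr(chat_models, source)  # e.g. ChatOllama or AzureChatOpenAI
    return llm_cls(**source_config)


llm = build_llm("ChatOllama", {"model": "llama2"})
print(llm.invoke("Say hello in one short sentence.").content)
```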
8 changes: 5 additions & 3 deletions backend/main.py
@@ -11,7 +11,7 @@
 from fastapi.responses import StreamingResponse
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from jose import JWTError, jwt
-from langchain_core.messages.ai import AIMessage
+from langchain_core.messages.ai import AIMessage, AIMessageChunk
 
 from backend.logger import get_logger
 from backend.model import Message
@@ -145,8 +145,10 @@ async def stream_response(chat_id: str, response):
             yield data.content.encode("utf-8")
         else:
             for part in response:
-                full_response += part.content
-                yield part.content.encode("utf-8")
+                if isinstance(part, AIMessageChunk):
+                    part = part.content
+                full_response += part
+                yield part.encode("utf-8")
                 await asyncio.sleep(0)
     except Exception as e:
         logger.error(f"Error generating response for chat {chat_id}: {e}", exc_info=True)
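
Note: the new `isinstance` guard reflects that streaming chat models such as `ChatOllama` yield `AIMessageChunk` objects rather than plain strings, so the text has to be read from `.content` before it can be concatenated and encoded. A minimal standalone sketch of that behaviour, assuming a local Ollama server is already serving llama2:

```python
from langchain_community.chat_models import ChatOllama
from langchain_core.messages.ai import AIMessageChunk

llm = ChatOllama(model="llama2")  # assumes `ollama run llama2` is serving locally

full_response = ""
for part in llm.stream("Why is the sky blue?"):
    # Each streamed part is an AIMessageChunk; the text lives in `.content`.
    if isinstance(part, AIMessageChunk):
        part = part.content
    full_response += part
    print(part, end="", flush=True)
```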
20 changes: 19 additions & 1 deletion docs/recipe_llms_configs.md
@@ -12,9 +12,27 @@ LLMConfig: &LLMConfig
     temperature: 0.1
 ```
+
+## Local llama2
+
+!!! info "You will first need to install and run Ollama"
+    [Download the Ollama application here](https://ollama.ai/download)
+
+    Ollama will automatically utilize the GPU on Apple devices.
+
+    ```shell
+    ollama run llama2
+    ```
+
+```yaml
+LLMConfig: &LLMConfig
+  source: ChatOllama
+  source_config:
+    model: llama2
+```
 
 ## Vertex AI gemini-pro
 
-!!! info "login to GCP"
+!!! info "You will first need to login to GCP"
 
 ```shell
 export PROJECT_ID=<gcp_project_id>
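
Note: as a quick way to confirm the recipe's prerequisites outside the backend, Ollama can be queried directly over its local HTTP API. A hedged sketch, assuming Ollama's default endpoint (`http://localhost:11434`) and that the llama2 model has already been pulled; the prompt and timeout are illustrative only.

```python
# Quick check that a local Ollama server can serve llama2 before pointing the
# backend at it. Assumes Ollama's default port (11434) and that `ollama run llama2`
# or `ollama pull llama2` has already been executed.
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama2", "prompt": "Say hello in five words.", "stream": False},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])
```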
