backend.py
"""
RAG pipeline accessible through RestAPI.
Author
------
Nicolas Rojas
"""
# import libraries
import os.path
from json import dumps

import yaml
from pydantic import BaseModel
from fastapi import FastAPI
import chromadb
from llama_index.core import (
    VectorStoreIndex,
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.ollama import Ollama

from database_handler import create_index


class Query(BaseModel):
    """RestAPI input structure."""

    query: str
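
# as a FastAPI request body, Query corresponds to JSON of the form
# {"query": "<user question>"}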


def save_query(path: str, query: str, response: dict):
    """Save query in persistent jsonl file.

    Parameters
    ----------
    path : str
        Path to the jsonl file where the query history is saved.
    query : str
        Query received by the RAG system.
    response : dict
        Dictionary with the response and relevant documents.
    """
    data = dict(response)
    data["query"] = query
    with open(path, "a", encoding="utf8") as jfile:
        jfile.write(dumps(data, ensure_ascii=False) + "\n")
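
# each appended history line is a JSON object like (illustrative values):
# {"response": "<llm answer>", "source_files": ["example.pdf"], "query": "<user question>"}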

# load configuration variables
with open("config.yaml", "r", encoding="utf8") as yfile:
    parameters = yaml.safe_load(yfile)
index_dir = parameters["index_directory"]
chunk_size = parameters["chunk_size"]
embedding_model = parameters["embedding_model"]
ollama_model = parameters["ollama_model"]
chroma_collection = parameters["chroma_collection"]
documents_dir = parameters["documents_dir"]
query_history = parameters["query_history"]

# set custom RAG settings
Settings.chunk_size = chunk_size
Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model)
Settings.llm = Ollama(model=ollama_model, request_timeout=360.0)
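
# for reference, a minimal config.yaml sketch; the keys are the ones read
# above, but every value shown here is an assumption, not taken from the repo:
#   index_directory: "./chroma_index"
#   chunk_size: 512
#   embedding_model: "BAAI/bge-small-en-v1.5"
#   ollama_model: "llama3"
#   chroma_collection: "documents"
#   documents_dir: "./documents"
#   query_history: "./query_history.jsonl"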

# initiate FastAPI app
app = FastAPI()

# check if stored index already exists
if not os.path.exists(index_dir):
    create_index(chroma_collection, documents_dir, index_dir, embedding_model)

# load the existing index
chroma_client = chromadb.PersistentClient(path=index_dir)
chroma_collection = chroma_client.get_or_create_collection(chroma_collection)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
embed_model = HuggingFaceEmbedding(model_name=embedding_model)
index = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=embed_model,
)

# build the query engine on top of the index
query_engine = index.as_query_engine()


@app.post("/query/")
def retrieve(query: Query) -> dict:
    """Run a query with the RAG pipeline.

    Parameters
    ----------
    query : Query
        Request body containing the question asked by the user.

    Returns
    -------
    dict
        Dictionary containing the answer given by the LLM and the relevant
        documents.
    """
    query = query.query
    response = query_engine.query(query)
    result = {"response": response.response}
    # collect the unique names of the files the retrieved chunks came from
    source_files = []
    for source_node in response.source_nodes:
        source_files.append(source_node.node.metadata["file_name"])
    source_files = list(set(source_files))
    result["source_files"] = source_files
    save_query(query_history, query, result)
    return result
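
A minimal client sketch for trying the endpoint, assuming the app is served with uvicorn (for example, uvicorn backend:app --port 8000) and that the requests package is installed; the host, port, and question text are assumptions, not part of the repository:

import requests

resp = requests.post(
    "http://localhost:8000/query/",
    json={"query": "What do the documents say about the onboarding process?"},
    timeout=400,
)
# the endpoint returns the LLM answer plus the source file names, e.g.
# {"response": "...", "source_files": ["example.pdf"]}
print(resp.json())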