-
Notifications
You must be signed in to change notification settings - Fork 0
/
rag_engine.py
67 lines (51 loc) · 2.25 KB
/
rag_engine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from warnings import filterwarnings
filterwarnings('ignore')
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_postgres import PGVector
# Module-level setup: these statements run once, when the module is first
# imported, and create the globals used by the functions below.

# Connection URL for the Postgres database backing the vector store
# (psycopg driver, localhost port 6024, database "langchain").
# NOTE(review): the username and password are hard-coded in this URL —
# consider loading them from configuration/environment instead.
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"
# Name of the collection passed to PGVector below.
collection_name = "arxiv_docs"
print("Loading Sentence transformer and vectorstore...")
# Embedding model handed to the vector store.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Shared vector store instance; similarity_search() reads this global.
vectorstore = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    # presumably selects JSONB storage for document metadata — TODO confirm
    # against the langchain_postgres documentation.
    use_jsonb=True
)
print("Loaded Sentence transformer and vectorstore.")
def similarity_search(text, num_docs):
    '''Run a similarity search for ``text`` against the module-level vector store.

    ``num_docs`` is forwarded as the ``k`` argument of
    ``vectorstore.similarity_search``; whatever that call returns is handed
    back to the caller unchanged.
    '''
    return vectorstore.similarity_search(text, k=num_docs)
def document_template(document):
    '''Render one document as a text block with article title, id and abstract.

    Args:
        document: Object exposing ``page_content`` (used as the abstract) and
            a ``metadata`` mapping containing the keys ``'arXivId'`` and
            ``'title'``.

    Returns:
        A string of the form
        ``"Article title: <title>\\narXivId: <id>\\nAbstract:<abstract>"``.

    Raises:
        KeyError: If ``metadata`` has no ``'arXivId'`` or ``'title'`` entry.
    '''
    abstract = document.page_content
    # Named ``arxiv_id`` (not ``id``) so the builtin ``id()`` is not shadowed.
    arxiv_id = document.metadata['arXivId']
    title = document.metadata['title']
    return f"Article title: {title}\narXivId: {arxiv_id}\nAbstract:{abstract}"
def rag_function(query, num_docs=3):
    '''Build the context string for ``query``.

    Looks up ``num_docs`` documents through ``similarity_search``, renders
    each one with ``document_template`` and joins the rendered blocks with a
    blank line between them.
    '''
    hits = similarity_search(query, num_docs)
    return "\n\n".join(map(document_template, hits))
def generate_prompt(query, num_docs=3):
    '''Assemble the full prompt text for ``query``.

    The context comes from ``rag_function(query, num_docs)`` and is inserted,
    together with the question, into a fixed instruction template. The
    result is returned with leading and trailing whitespace stripped.
    '''
    context = rag_function(query, num_docs)
    # The template lines sit at column 0 on purpose: any indentation inside
    # the triple-quoted literal would become part of the prompt.
    return f"""
You are a helpful assistant. Use the information provided to answer the question below. Follow these rules:
1. Base your answer on the facts in the provided information.
2. Keep your answer concise.
3. Recommend an article relevant to the question based on title and arXiv ID from the context.
4. If the information doesn't contain the answer, inform the user and recommend an article relevant to the question based on title and arXiv ID from the context.
Question:
{query}
Context:
{context}
Helpful Answer:
""".strip()
# Example usage: build a prompt for a sample question and print it.
# NOTE(review): these statements are not behind an
# `if __name__ == "__main__":` guard, so they also execute (including the
# vector-store query made via generate_prompt) whenever this module is imported.
question = "Provide examples of compiler optimization techniques."
prompt = generate_prompt(question)
print(f"template: {prompt}")