Skip to content

Commit

Permalink
Integrate structured query (search part only) with the existing app
Browse files Browse the repository at this point in the history
  • Loading branch information
Taniya-Das committed Jul 19, 2024
1 parent 0837b7e commit 9cf2b3d
Show file tree
Hide file tree
Showing 10 changed files with 218 additions and 9 deletions.
3 changes: 2 additions & 1 deletion backend/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"top_p" : 0.95,
"search_type" : "similarity",
"reranking" : false,
"long_context_reorder" : false
"long_context_reorder" : false,
"structure_query": true

}
4 changes: 4 additions & 0 deletions frontend/paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,9 @@
"llm_response" : {
"docker" : "http://fastapi:8081/llmquery/",
"local" : "http://0.0.0.0:8081/llmquery/"
},
"structured_query": {
"docker" : "http://fastapi:8082/structuredquery/",
"local" : "http://0.0.0.0:8082/structuredquery/"
}
}
20 changes: 15 additions & 5 deletions frontend/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@
response_parser = ResponseParser(query_type, apply_llm_before_rag=True)
if query_type == "Dataset":
    with st.spinner("Waiting for results..."):
        # Truthiness check instead of `== True` (config value is a bool).
        if config["structure_query"]:
            # Structured-query path: rewrite the free-text query into a
            # {query, filter} pair via the structured-query service, then
            # run RAG on the rewritten query text only.
            structured_query = response_parser.fetch_structured_query(query_type, query)
            st.write(structured_query)
            response_parser.fetch_rag_response(query_type, structured_query["query"])
            # LLM post-processing is skipped on this path for now.
            # response_parser.fetch_llm_response(query)
        else:
            # Plain path: RAG on the raw query, then LLM post-processing
            # to derive the updated result columns.
            response_parser.fetch_rag_response(query_type, query)
            response_parser.fetch_llm_response(query)
        results = response_parser.parse_and_update_response(data_metadata)
        # display results in a table
        display_results(results)
Expand Down
22 changes: 22 additions & 0 deletions frontend/ui_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,28 @@ def fetch_llm_response(self, query):
f"{llm_response_path['local']}{query}"
).json()
return self.llm_response

def fetch_structured_query(self, query_type, query):
    """
    Fetch the structured form of the user query from the FastAPI
    structured-query service.

    Tries the Docker-internal URL first and falls back to the local URL
    when the service is unreachable there. The raw JSON response is
    stored on ``self.structured_query_response`` and returned.

    :param query_type: kept for signature parity with the other fetch_*
        methods (currently unused by the endpoint).
    :param query: free-text user query to be structured.
    """
    structured_response_path = self.paths["structured_query"]
    # NOTE(review): the query is sent both in the URL path and as a JSON
    # body; the endpoint only reads the path parameter.
    try:
        self.structured_query_response = requests.get(
            f"{structured_response_path['docker']}{query}",
            json={"query": query},
        ).json()
    except requests.exceptions.RequestException:
        # Narrowed from a bare `except:` — only network/HTTP failures
        # should trigger the fallback to the locally-bound service.
        self.structured_query_response = requests.get(
            f"{structured_response_path['local']}{query}",
            json={"query": query},
        ).json()
    print(self.structured_query_response)
    return self.structured_query_response



def fetch_rag_response(self, query_type, query):
"""
Expand Down
15 changes: 12 additions & 3 deletions start_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,18 @@ cd ollama
./get_ollama.sh &
echo $! > $PID_FILE

cd ../llm_service
uvicorn llm_service:app --host 0.0.0.0 --port 8081 &
echo $! > $PID_FILE
# Toggle: launch the structured-query LLM service instead of the plain one.
# (Was `structured_query = True` / `if [$structured_query ==true ]` — invalid
# bash: assignments take no spaces, and `[` requires spaces around operands.)
structured_query=true
if [ "$structured_query" = "true" ]; then
    cd ../structured_query
    uvicorn llm_service_structured_query:app --host 0.0.0.0 --port 8082 &
    echo $! > $PID_FILE
else
    cd ../llm_service
    uvicorn llm_service:app --host 0.0.0.0 --port 8081 &
    echo $! > $PID_FILE
fi


cd ../backend
uvicorn backend:app --host 0.0.0.0 --port 8000 &
Expand Down
3 changes: 3 additions & 0 deletions stop_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ kill -9 $(cat $PID_FILE)
# Stop the plain LLM service.
# NOTE(review): every service writes its PID to $PID_FILE relative to its own
# directory — confirm PID_FILE is a relative path, otherwise each start
# overwrites the previous PID and only the last service gets killed.
cd ../llm_service
kill -9 $(cat $PID_FILE)

# Stop the structured-query service.
cd ../structured_query
kill -9 $(cat $PID_FILE)

# Stop the Streamlit frontend.
cd ../frontend
# streamlit run ui.py &
kill -9 $(cat $PID_FILE)
Expand Down
72 changes: 72 additions & 0 deletions structured_query/llm_service_structured_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# LangChain helpers for building a self-query ("structured query") constructor.
from langchain.chains.query_constructor.base import (
    get_query_constructor_prompt,
    load_query_constructor_runnable,
)
from structured_query_examples import examples

import json
import sys

# Make sibling directories importable when the service is started from here.
sys.path.append("../")
sys.path.append("../data")

# Attribute metadata describing the filterable dataset fields.
# NOTE(review): reads attribute_info.json from the working directory, while
# structuring_query.py loads the same file from ../data — confirm which path
# is correct for the deployed service.
with open("attribute_info.json", "r") as f:
    attribute_info = json.loads(f.read())

# Drop the first entry — presumably a header/combined record; TODO confirm.
attribute_info = attribute_info[1:]

examples = examples  # no-op re-binding of the imported examples; kept as-is
document_content_description = "Metadata of datasets for various machine learning applications fetched from OpenML platform"
# Built for inspection/debugging; the chain below constructs its own prompt.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=attribute_info,
    examples=examples,
)

from langchain_community.chat_models import ChatOllama

# Subset of attributes the query constructor is allowed to filter on.
content_attr = [
    "status",
    "NumberOfClasses",
    "NumberOfFeatures",
    "NumberOfInstances",
    "Combined_information",
]
# document_content_description = "Metadata of machine learning datasets including status (if dataset is active or not), number of classes in the dataset, number of instances (examples) in the dataset, number of features in the dataset, and Combined_information containing the combined metadata information about the dataset."
filter_attribute_info = tuple(ai for ai in attribute_info if ai["name"] in content_attr)

# Runnable that converts a natural-language query into a structured query
# (query text + filter) using a local llama3 model served via Ollama.
# fix_invalid=True asks LangChain to repair malformed filter output.
chain = load_query_constructor_runnable(
    ChatOllama(model="llama3"),
    document_content_description,
    # attribute_info,
    filter_attribute_info,
    examples=examples,
    fix_invalid=True,
)

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from httpx import ConnectTimeout
from tenacity import retry, retry_if_exception_type, stop_after_attempt

app = FastAPI()


@app.get("/structuredquery/{query}", response_class=JSONResponse)
@retry(stop=stop_after_attempt(3), retry=retry_if_exception_type(ConnectTimeout))
async def get_structured_query(query: str):
    """
    Convert a free-text dataset query into a structured {query, filter} response.

    Replaces any literal "%20" sequences with spaces (defensive only —
    FastAPI URL-decodes path parameters before they reach the handler)
    and invokes the query-constructor chain. Retries up to 3 times when
    the LLM backend connection times out.
    """
    query = query.replace("%20", " ")
    response = chain.invoke({"query": query})
    return response

Empty file.
41 changes: 41 additions & 0 deletions structured_query/structured_query_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

examples = [
(
"Give me mushroom datasets with less than 10k rows.",
{
"query": "mushroom dataset",
"filter": 'lt("NumberOfInstances", 10000)',
},
),
(
"Give me datasets than can be used in healthcare with 2 or more classes",
{
"query": "heathcare dataset ",
"filter": 'gte("NumberOfClasses", 2.0)',
},
),
(
"Give me datasets than can be used in healthcare, or climate applications with 2 or more classes and in arff format.",
{
"query": "heathcare dataset, climate datasets",
"filter": 'and(gte("NumberOfClasses", 2.0), eq("format", "arff"))',
},
),
(
"Give me medical datasets.",
{
"query": "medical datasets",
"filter": "NO_FILTER",
},
),
(
"Give me medical datasets with large number of features.",
{
"query": "medical datasets",
"filter": 'gte("NumberOfFeatures, 100)',
},
),
]



47 changes: 47 additions & 0 deletions structured_query/structuring_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# LangChain helpers for building a self-query ("structured query") constructor.
from langchain.chains.query_constructor.base import (
    get_query_constructor_prompt,
    load_query_constructor_runnable,
)
from structured_query.structured_query_examples import examples

import json

# Attribute metadata describing the filterable dataset fields.
# NOTE(review): loads from ../data here, while llm_service_structured_query.py
# reads attribute_info.json from the working directory — confirm which is right.
with open("../data/attribute_info.json", "r") as f:
    attribute_info = json.loads(f.read())

# Drop the first entry — presumably a header/combined record; TODO confirm.
attribute_info = attribute_info[1:]

examples = examples  # no-op re-binding of the imported examples; kept as-is
document_content_description = "Metadata of datasets for various machine learning applications fetched from OpenML platform"
# Built for inspection/debugging; the chain below constructs its own prompt.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=attribute_info,
    examples=examples,
)

from langchain_community.chat_models import ChatOllama

# Subset of attributes the query constructor is allowed to filter on.
content_attr = [
    "status",
    "NumberOfClasses",
    "NumberOfFeatures",
    "NumberOfInstances",
    "Combined_information",
]
# document_content_description = "Metadata of machine learning datasets including status (if dataset is active or not), number of classes in the dataset, number of instances (examples) in the dataset, number of features in the dataset, and Combined_information containing the combined metadata information about the dataset."
filter_attribute_info = tuple(ai for ai in attribute_info if ai["name"] in content_attr)

# Runnable that converts a natural-language query into a structured query
# (query text + filter) using a local llama3 model served via Ollama.
# fix_invalid=True asks LangChain to repair malformed filter output.
chain = load_query_constructor_runnable(
    ChatOllama(model="llama3"),
    document_content_description,
    # attribute_info,
    filter_attribute_info,
    examples=examples,
    fix_invalid=True,
)

def structuring_query(query: str):
    """Run *query* through the query-constructor chain.

    Returns the (query, filter) pair extracted from the resulting
    structured-query object.
    """
    result = chain.invoke(query)
    return result.query, result.filter

0 comments on commit 9cf2b3d

Please sign in to comment.