-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integration of structured query only (search part) with existing app
- Loading branch information
1 parent
0837b7e
commit 9cf2b3d
Showing
10 changed files
with
218 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from langchain.chains.query_constructor.base import ( | ||
get_query_constructor_prompt, | ||
load_query_constructor_runnable, | ||
) | ||
from structured_query_examples import examples | ||
|
||
import json | ||
import sys | ||
|
||
sys.path.append("../") | ||
sys.path.append("../data") | ||
|
||
# Read the attribute descriptions and keep everything after the first entry.
with open("attribute_info.json", "r") as f:
    attribute_info = json.load(f)[1:]

# Free-text description of the documents, passed to the query constructor.
document_content_description = (
    "Metadata of datasets for various machine learning applications "
    "fetched from OpenML platform"
)

# Few-shot prompt built from the description, the attributes and the
# imported `examples`.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=attribute_info,
    examples=examples,
)
|
||
from langchain_community.chat_models import ChatOllama | ||
|
||
# Names of the attributes the query constructor is restricted to.
content_attr = [
    "status",
    "NumberOfClasses",
    "NumberOfFeatures",
    "NumberOfInstances",
    "Combined_information",
]
# Alternative, more detailed description kept for reference:
# document_content_description = "Metadata of machine learning datasets including status (if dataset is active or not), number of classes in the dataset, number of instances (examples) in the dataset, number of features in the dataset, and Combined_information containing the combined metadata information about the dataset."

# Keep only the attribute entries whose "name" is listed in `content_attr`.
filter_attribute_info = tuple(
    entry for entry in attribute_info if entry["name"] in content_attr
)

# Query-constructor chain; it receives the restricted attribute subset
# rather than the full `attribute_info`.
chain = load_query_constructor_runnable(
    ChatOllama(model="llama3"),
    document_content_description,
    filter_attribute_info,
    examples=examples,
    fix_invalid=True,
)
|
||
# def structuring_query(query:str): | ||
# structured_query = chain.invoke(query) | ||
|
||
# return structured_query.query, structured_query.filter | ||
|
||
from fastapi import FastAPI | ||
from fastapi.responses import JSONResponse | ||
from httpx import ConnectTimeout | ||
# from llm_service_utils import create_chain, parse_answers_initial | ||
from tenacity import retry, retry_if_exception_type, stop_after_attempt | ||
|
||
app = FastAPI() | ||
|
||
# Endpoint that converts a natural-language query into a structured query.
# The handler is retried up to 3 times when an httpx ConnectTimeout is raised.
@app.get("/structuredquery/{query}", response_class=JSONResponse)
@retry(stop=stop_after_attempt(3), retry=retry_if_exception_type(ConnectTimeout))
async def get_structured_query(query: str):
    """Run the query-constructor chain on a query taken from the URL path.

    Any literal ``%20`` still present in the path parameter is replaced with
    a space before the chain is invoked.

    Args:
        query: Natural-language query from the request path.

    Returns:
        The chain's output for ``{"query": query}``.
    """
    query = query.replace("%20", " ")
    # Await the async entry point: calling the blocking ``chain.invoke``
    # inside an ``async def`` handler would stall the event loop for the
    # whole duration of the LLM call.
    response = await chain.ainvoke({"query": query})
    return response
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
|
||
# Few-shot examples for the query constructor. Each entry is a pair of
# (user question, expected structured output), where the output holds the
# free-text "query" and the "filter" expression (or "NO_FILTER").
examples = [
    (
        "Give me mushroom datasets with less than 10k rows.",
        {
            "query": "mushroom dataset",
            "filter": 'lt("NumberOfInstances", 10000)',
        },
    ),
    (
        "Give me datasets than can be used in healthcare with 2 or more classes",
        {
            "query": "healthcare dataset",
            "filter": 'gte("NumberOfClasses", 2.0)',
        },
    ),
    (
        "Give me datasets than can be used in healthcare, or climate applications with 2 or more classes and in arff format.",
        {
            "query": "healthcare dataset, climate datasets",
            # NOTE(review): confirm "format" is an attribute the consuming
            # chain is actually allowed to filter on.
            "filter": 'and(gte("NumberOfClasses", 2.0), eq("format", "arff"))',
        },
    ),
    (
        "Give me medical datasets.",
        {
            "query": "medical datasets",
            "filter": "NO_FILTER",
        },
    ),
    (
        "Give me medical datasets with large number of features.",
        {
            "query": "medical datasets",
            # The attribute name must be closed with a quote; the previous
            # value ('gte("NumberOfFeatures, 100)') was a malformed filter.
            "filter": 'gte("NumberOfFeatures", 100)',
        },
    ),
]
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from langchain.chains.query_constructor.base import ( | ||
get_query_constructor_prompt, | ||
load_query_constructor_runnable, | ||
) | ||
from structured_query.structured_query_examples import examples | ||
|
||
import json | ||
|
||
# Read the attribute descriptions and keep everything after the first entry.
with open("../data/attribute_info.json", "r") as f:
    attribute_info = json.load(f)[1:]

# Free-text description of the documents, passed to the query constructor.
document_content_description = (
    "Metadata of datasets for various machine learning applications "
    "fetched from OpenML platform"
)

# Few-shot prompt built from the description, the attributes and the
# imported `examples`.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=attribute_info,
    examples=examples,
)
|
||
from langchain_community.chat_models import ChatOllama | ||
|
||
# Names of the attributes the query constructor is restricted to.
content_attr = [
    "status",
    "NumberOfClasses",
    "NumberOfFeatures",
    "NumberOfInstances",
    "Combined_information",
]
# Alternative, more detailed description kept for reference:
# document_content_description = "Metadata of machine learning datasets including status (if dataset is active or not), number of classes in the dataset, number of instances (examples) in the dataset, number of features in the dataset, and Combined_information containing the combined metadata information about the dataset."

# Keep only the attribute entries whose "name" is listed in `content_attr`.
filter_attribute_info = tuple(
    entry for entry in attribute_info if entry["name"] in content_attr
)

# Query-constructor chain; it receives the restricted attribute subset
# rather than the full `attribute_info`.
chain = load_query_constructor_runnable(
    ChatOllama(model="llama3"),
    document_content_description,
    filter_attribute_info,
    examples=examples,
    fix_invalid=True,
)
|
||
def structuring_query(query: str):
    """Run the query-constructor chain on a natural-language query.

    Args:
        query: Natural-language search query.

    Returns:
        A ``(query, filter)`` pair taken from the ``query`` and ``filter``
        attributes of the chain's result.
    """
    # Pass the input as an explicit mapping keyed by "query", matching how
    # the service endpoint invokes the same chain.
    structured_query = chain.invoke({"query": query})
    return structured_query.query, structured_query.filter
|