
Commit

Taniya-Das committed Aug 9, 2024
2 parents b00a944 + 1218cc9 commit 4f2a095
Showing 17 changed files with 95 additions and 47 deletions.
7 changes: 4 additions & 3 deletions backend/Dockerfile
@@ -1,12 +1,13 @@
FROM python:3.10.14
RUN mkdir /fastapi
COPY requirements.txt fastapi/requirements.txt
RUN mkdir /data
COPY ./backend /fastapi
COPY ./data /data
WORKDIR /fastapi

RUN pip install -r requirements.txt

COPY ./data /fastapi
COPY . /fastapi
#COPY . /fastapi
# COPY modules /fastapi
# COPY *.py /fastapi
# COPY *.json /fastapi
13 changes: 7 additions & 6 deletions backend/backend.py
@@ -16,11 +16,12 @@
config["test_subset"] = True
config["data_dir"] = "./data/testing_data/"
# load the persistent database using ChromaDB
print('Loading DB')
client = chromadb.PersistentClient(path=config["persist_dir"])
# Loading the metadata for all types

# Setup llm chain, initialize the retriever and llm, and setup Retrieval QA

print('Setting LLM chain')
qa_dataset_handler = QASetup(
config=config,
data_type="dataset",
@@ -41,7 +42,7 @@
llm_chain_handler = LLMChainCreator(config=config, local=True)
llm_chain_handler.enable_cache()
llm_chain = llm_chain_handler.get_llm_chain()

print('OK.')

# Send test query as first query to avoid cold start
try:
@@ -65,8 +66,8 @@ async def read_dataset(query: str):
# Fetch the result data frame based on the query
_, ids_order = QueryProcessor(
query=query,
qa=qa_dataset if type_of_query == "dataset" else qa_flow,
type_of_query=type_of_query,
qa=qa_dataset,
type_of_query='dataset',
config=config,
).get_result_from_query()

@@ -86,8 +87,8 @@ async def read_flow(query: str):
try:
_, ids_order = QueryProcessor(
query=query,
qa=qa_dataset if type_of_query == "flow" else qa_flow,
type_of_query=type_of_query,
qa=qa_flow,
type_of_query='flow',
config=config,
).get_result_from_query()

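The two endpoints above previously consulted a shared type_of_query variable that is not defined anywhere in the shown excerpt; the commit pins each route to its own fixed type instead. A minimal sketch of the resulting pattern, assuming QueryProcessor, qa_dataset, and config are the objects set up earlier in backend.py (the route path here is illustrative, not taken from the repo):

from fastapi import FastAPI

app = FastAPI()

@app.get("/dataset/{query}")
async def read_dataset(query: str):
    # The query type is now pinned per route rather than read from a
    # shared (and previously unresolved) type_of_query variable.
    _, ids_order = QueryProcessor(
        query=query,
        qa=qa_dataset,
        type_of_query="dataset",
        config=config,
    ).get_result_from_query()
    return ids_order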
4 changes: 2 additions & 2 deletions backend/training.py
@@ -1,6 +1,6 @@
import chromadb
from .modules.rag_llm import *
from .modules.utils import *
from modules.rag_llm import *
from modules.utils import *

# Load the config file and set training to true
config = load_config_and_device("config.json", training=True)
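The switch from relative to absolute imports matters because training.py is executed directly as a script (see the start_training.sh change at the bottom of this commit): a file run as `python training.py` is the __main__ module with no parent package, so `from .modules.rag_llm import *` raises an ImportError, while the absolute form resolves through the script's own directory. A tiny illustration of the mechanics, not code from the repo:

import sys

# A file run as a script is the __main__ module with no parent package,
# so relative imports have nothing to be relative to and fail.
print(__name__)     # "__main__"
print(sys.path[0])  # the script's directory, where modules/ lives,
                    # which is why `from modules.utils import *` works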
2 changes: 1 addition & 1 deletion data/attribute_info.json
@@ -1 +1 @@
[{"name": "Unnamed: 0", "description": "Unique identifier", "type": "Numeric"}, {"name": "did", "description": "Dataset ID", "type": "Numeric"}, {"name": "name", "description": "Name of the dataset", "type": "String"}, {"name": "version", "description": "Version of this dataset. '1' for original version. Auto-incremented by server.", "type": "Numeric"}, {"name": "uploader", "description": "ID of the uploader", "type": "Numeric"}, {"name": "status", "description": "Current status of the dataset. Whether the dataset is active.", "type": "String"}, {"name": "format", "description": "Format of the dataset. Example - arff format ", "type": "String"}, {"name": "MajorityClassSize", "description": "Number of instances belonging to the most frequent class.", "type": "Numeric"}, {"name": "MaxNominalAttDistinctValues", "description": "The maximum number of distinct values among attributes of the nominal type.", "type": "Numeric"}, {"name": "MinorityClassSize", "description": "Number of instances belonging to the least frequent class.", "type": "Numeric"}, {"name": "NumberOfClasses", "description": "Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.", "type": "Float"}, {"name": "NumberOfFeatures", "description": "Number of features or attributes in the dataset.", "type": "Numeric"}, {"name": "NumberOfInstances", "description": "Number of instances in the dataset", "type": "Numeric"}, {"name": "NumberOfInstancesWithMissingValues", "description": "Number of instances with missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfMissingValues", "description": "Number of missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfNumericFeatures", "description": "Number of numeric features in the dataset", "type": "Numeric"}, {"name": "NumberOfSymbolicFeatures", "description": "Number of symbolic features in the dataset", "type": "Numeric"}, {"name": "description", "description": "Description of the dataset", "type": "String"}, {"name": "qualities", "description": "Qualities of the dataset", "type": "String"}, {"name": "features", "description": "Features of the dataset", "type": "String"}, {"name": "Combined_information", "description": "Combine information from all the coulmns in the dataset.", "type": "String"}]
[{"name": "Unnamed: 0", "description": "Unique identifier", "type": "Numeric"}, {"name": "did", "description": "Dataset ID", "type": "Numeric"}, {"name": "name", "description": "Name of the dataset", "type": "String"}, {"name": "version", "description": "Version of this dataset. '1' for original version. Auto-incremented by server.", "type": "Numeric"}, {"name": "uploader", "description": "ID of the uploader", "type": "Numeric"}, {"name": "status", "description": "Current status of the dataset. Whether the dataset is active.", "type": "String"}, {"name": "format", "description": "Format of the dataset. Example - arff format ", "type": "String"}, {"name": "MajorityClassSize", "description": "Number of instances belonging to the most frequent class.", "type": "Numeric"}, {"name": "MaxNominalAttDistinctValues", "description": "The maximum number of distinct values among attributes of the nominal type.", "type": "Numeric"}, {"name": "MinorityClassSize", "description": "Number of instances belonging to the least frequent class.", "type": "Numeric"}, {"name": "NumberOfClasses", "description": "Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.", "type": "Numeric"}, {"name": "NumberOfFeatures", "description": "Number of features or attributes in the dataset.", "type": "Numeric"}, {"name": "NumberOfInstances", "description": "Number of instances in the dataset", "type": "Numeric"}, {"name": "NumberOfInstancesWithMissingValues", "description": "Number of instances with missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfMissingValues", "description": "Number of missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfNumericFeatures", "description": "Number of numeric features in the dataset", "type": "Numeric"}, {"name": "NumberOfSymbolicFeatures", "description": "Number of symbolic features in the dataset", "type": "Numeric"}, {"name": "description", "description": "Description of the dataset", "type": "String"}, {"name": "qualities", "description": "Qualities of the dataset", "type": "String"}, {"name": "features", "description": "Features of the dataset", "type": "String"}, {"name": "Combined_information", "description": "Combine information from all the coulmns in the dataset.", "type": "String"}]
32 changes: 23 additions & 9 deletions docker-compose.yml
@@ -1,6 +1,6 @@
services:
ollama:
# build: ollama/
# build: ollama/
image: ollama/ollama:latest
ports:
- 11434:11434
@@ -9,27 +9,41 @@ services:
# restart: always
# tty: true
container_name: ollama
restart: unless-stopped
# volumes:
# - ./get_ollama.sh:/get_ollama.sh
# entrypoint: ["/bin/bash", "/get_ollama.sh"]
# restart: unless-stopped
volumes:
- ./ollama/get_ollama.sh:/get_ollama.sh
entrypoint: ["/bin/bash", "/get_ollama.sh"]

fastapi:
build: backend/
ports:
build:
context: .
dockerfile: backend/Dockerfile
ports:
- 8000:8000
networks:
- deploy_network
depends_on:
- ollama
container_name: fastapi

llmservice:
build: llm_service/
ports:
- 8081:8081
networks:
- deploy_network
depends_on:
- ollama
container_name: llmservice

streamlit:
build: frontend/
build:
context: .
dockerfile: frontend/Dockerfile
depends_on:
- fastapi
- ollama
ports:
ports:
- 8501:8501
networks:
- deploy_network
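Besides switching fastapi and streamlit to repository-root build contexts (so their Dockerfiles can COPY ./backend, ./data, and ./frontend), the compose file gains a dedicated llmservice container on 8081. A throwaway smoke test for the published ports after `docker compose up`, assuming localhost publishing and FastAPI's stock /docs route (an assumption, since the diff does not show the routes):

import urllib.request

for name, url in {
    "ollama": "http://localhost:11434",
    "fastapi": "http://localhost:8000/docs",
    "llmservice": "http://localhost:8081/docs",
    "streamlit": "http://localhost:8501",
}.items():
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            print(f"{name}: HTTP {resp.status}")
    except OSError as exc:
        # a smoke test only reports; it does not retry
        print(f"{name}: unreachable ({exc})")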
9 changes: 7 additions & 2 deletions frontend/Dockerfile
@@ -1,12 +1,17 @@
FROM python:3.10.14

RUN mkdir /streamlit
RUN mkdir /backend
RUN mkdir /data

COPY requirements.txt /streamlit
COPY ./data/all_dataset_description.csv /data
COPY ./data/all_flow_description.csv /data
COPY ./frontend /streamlit
COPY ./backend/config.json /backend
WORKDIR /streamlit

RUN pip install -r requirements.txt
COPY . /streamlit
#COPY . /streamlit

EXPOSE 8501

2 changes: 1 addition & 1 deletion frontend/paths.json
@@ -4,7 +4,7 @@
"local" : "http://0.0.0.0:8000/"
},
"llm_response" : {
"docker" : "http://fastapi:8081/llmquery/",
"docker" : "http://llmservice:8081/llmquery/",
"local" : "http://0.0.0.0:8081/llmquery/"
},
"structured_query": {
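The fix points the dockerized UI at the new llmservice container instead of fastapi, which does not serve /llmquery. A hypothetical loader for this file (the resolve_endpoint helper is illustrative, not part of the repo):

import json

def resolve_endpoint(name: str, in_docker: bool, path: str = "paths.json") -> str:
    # The "docker"/"local" keys and endpoint names mirror paths.json above.
    with open(path) as f:
        paths = json.load(f)
    return paths[name]["docker" if in_docker else "local"]

# e.g. resolve_endpoint("llm_response", in_docker=True)
# -> "http://llmservice:8081/llmquery/"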
4 changes: 3 additions & 1 deletion frontend/requirements.txt
@@ -1 +1,3 @@
streamlit==1.36.0
streamlit==1.36.0
streamlit_feedback
langchain_community
9 changes: 6 additions & 3 deletions frontend/ui.py
@@ -33,9 +33,11 @@
st.session_state["query"] = query
st.session_state["query_type"] = query_type

llm_filter = st.checkbox('LLM Filter')
# Submit button logic
if st.button("Submit"):
response_parser = ResponseParser(query_type, apply_llm_before_rag=False)
apply_llm_before_rag=None if not llm_filter else False
response_parser = ResponseParser(query_type, apply_llm_before_rag=apply_llm_before_rag)
if query_type == "Dataset":
with st.spinner("Waiting for results..."):
if config["structured_query"] == True:
@@ -59,8 +61,9 @@
else:
# get rag response
response_parser.fetch_rag_response(query_type, query)
# get llm response
response_parser.fetch_llm_response(query)
if llm_filter:
# get llm response
response_parser.fetch_llm_response(query)
# get updated columns based on llm response

results = response_parser.parse_and_update_response(data_metadata)
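The new checkbox drives a three-state flag that ui_utils.py (next file) interprets as: None means skip LLM filtering entirely, False means RAG first with the LLM filter second, and True means LLM before RAG. The checkbox can only ever produce None or False; spelled out as a sketch:

llm_filter = True  # stand-in for st.checkbox('LLM Filter')

# None  -> no LLM filter at all
# False -> RAG response first, LLM filter applied to the result
# True  -> LLM filter before RAG (not reachable from this checkbox)
apply_llm_before_rag = None if not llm_filter else False
print(apply_llm_before_rag)  # False here; None when the box is unticked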
30 changes: 23 additions & 7 deletions frontend/ui_utils.py
@@ -205,8 +205,28 @@ def parse_and_update_response(self, metadata: pd.DataFrame):
- self.apply_llm_before_rag == False
- Metadata is filtered based by the Query parsing LLM first and the rag response second
"""
if self.rag_response is not None and self.llm_response is not None:
if not self.apply_llm_before_rag:
if self.apply_llm_before_rag is None or self.llm_response is None:
print('No LLM filter.')
print(self.rag_response)
filtered_metadata = metadata[
metadata["did"].isin(self.rag_response["initial_response"])
]
print(filtered_metadata)
# if no llm response is required, return the initial response
return filtered_metadata

elif self.rag_response is not None and self.llm_response is not None:
if self.apply_llm_before_rag is None:
print('No LLM filter.')
print(self.rag_response)
filtered_metadata = metadata[
metadata["did"].isin(self.rag_response["initial_response"])
]
print(filtered_metadata)
# if no llm response is required, return the initial response
return filtered_metadata
elif not self.apply_llm_before_rag:
print('RAG before LLM filter.')
filtered_metadata = metadata[
metadata["did"].isin(self.rag_response["initial_response"])
]
@@ -216,6 +236,7 @@ def parse_and_update_response(self, metadata: pd.DataFrame):
llm_parser.get_attributes_from_response()
return llm_parser.update_subset_cols(filtered_metadata)
elif self.apply_llm_before_rag:
print('LLM filter before RAG')
llm_parser = LLMResponseParser(self.llm_response)
llm_parser.get_attributes_from_response()
filtered_metadata = llm_parser.update_subset_cols(metadata)
@@ -224,9 +245,6 @@
filtered_metadata["did"].isin(self.rag_response["initial_response"])
]

elif self.apply_llm_before_rag is None:
# if no llm response is required, return the initial response
return metadata
elif (
self.rag_response is not None and self.structured_query_response is not None
):
@@ -242,7 +260,5 @@
]
print("Showing only rag response")
return filtered_metadata[["did", "name", *col_name]]
else:
return metadata


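Every branch of parse_and_update_response reduces to the same core operation: subset the metadata frame to the dataset ids (did) returned by the RAG step. In isolation, with toy data:

import pandas as pd

# Keep only the metadata rows whose `did` appears in the RAG response.
metadata = pd.DataFrame({"did": [1, 2, 3], "name": ["iris", "adult", "mnist"]})
rag_response = {"initial_response": [3, 1]}

filtered = metadata[metadata["did"].isin(rag_response["initial_response"])]
print(filtered)  # rows for did 1 and 3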
9 changes: 4 additions & 5 deletions llm_service/Dockerfile
@@ -1,12 +1,11 @@
FROM python:3.10.14
RUN mkdir /llmservice
COPY requirements.txt fastapi/requirements.txt
WORKDIR /fastapi
COPY . /llmservice
WORKDIR /llmservice

RUN pip install -r requirements.txt

COPY . /llmservice

EXPOSE 8001
EXPOSE 8081

ENTRYPOINT ["uvicorn", "llmservice:app", "--host", "0.0.0.0", "--port", "8001"]
ENTRYPOINT ["uvicorn", "llm_service:app", "--host", "0.0.0.0", "--port", "8081"]
8 changes: 6 additions & 2 deletions llm_service/llm_service.py
@@ -25,8 +25,9 @@
# join the prompt dictionary to the prompt template to create the final prompt
prompt = prompt_template + "\n".join([prompt_dict[key] for key in prompt_dict.keys()])

chain = create_chain(prompt)

chain = create_chain(prompt)
chain_docker = create_chain(prompt, base_url='http://ollama:11434')
app = FastAPI()


@@ -38,6 +39,9 @@ async def get_llm_query(query: str):
Description: Get the query, replace %20 (url spacing) with space and invoke the chain to get the answers based on the prompt
"""
query = query.replace("%20", " ")
response = chain.invoke({"query": query})
try:
response = chain_docker.invoke({"query": query})
except:
response = chain.invoke({"query": query})
answers = parse_answers_initial(response, patterns, prompt_dict)
return JSONResponse(content=answers)
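The handler now tries the compose-internal Ollama endpoint first and silently falls back to the localhost chain. The bare except also swallows unrelated failures (bad prompts, parsing errors); a narrower sketch, not the commit's code, would scope the fallback to connection-type problems:

def invoke_with_fallback(query: str, primary, fallback):
    """Try the dockerized chain first; fall back only when the primary
    backend is unreachable, not on arbitrary exceptions."""
    try:
        return primary.invoke({"query": query})
    except (ConnectionError, OSError):
        return fallback.invoke({"query": query})

# e.g. invoke_with_fallback(query, chain_docker, chain)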
4 changes: 2 additions & 2 deletions llm_service/llm_service_utils.py
@@ -4,12 +4,12 @@
from langchain_core.prompts import ChatPromptTemplate


def create_chain(prompt, model: str = "llama3", temperature: int = 0):
def create_chain(prompt, model: str = "llama3", temperature: int = 0, base_url: str = "http://localhost:11434"):
"""
Description: Create a langchain chain with the given prompt and model and the temperature.
The lower the temperature, the less "creative" the model will be.
"""
llm = ChatOllama(model=model, temperature=temperature)
llm = ChatOllama(model=model, temperature=temperature, base_url=base_url)
prompt = ChatPromptTemplate.from_template(prompt)

return prompt | llm | StrOutputParser()
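With base_url exposed, one factory serves both deployments: inside docker-compose the Ollama server is reachable by service name, while the default keeps pointing at localhost. A usage sketch (the prompt text is illustrative, and a running Ollama server is required for invoke to succeed):

chain_local = create_chain("Answer briefly: {query}")
chain_docker = create_chain("Answer briefly: {query}",
                            base_url="http://ollama:11434")

# Both chains are prompt | llm | StrOutputParser, so invoke returns a str.
print(chain_docker.invoke({"query": "What is OpenML?"}))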
1 change: 1 addition & 0 deletions llm_service/requirements.txt
@@ -6,3 +6,4 @@ langchain==0.2.6
langchain_community==0.2.6
langchain_core==0.2.11
tenacity==8.3.0
regex
4 changes: 2 additions & 2 deletions ollama/Dockerfile
@@ -7,12 +7,12 @@ RUN mkdir /ollama

# COPY requirements.txt /ollama
COPY get_ollama.sh /ollama
COPY base_logger.py /ollama
#COPY base_logger.py /ollama
WORKDIR /ollama

RUN curl -fsSL https://ollama.com/install.sh | sh
RUN chmod +x get_ollama.sh
RUN python3 base_logger.py
#RUN python3 base_logger.py
EXPOSE 11434

ENTRYPOINT ["ollama", "serve"]
2 changes: 2 additions & 0 deletions ollama/get_ollama.sh
@@ -6,4 +6,6 @@ while [ "$(ollama list | grep 'NAME')" == "" ]; do
sleep 1
done


ollama run llama3
tail -f /dev/null
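The added tail -f /dev/null keeps the container's entrypoint process alive after `ollama run llama3` returns, and the loop above waits until `ollama list` shows a model. A rough Python equivalent of that wait, assuming Ollama's /api/tags model-listing endpoint:

import json
import time
import urllib.request

# Poll the Ollama HTTP API until at least one model is listed.
while True:
    try:
        with urllib.request.urlopen("http://localhost:11434/api/tags") as r:
            if json.load(r).get("models"):
                break
    except OSError:
        pass  # server not up yet
    time.sleep(1)
print("a model is available")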
2 changes: 1 addition & 1 deletion start_training.sh
@@ -1,3 +1,3 @@
cd ai_search/backend
cd backend
python training.py
cd ..
