Skip to content

Commit

Permalink
fixed chunking bug
Browse files Browse the repository at this point in the history
  • Loading branch information
SubhadityaMukherjee committed Jul 22, 2024
1 parent 7a3391c commit 43159fd
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
5 changes: 3 additions & 2 deletions backend/modules/vector_store_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ class DataLoader:
"""
Description: Used to chunk data
"""
def __init__(self, metadata_df: pd.DataFrame, page_content_column: str, chunk_size:int = 1000):
def __init__(self, metadata_df: pd.DataFrame, page_content_column: str, chunk_size:int = 1000, chunk_overlap:int = 150):
self.metadata_df = metadata_df
self.page_content_column = page_content_column
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap if self.chunk_size > chunk_overlap else self.chunk_size

def load_and_process_data(self) -> list:
"""
Expand All @@ -29,7 +30,7 @@ def load_and_process_data(self) -> list:
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
chunk_size=self.chunk_size, chunk_overlap=150
chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
)
documents = text_splitter.split_documents(documents)

Expand Down
3 changes: 2 additions & 1 deletion evaluation/evaluation_results.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
embedding_model,llm_model,llm_before_rag,custom_experiement,precision,recall,map
embedding_model,llm_model,llm_before_rag,custom_experiment,precision,recall,map
Snowflake/snowflake-arctic-embed-l,phi3,False,,0.6503587153587154,0.9997150997150998,0.8818890986108248
Snowflake/snowflake-arctic-embed-l,phi3,True,,0.6503587153587154,0.9997150997150998,0.8818890986108248
Snowflake/snowflake-arctic-embed-l,phi3,No LLM filtering,,0.6475524475524476,1.0,0.8036501822692299
Snowflake/snowflake-arctic-embed-l,llama3,False,,0.6503587153587154,0.9997150997150998,0.8818890986108248
Snowflake/snowflake-arctic-embed-l,llama3,True,,0.6503587153587154,0.9997150997150998,0.8818890986108248
Snowflake/snowflake-arctic-embed-l,llama3,No LLM filtering,,0.6475524475524476,1.0,0.8036501822692299
BAAI/bge-large-en-v1.5,llama3,False,512_chunk,0.6547227323742475,0.9979275073972045,0.8259522762251688
BAAI/bge-large-en-v1.5,llama3,False,similarity_score_threshold_search,0.7046654902068915,0.9655886894421927,0.8152488113948533
BAAI/bge-large-en-v1.5,llama3,False,mmr_search,0.6659358738904193,0.9911131517192123,0.8500771034849167
BAAI/bge-large-en-v1.5,llama3,False,temperature_1,0.648773129928433,0.9808182838485869,0.8593124999240845
Expand Down
Binary file modified evaluation/evaluation_results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 43159fd

Please sign in to comment.