Skip to content

Commit

Permalink
Outputs RAG pipeline as a YAML config file
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewcoole committed Nov 22, 2024
1 parent 2a913c0 commit 11ee206
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 11 deletions.
1 change: 1 addition & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
/eval.png
/eidc_rag_testset.csv
/eidc_rag_test_set.csv
/rag-pipeline.yml
26 changes: 23 additions & 3 deletions dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@ stages:
outs:
- ${files.extracted}
chunk-data:
cmd: python scripts/chunk_data.py -o ${files.chunked} -c ${hp.chunk-size} -ol ${hp.overlap} ${files.extracted} ${files.supporting-docs} -m ${max-length}
cmd: >-
python scripts/chunk_data.py
-o ${files.chunked}
-c ${hp.chunk-size}
-ol ${hp.overlap}
${files.extracted}
${files.supporting-docs}
-m ${max-length}
deps:
- ${files.extracted}
- ${files.supporting-docs}
Expand All @@ -37,7 +44,12 @@ stages:
outs:
- ${files.embeddings}
upload-to-docstore:
cmd: python scripts/upload_to_docstore.py ${files.embeddings} -o ${doc-store.files} -em ${hp.embeddings-model} -c ${doc-store.collection}
cmd: >-
python scripts/upload_to_docstore.py
${files.embeddings}
-o ${doc-store.files}
-em ${hp.embeddings-model}
-c ${doc-store.collection}
deps:
- ${files.embeddings}
- scripts/upload_to_docstore.py
Expand All @@ -48,13 +60,21 @@ stages:
outs:
- ${files.test-set}
run-rag-pipeline:
cmd: python scripts/run_rag_pipeline.py -i ${files.test-set} -o ${files.eval-set} -ds ${files.doc-store} -c ${doc-store.collection} -m ${rag.model}
cmd: >-
python scripts/run_rag_pipeline.py
-i ${files.test-set}
-o ${files.eval-set}
-ds ${files.doc-store}
-c ${doc-store.collection}
-m ${rag.model}
-p ${files.pipeline}
deps:
- ${files.test-set}
- ${files.doc-store}
- scripts/run_rag_pipeline.py
outs:
- ${files.eval-set}
- ${files.pipeline}
evaluate:
cmd: python scripts/evaluate.py ${files.eval-set} -m ${files.metrics} -img ${files.eval-plot}
deps:
Expand Down
23 changes: 19 additions & 4 deletions scripts/run_rag_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import shutil
from argparse import ArgumentParser
from typing import Any, Dict, List, Tuple
__import__("pysqlite3")
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import pandas as pd
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
Expand Down Expand Up @@ -90,11 +88,15 @@ def main(
doc_store_path: str,
collection_name: str,
model: str,
pipeline_file: str,
) -> None:
shutil.copytree(doc_store_path, TMP_DOC_PATH)

rag_pipe = build_rag_pipeline(model, collection_name)

with open(pipeline_file, "w") as f:
rag_pipe.dump(f)

df = pd.read_csv(test_data_file)
df.drop(columns=["rating", "contexts"], inplace=True)

Expand Down Expand Up @@ -136,5 +138,18 @@ def main(
help="Model to use in RAG pipeline.",
default="llama3.1",
)
parser.add_argument(
"-p",
"--pipeline_file",
help="File to save the built RAG pipeline to.",
default="pipeline.yml",
)
args = parser.parse_args()
main(args.input, args.output, args.doc_store, args.collection, args.model)
main(
args.input,
args.output,
args.doc_store,
args.collection,
args.model,
args.pipeline_file,
)
4 changes: 0 additions & 4 deletions scripts/upload_to_docstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
import uuid
from argparse import ArgumentParser

__import__("pysqlite3")
import sys

sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
import chromadb
from chromadb.utils import embedding_functions
from chromadb.utils.batch_utils import create_batches
Expand Down

1 comment on commit 11ee206

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

context_recall: 0.5366100283424792
answer_relevancy: 0.4366129713073692
answer_correctness: 0.501229913307302
context_precision: 0.49209737248506186

Please sign in to comment.