Merge pull request #28 from NERC-CEH/eval_outputs
Eval outputs
matthewcoole authored Dec 20, 2024
2 parents a4afd7f + a63811d commit c8162f3
Showing 5 changed files with 25 additions and 10 deletions.
1 change: 1 addition & 0 deletions data/.gitignore
@@ -17,4 +17,5 @@
 /eidc_rag_test_set.csv
 /rag-pipeline.yml
 /pipeline.yml
+/results.csv
 /cleaned_testset.csv
3 changes: 2 additions & 1 deletion dvc.yaml
@@ -86,10 +86,11 @@ stages:
       - ${files.eval-set}
       - ${files.pipeline}
   evaluate:
-    cmd: uv run scripts/evaluate.py ${files.eval-set} -m ${files.metrics} -img ${files.eval-plot}
+    cmd: uv run scripts/evaluate.py ${files.eval-set} -m ${files.metrics} -img ${files.eval-plot} -r ${files.results}
     deps:
       - ${files.eval-set}
       - scripts/evaluate.py
     outs:
       - ${files.metrics}
       - ${files.eval-plot}
+      - ${files.results}
1 change: 1 addition & 0 deletions params.yaml
@@ -16,6 +16,7 @@ files:
   cleaned-test-set: data/cleaned_testset.csv
   eval-set: data/evaluation_data.csv
   metrics: data/metrics.json
+  results: data/results.csv
   eval-plot: data/eval.png
   pipeline: data/pipeline.yml
 sub-sample: 0 # sample n datasets for testing (0 will use all datasets)
12 changes: 9 additions & 3 deletions scripts/evaluate.py
Expand Up @@ -22,7 +22,7 @@
from ragas.run_config import RunConfig


def main(eval_dataset: str, metric_output: str, image_output: str) -> None:
def main(eval_dataset: str, metric_output: str, image_output: str, results_output: str) -> None:
nest_asyncio.apply() # apply the event loop async fix
df = pd.read_csv(eval_dataset, converters={"contexts": pd.eval})
eval_dataset = Dataset.from_pandas(df)
@@ -45,7 +45,7 @@ def main(eval_dataset: str, metric_output: str, image_output: str) -> None:
         run_config=RunConfig(max_workers=1),
     )
     result_df = result.to_pandas()
-
+    result_df.to_csv(results_output, index=False)
     Path(metric_output).parent.mkdir(parents=True, exist_ok=True)
     with open(metric_output, "w+") as f:
         json.dump(result, f)
@@ -88,5 +88,11 @@ def main(eval_dataset: str, metric_output: str, image_output: str) -> None:
         help="File to save image plot to.",
         default="data/evaluation.png",
     )
+    parser.add_argument(
+        "-r",
+        "--results",
+        help="File to save evaluation results",
+        default="data/results.csv",
+    )
     args = parser.parse_args()
-    main(args.eval_dataset, args.metrics_output, args.image_output)
+    main(args.eval_dataset, args.metrics_output, args.image_output, args.results)
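For anyone consuming the new output: results.csv holds the row-level ragas scores (the aggregate scores still go to the metrics JSON). A minimal sketch of loading it with pandas, assuming the default path from params.yaml; the exact metric columns depend on which ragas metrics were configured:

import pandas as pd

# Per-question evaluation results written by scripts/evaluate.py via -r/--results
# (default path from params.yaml: data/results.csv).
results = pd.read_csv("data/results.csv")

print(results.head())
# Rough per-metric averages; numeric_only skips the question/answer/context columns.
print(results.mean(numeric_only=True))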
18 changes: 12 additions & 6 deletions scripts/run_rag_pipeline.py
@@ -11,6 +11,7 @@
 from haystack_integrations.components.generators.ollama.generator import OllamaGenerator
 from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever
 from haystack_integrations.document_stores.chroma import ChromaDocumentStore
+from tqdm import tqdm

 TMP_DOC_PATH = ".tmp/doc-store"

@@ -93,12 +93,17 @@ def run_query(query: str, pipeline: Pipeline) -> Dict[str, Any]:
 def query_pipeline(questions: List[str], rag_pipe: Pipeline) -> Tuple[str, List[str]]:
     answers = []
     contexts = []
-    for q in questions:
-        response = run_query(q, rag_pipe)
-        answers.append(response["answer_builder"]["answers"][0].data)
-        contexts.append(
-            [doc.content for doc in response["answer_builder"]["answers"][0].documents]
-        )
+    for q in tqdm(questions):
+        try:
+            response = run_query(q, rag_pipe)
+            answers.append(response["answer_builder"]["answers"][0].data)
+            contexts.append(
+                [doc.content for doc in response["answer_builder"]["answers"][0].documents]
+            )
+        except Exception as e:
+            print(str(e))
+            answers.append("Error")
+            contexts.append([])
     return answers, contexts

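The wrapped loop above records an "Error" answer and an empty context list for any question whose query raises, so the returned lists stay aligned with the input questions. If those placeholder rows should be dropped before scoring, a hypothetical filter might look like this (the "answer" column name is an assumption about the pipeline's CSV output; "contexts" matches the converter used in scripts/evaluate.py):

import pandas as pd

# Hypothetical post-processing: drop rows whose query failed before evaluation.
df = pd.read_csv("data/evaluation_data.csv", converters={"contexts": pd.eval})
ok = df[df["answer"] != "Error"]
print(f"Keeping {len(ok)} of {len(df)} questions")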
