Skip to content

Commit

Permalink
Updated eval script for SEC 10-Q dataset
Browse files: browse the repository at this point in the history
  • Loading branch information
Taqi Jaffri committed Dec 12, 2023
1 parent 50d5299 commit f038b39
Show file tree
Hide file tree
Showing 15 changed files with 51 additions and 325 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__pycache__
.venv
temp
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
272 changes: 0 additions & 272 deletions evals/earnings_calls/v1/ground-truth-earning_calls.csv

This file was deleted.

88 changes: 47 additions & 41 deletions evals/earnings_calls/run-evals.ipynb → evals/run-evals.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluating Docugami KG-RAG against OpenAI Assistants Retrieval"
"# SEC 10-Q Eval\n",
"\n",
"Evaluating Docugami KG-RAG against OpenAI Assistants Retrieval for this dataset: https://github.com/docugami/KG-RAG-datasets/tree/main/sec-10-q"
]
},
{
Expand All @@ -14,6 +16,15 @@
"## Set up Eval"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!git clone https://github.com/docugami/KG-RAG-datasets.git temp"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -22,27 +33,40 @@
"source": [
"import os\n",
"from pathlib import Path\n",
"from datetime import datetime\n",
"\n",
"# Important: Create your OpenAI assistant via https://platform.openai.com/playground\n",
"# and put the assistant ID here. Make sure you upload the identical set of\n",
"# files listed below (these files will be uploaded automatically to Docugami)\n",
"OPENAI_ASSISTANT_ID = \"asst_g837jjwr6Ohgk2EWfQOKTcPg\"\n",
"OPENAI_ASSISTANT_ID = \"asst_qY1M0SeFYlmqkEZsMVZX2VAK\"\n",
"\n",
"DOCSET_NAME = \"Earnings Calls Evaluation 12-06-2023\"\n",
"FILES_DIR = Path(os.getcwd()) / \"v1/docs\"\n",
"DOCSET_NAME = \"SEC 10Q Filings\"\n",
"EVAL_NAME = DOCSET_NAME + \" \" + datetime.now().strftime(\"%Y-%m-%d\")\n",
"FILES_DIR = Path(os.getcwd()) / \"temp/sec-10-q/docs\"\n",
"FILE_NAMES = [\n",
" \"Q1 2022 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q1 2023 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q2 2022 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q2 2023 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q3 2021 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q3 2022 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q3 2023 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q4 2020 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q4 2022 Snowflake Inc. Earnings Call - Snowflake Inc - BamSEC.pdf\",\n",
" \"Q3 FY23 Microsoft Corp Earnings Call.pdf\",\n",
" \"2022 Q3 AAPL.pdf\",\n",
" \"2022 Q3 AMZN.pdf\",\n",
" \"2022 Q3 INTC.pdf\",\n",
" \"2022 Q3 MSFT.pdf\",\n",
" \"2022 Q3 NVDA.pdf\",\n",
" \"2023 Q1 AAPL.pdf\",\n",
" \"2023 Q1 AMZN.pdf\",\n",
" \"2023 Q1 INTC.pdf\",\n",
" \"2023 Q1 MSFT.pdf\",\n",
" \"2023 Q1 NVDA.pdf\",\n",
" \"2023 Q2 AAPL.pdf\",\n",
" \"2023 Q2 AMZN.pdf\",\n",
" \"2023 Q2 INTC.pdf\",\n",
" \"2023 Q2 MSFT.pdf\",\n",
" \"2023 Q2 NVDA.pdf\",\n",
" \"2023 Q3 AAPL.pdf\",\n",
" \"2023 Q3 AMZN.pdf\",\n",
" \"2023 Q3 INTC.pdf\",\n",
" \"2023 Q3 MSFT.pdf\",\n",
" \"2023 Q3 NVDA.pdf\",\n",
"]\n",
"GROUND_TRUTH_CSV = Path(os.getcwd()) / \"v1/ground-truth-earning_calls.csv\"\n",
"\n",
"GROUND_TRUTH_CSV = Path(os.getcwd()) / \"temp/sec-10-q/data/raw_data.csv\"\n",
"\n",
"# We will run each experiment multiple times and average, \n",
"# since results vary slightly over runs\n",
Expand All @@ -68,7 +92,7 @@
"\n",
"# Dataset\n",
"client = Client()\n",
"dataset_name = DOCSET_NAME\n",
"dataset_name = EVAL_NAME\n",
"existing_datasets = list(client.list_datasets(dataset_name=dataset_name))\n",
"if existing_datasets:\n",
" # read existing dataset\n",
Expand All @@ -91,16 +115,6 @@
"## Set up Docugami KG-RAG"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install pip --quiet --upgrade\n",
"! pip install docugami==0.0.9 dgml-utils==0.3.0 --quiet --upgrade"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -161,7 +175,7 @@
"assert docset_name\n",
"\n",
"# Note: This can take some time since it is embedding and creating summaries for all the docs and chunks\n",
"index_docset(docset_id=docset_id, name=docset_name)"
"index_docset(docset_id=docset_id, name=docset_name, overwrite=True)"
]
},
{
Expand Down Expand Up @@ -204,7 +218,7 @@
"outputs": [],
"source": [
"# Test the agent to make sure it is working\n",
"predict_docugami_agent({\"question\": \"What was the question from Barclays in the Q2 2023 earnings call?\"})"
"predict_docugami_agent({\"question\": \"How much did Microsoft spend for opex in the latest quarter?\"})"
]
},
{
Expand All @@ -214,15 +228,6 @@
"## Set up OpenAI Assistants Retrieval"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install openai --upgrade --quiet"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -255,7 +260,7 @@
"outputs": [],
"source": [
"# Test the agent to make sure it is working\n",
"predict_openai_agent({\"question\": \"What was the question from Barclays in the Q2 2023 earnings call?\"})"
"predict_openai_agent({\"question\": \"How much did Microsoft spend for opex in the latest quarter?\"})"
]
},
{
Expand All @@ -267,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -286,17 +291,18 @@
" \"\"\"\n",
" client = Client()\n",
" client.run_on_dataset(\n",
" dataset_name=DOCSET_NAME,\n",
" dataset_name=EVAL_NAME,\n",
" llm_or_chain_factory=eval_func,\n",
" evaluation=eval_config,\n",
" verbose=True,\n",
" project_name=eval_run_name,\n",
" concurrency_level=2, # to help with rate limits\n",
" )\n",
"\n",
"\n",
"# Experiments\n",
"agent_map = {\n",
" \"openai_assistant_retrieval\": predict_openai_agent,\n",
" # \"openai_assistant_retrieval\": predict_openai_agent,\n",
" \"docugami_kg_rag_zero_shot\": predict_docugami_agent,\n",
"}\n",
"\n",
Expand Down
14 changes: 2 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typer = "^0.9.0"
docugami = "0.0.9"
pandas = "^2.1.3"
chromadb = "0.4.14"
langsmith = "^0.0.69"

[tool.poetry.group.dev.dependencies]
langchain-cli = "*"
Expand Down

0 comments on commit f038b39

Please sign in to comment.