Skip to content

Commit

Permalink
find invalid runs notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxiBoether committed Sep 23, 2024
1 parent ba26601 commit fe537a8
Showing 1 changed file with 126 additions and 0 deletions.
126 changes: 126 additions & 0 deletions analytics/tools/find_invalid_runs.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Motivation\n",
"\n",
"This notebook can be used to find pipeline runs where we have empty evaluation responses despite expecting some. Older versions of Modyn have lest robustness in the evaluation handling."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"from modyn.supervisor.internal.grpc.enums import PipelineStage\n",
"from modyn.supervisor.internal.pipeline_executor.models import MultiEvaluationInfo, PipelineLogs, SingleEvaluationInfo\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"log_dir = Path(\"/Users/mboether/phd/dynamic-data/sigmod-data/yearbook/debug/logs\")\n",
"logfiles = [logfile for logfile in log_dir.glob(\"**/pipeline.log\")]\n",
"logfiles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def metrics_valid(logfile: Path):\n",
" logs = PipelineLogs.model_validate_json(logfile.read_text())\n",
" for eval_log in logs.supervisor_logs.stage_runs:\n",
" if eval_log.id == PipelineStage.EVALUATE_MULTI.name:\n",
" multiinfo = eval_log.info\n",
" assert isinstance(multiinfo, MultiEvaluationInfo)\n",
"\n",
" for info in multiinfo.interval_results:\n",
" assert isinstance(info, SingleEvaluationInfo)\n",
" res = info.results\n",
"\n",
" if len(res[\"metrics\"]) == 0:\n",
" if res[\"dataset_size\"] == 0:\n",
" print(\n",
" f\"Warning: Empty metrics but empty dataset in {logfile}: {info}\"\n",
" ) # Might want to remove this - not sure if needed.\n",
" else:\n",
" return False\n",
"\n",
" return True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"invalid_pipelines = []\n",
"for logfile in tqdm(logfiles):\n",
" if not metrics_valid(logfile):\n",
" invalid_pipelines.append(logfile)\n",
"\n",
"invalid_pipelines\n",
"\n",
"# Typically, you'd want to delete those directories because they are invalid (see next cell)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Commented out for safety\n",
"\n",
"\"\"\"\n",
"import shutil\n",
"parent_dirs = {file_path.parent for file_path in invalid_pipelines}\n",
"\n",
"for directory in parent_dirs:\n",
" try:\n",
" shutil.rmtree(directory)\n",
" except Exception as e:\n",
" print(f\"Failed to delete {directory}: {e}\")\n",
"\"\"\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit fe537a8

Please sign in to comment.