-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature: More resilient evaluations (#635)
This PR addresses several issues: 1) When the evaluator crashes with an exception, we actually also send this to the supervisor 2) We validate whether we receive the correct number of results at the supervisor 3) We fix the validation of the correct number of evaluations at the evaluator 4) Some minor additions of retrying to connect in case the connection gets lost Our observation of missing evaluations should now be caught at the first point (we actually retry on exception at the evaluator) but the others serve as a guarding mechanism.
- Loading branch information
1 parent
a15b3c6
commit 6436929
Showing
13 changed files
with
341 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Motivation\n", | ||
"\n", | ||
"This notebook can be used to find pipeline runs where we have empty evaluation responses despite expecting some. Older versions of Modyn have lest robustness in the evaluation handling." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pathlib import Path\n", | ||
"\n", | ||
"from tqdm import tqdm\n", | ||
"\n", | ||
"from modyn.supervisor.internal.grpc.enums import PipelineStage\n", | ||
"from modyn.supervisor.internal.pipeline_executor.models import MultiEvaluationInfo, PipelineLogs, SingleEvaluationInfo\n", | ||
"\n", | ||
"%load_ext autoreload\n", | ||
"%autoreload 2" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"log_dir = Path(\"/Users/mboether/phd/dynamic-data/sigmod-data/yearbook/debug/logs\")\n", | ||
"logfiles = [logfile for logfile in log_dir.glob(\"**/pipeline.log\")]\n", | ||
"logfiles" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def metrics_valid(logfile: Path):\n", | ||
" logs = PipelineLogs.model_validate_json(logfile.read_text())\n", | ||
" for eval_log in logs.supervisor_logs.stage_runs:\n", | ||
" if eval_log.id == PipelineStage.EVALUATE_MULTI.name:\n", | ||
" multiinfo = eval_log.info\n", | ||
" assert isinstance(multiinfo, MultiEvaluationInfo)\n", | ||
"\n", | ||
" for info in multiinfo.interval_results:\n", | ||
" assert isinstance(info, SingleEvaluationInfo)\n", | ||
" res = info.results\n", | ||
"\n", | ||
" if len(res[\"metrics\"]) == 0:\n", | ||
" if res[\"dataset_size\"] == 0:\n", | ||
" print(\n", | ||
" f\"Warning: Empty metrics but empty dataset in {logfile}: {info}\"\n", | ||
" ) # Might want to remove this - not sure if needed.\n", | ||
" else:\n", | ||
" return False\n", | ||
"\n", | ||
" return True" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"invalid_pipelines = []\n", | ||
"for logfile in tqdm(logfiles):\n", | ||
" if not metrics_valid(logfile):\n", | ||
" invalid_pipelines.append(logfile)\n", | ||
"\n", | ||
"invalid_pipelines\n", | ||
"\n", | ||
"# Typically, you'd want to delete those directories because they are invalid (see next cell)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Commented out for safety\n", | ||
"\n", | ||
"\"\"\"\n", | ||
"import shutil\n", | ||
"parent_dirs = {file_path.parent for file_path in invalid_pipelines}\n", | ||
"\n", | ||
"for directory in parent_dirs:\n", | ||
" try:\n", | ||
" shutil.rmtree(directory)\n", | ||
" except Exception as e:\n", | ||
" print(f\"Failed to delete {directory}: {e}\")\n", | ||
"\"\"\"" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.