From d542debd038eca1843ecbae014ab1af4d937f920 Mon Sep 17 00:00:00 2001 From: Robin Holzinger Date: Sat, 28 Sep 2024 16:04:40 +0200 Subject: [PATCH] Finalize all plots? --- analytics/plotting/common/color.py | 2 +- .../plotting/common/tradeoff_scatterplot.py | 20 +- analytics/plotting/rh_thesis/TODO.md | 17 - .../compare_all/arxiv_perf_tradeoff.ipynb | 542 +++++++++++ .../compare_all/hp_perf_tradeoff.ipynb | 559 +++++++++++ .../compare_all/yb_perf_tradeoff.ipynb | 891 ++++++++++++++++++ .../plotting/rh_thesis/drift/arxiv_cost.ipynb | 282 ++++++ .../drift/arxiv_heatmap_single.ipynb | 17 +- .../plotting/rh_thesis/drift/hp_cost.ipynb | 219 +++++ .../rh_thesis/drift/hp_heatmap_single.ipynb | 8 +- .../plotting/rh_thesis/drift/yb_cost.ipynb | 24 +- .../drift/yb_cost_perf_tradeoff.ipynb | 2 +- .../drift/yearbook_heatmap_multi.ipynb | 27 +- .../drift/yearbook_heatmap_single.ipynb | 6 +- .../arxiv_heatmap_metrics.ipynb | 10 +- .../evaluation_setup/hp_heatmap_metrics.ipynb | 10 +- .../evaluation_setup/yb_heatmap_metrics.ipynb | 25 +- .../yb_heatmap_window_size.ipynb | 8 +- .../rh_thesis/performance/arxiv_cost.ipynb | 227 +++++ .../performance/arxiv_heatmap_single.ipynb | 285 ++++++ .../rh_thesis/performance/hp_cost.ipynb | 221 +++++ .../performance/hp_heatmap_single.ipynb | 286 ++++++ .../rh_thesis/performance/yb_cost.ipynb | 212 +++++ .../performance/yearbook_heatmap_multi.ipynb | 363 +++++++ .../performance/yearbook_heatmap_single.ipynb | 307 ++++++ .../yearbook_heatmap_single_num_miclass.ipynb | 310 ++++++ 26 files changed, 4807 insertions(+), 73 deletions(-) delete mode 100644 analytics/plotting/rh_thesis/TODO.md create mode 100644 analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb create mode 100644 
analytics/plotting/rh_thesis/drift/hp_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/hp_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yb_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb diff --git a/analytics/plotting/common/color.py b/analytics/plotting/common/color.py index 042945553..14b864026 100644 --- a/analytics/plotting/common/color.py +++ b/analytics/plotting/common/color.py @@ -63,8 +63,8 @@ def main_colors(light: bool = False) -> list[tuple[float, float, float]]: colorblind_palette[-2], colorblind_palette[1], colorblind_palette[2], - colorblind_palette[3], colorblind_palette[4], + colorblind_palette[5], ] diff --git a/analytics/plotting/common/tradeoff_scatterplot.py b/analytics/plotting/common/tradeoff_scatterplot.py index 0e003df53..ddb5f30d5 100644 --- a/analytics/plotting/common/tradeoff_scatterplot.py +++ b/analytics/plotting/common/tradeoff_scatterplot.py @@ -20,6 +20,8 @@ def plot_tradeoff_scatter( height_factor: float = 1.0, width_factor: float = 1.0, target_ax: Axes | None = None, + manual_legend_title: bool = True, + legend_ncol: int = 1, ) -> Figure: sns.set_theme(style="whitegrid") init_plot() @@ -46,7 +48,13 @@ def plot_tradeoff_scatter( hue=hue, style=style, # style="pipeline_ref", - palette=[main_color(0), main_color(1), main_color(3)], + palette=[ + main_color(0), + main_color(1), + main_color(3), + main_color(4), + main_color(5), + ], # palette={"drift": main_color(3), "yearly": main_color(0), "amount": 
main_color(1)}, s=300, # legend=False, @@ -56,10 +64,18 @@ def plot_tradeoff_scatter( # ax.set(xlim=(-4, 85)) ax.legend( - title=hue, fontsize="small", title_fontsize="medium", # title="Pipeline", + **( + { + "title": hue, + } + if manual_legend_title + else {} + ), + # 2 columns + ncol=legend_ncol, ) # Adjust x-axis tick labels diff --git a/analytics/plotting/rh_thesis/TODO.md b/analytics/plotting/rh_thesis/TODO.md deleted file mode 100644 index 27e973a65..000000000 --- a/analytics/plotting/rh_thesis/TODO.md +++ /dev/null @@ -1,17 +0,0 @@ -drift: - -- plot arxiv - -performance: - -- 1 cost plot -- 1 single pipeline heatmap -- 1 multi pipeline heatmap for every dataset (including best of every subtype) - -cost: - -- 1 dummy plot - -discussion: - -- tradeoff plot: 1 per dataset diff --git a/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb new file mode 100644 index 000000000..033b7328a --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/10_baselines_time\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/11_baselines_amount\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/21_datadrift_dynamic\"\n", + " ), # TODO\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\"),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "\n", + "pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " (263, \"timetrigger_5y\"),\n", + " (265, \"timetrigger_10y\"),\n", + " # (267, 'timetrigger_26w'),\n", + " (269, \"timetrigger_2y\"),\n", + " (272, \"timetrigger_1y\"),\n", + " # (264, 'dataamount_1000000'),\n", + 
" (266, \"dataamount_50000\"),\n", + " # (268, 'dataamount_500000'),\n", + " (270, \"dataamount_25000\"),\n", + " (271, \"dataamount_100000\"),\n", + " (782, \"drifttrigger_mmd-quant-0.05-20_int20000_win1y\"),\n", + " (783, \"drifttrigger_mmd-rollavg-0.5-20_int20000_win1y\"),\n", + " (784, \"drifttrigger_mmd-rollavg-5.0-20_int20000_win1y\"),\n", + " (785, \"drifttrigger_mmd-quant-0.15-20_int20000_win1y\"),\n", + " (786, \"drifttrigger_mmd-rollavg-0.2-20_int20000_win1y\"),\n", + " (787, \"drifttrigger_mmd-quant-0.1-20_int20000_win1y\"),\n", + " (788, \"drifttrigger_mmd-rollavg-1.0-20_int20000_win1y\"),\n", + " (789, \"drifttrigger_mmd-quant-0.3-20_int20000_win1y\"),\n", + " (790, \"drifttrigger_mmd-rollavg-2.0-20_int20000_win1y\"),\n", + " (674, \"performancetrigger_static-0.45-int20000\"),\n", + " (675, \"performancetrigger_dynamic-quant-0.05-20-int20000\"),\n", + " (676, \"performancetrigger_dynamic-rollavg-0.3-20-int20000\"),\n", + " (677, \"performancetrigger_num_misclass-100000-exp-0.6-red-False--int20000\"),\n", + " (678, \"performancetrigger_dynamic-rollavg-0.2-20-int20000\"),\n", + " (679, \"performancetrigger_dynamic-rollavg-0.1-20-int20000\"),\n", + " (680, \"performancetrigger_static-0.5-int20000\"),\n", + " (681, \"performancetrigger_dynamic-quant-0.15-20-int20000\"),\n", + " (682, \"performancetrigger_num_misclass-50000-exp-0.6-red-False--int20000\"),\n", + " (723, \"performancetrigger_num_misclass-30000-exp-0.6-red-False--int20000\"),\n", + " (756, \"performancetrigger_num_misclass-15000-exp-0.6-red-False--int20000\"),\n", + " (762, \"performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\"),\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: 
list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " \"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "\n", + "maximum_min = pd.to_datetime(min_active_eval_center_per_pipeline).max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.to_datetime(\"2006-01-01\")\n", + "\n", + "df_adjusted = df_adjusted[pd.to_datetime(df_adjusted[\"interval_center\"]) >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = 
df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of trigger per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. 
avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "# assert no cost triggers\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"Cost\").any()\n", + "\n", + 
"renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_arxiv_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.7,\n", + " width_factor=0.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " 
style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb new file mode 100644 index 000000000..ba2991250 --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/10_baselines_time\"),\n", + " 
Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/11_baselines_amount\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/21_datadrift_dynamic\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/num_misclass\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "\n", + "# pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " (273, \"timetrigger_26w\"),\n", + " (275, \"timetrigger_13w\"),\n", + " (278, \"timetrigger_1y\"),\n", + " # (280, 'timetrigger_4y'),\n", + " # (282, 'timetrigger_2y'),\n", + " (274, 
\"dataamount_10000\"),\n", + " (276, \"dataamount_5000\"),\n", + " (277, \"dataamount_20000\"),\n", + " # (279, 'dataamount_80000'),\n", + " # (281, 'dataamount_40000'),\n", + " (745, \"dataamount_15000\"),\n", + " # (750, 'dataamount_30000'),\n", + " (763, \"drifttrigger_mmd-quant-0.05-20_int1500_win1y\"),\n", + " (769, \"drifttrigger_mmd-quant-0.15-20_int1500_win1y\"),\n", + " (770, \"drifttrigger_mmd-rollavg-5.0-20_int1500_win1y\"),\n", + " (771, \"drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\"),\n", + " (772, \"drifttrigger_mmd-rollavg-1.0-20_int1500_win1y\"),\n", + " (774, \"drifttrigger_mmd-rollavg-0.5-20_int1500_win1y\"),\n", + " (689, \"performancetrigger_num_misclass-8000-exp-0.5-red-False--int1500y\"),\n", + " (705, \"performancetrigger_num_misclass-8000-exp-0.6-red-False--int1500y\"),\n", + " (722, \"performancetrigger_num_misclass-4000-exp-0.5-red-False--int1500y\"),\n", + " (724, \"performancetrigger_num_misclass-4000-exp-0.6-red-False--int1500y\"),\n", + " (725, \"performancetrigger_num_misclass-1000-exp-0.5-red-False--int1500y\"),\n", + " (726, \"performancetrigger_num_misclass-1000-exp-0.6-red-False--int1500y\"),\n", + " (773, \"performancetrigger_num_misclass-500-exp-0.5-red-False--int1500y\"),\n", + " (775, \"performancetrigger_num_misclass-250-exp-0.6-red-False--int1500y\"),\n", + " (776, \"performancetrigger_num_misclass-500-exp-0.6-red-False--int1500y\"),\n", + " (778, \"performancetrigger_num_misclass-250-exp-0.5-red-False--int1500y\"),\n", + " (635, \"performancetrigger_static-0.45-int1500y\"),\n", + " (636, \"performancetrigger_dynamic-quant-0.05-15-int1500y\"),\n", + " (637, \"performancetrigger_dynamic-rollavg-0.3-15-int1500y\"),\n", + " (639, \"performancetrigger_static-0.5-int1500y\"),\n", + " (640, \"performancetrigger_dynamic-rollavg-0.3-30-int1500y\"),\n", + " (642, \"performancetrigger_dynamic-quant-0.05-30-int1500y\"),\n", + " (643, \"performancetrigger_static-0.55-int1500y\"),\n", + " (645, 
\"performancetrigger_dynamic-rollavg-0.2-15-int1500y\"),\n", + " (646, \"performancetrigger_dynamic-quant-0.15-15-int1500y\"),\n", + " (647, \"performancetrigger_static-0.6-int1500y\"),\n", + " (649, \"performancetrigger_dynamic-rollavg-0.2-30-int1500y\"),\n", + " (650, \"performancetrigger_dynamic-quant-0.15-30-int1500y\"),\n", + " (651, \"performancetrigger_dynamic-rollavg-0.1-15-int1500y\"),\n", + " (653, \"performancetrigger_dynamic-quant-0.3-15-int1500y\"),\n", + " (654, \"performancetrigger_dynamic-rollavg-0.1-30-int1500y\"),\n", + " (656, \"performancetrigger_dynamic-quant-0.3-30-int1500y\"),\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " 
\"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "maximum_min = min_active_eval_center_per_pipeline.max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.Timestamp(\"2013-05-01\")\n", + "\n", + "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = 
df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of trigger per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " 
)\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "# assert no cost triggers\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"Cost\").any()\n", + "\n", + "renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=1,\n", + " width_factor=1,\n", + ")\n", + "\n", + "# save_plot(fig, \"_all_tradeoff_yearbook_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=1,\n", + " width_factor=1,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb new file mode 100644 index 000000000..811a5f083 --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb @@ -0,0 +1,891 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", 
+ "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " patch_yearbook_time,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/10_baselines_time\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/11_baselines_amount\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/20_datadrift_static\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/21_datadrift_dynamic\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/40_cost_dataincorporation_latency\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/41_avoidable_miclass_cost\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + 
"for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " # (20, 'yearbook_timetrigger_40y'),\n", + " (23, \"yearbook_timetrigger_25y\"),\n", + " (24, \"yearbook_timetrigger_15y\"),\n", + " (25, \"yearbook_timetrigger_10y\"),\n", + " (26, \"yearbook_timetrigger_5y\"),\n", + " (27, \"yearbook_timetrigger_4y\"),\n", + " (29, \"yearbook_timetrigger_3y\"),\n", + " (31, \"yearbook_timetrigger_2y\"),\n", + " (33, \"yearbook_timetrigger_1y\"),\n", + " (21, \"yearbook_dataamount_250\"),\n", + " (30, \"yearbook_dataamount_500\"),\n", + " (32, \"yearbook_dataamount_1000\"),\n", + " (35, \"yearbook_dataamount_2500\"),\n", + " (36, \"yearbook_dataamount_5000\"),\n", + " (37, \"yearbook_dataamount_10000\"),\n", + " # (38, 'yearbook_dataamount_15000'),\n", + " # (39, 'yearbook_dataamount_30000'),\n", + " # duplicates\n", + " # (666, 'yearbook_dataamount_1000'),\n", + " # (667, 'yearbook_dataamount_250'),\n", + " # (668, 
'yearbook_dataamount_2500'),\n", + " # (669, 'yearbook_dataamount_5000'),\n", + " # (670, 'yearbook_dataamount_10000'),\n", + " # (671, 'yearbook_dataamount_500'),\n", + " # (672, 'yearbook_dataamount_15000'),\n", + " # (673, 'yearbook_dataamount_30000'),\n", + " # (63, 'yearbook_drifttrigger_mmd-0.09_int100_win1d'),\n", + " # (64, 'yearbook_drifttrigger_mmd-0.07_int100_win1d'),\n", + " # (65, 'yearbook_drifttrigger_mmd-0.12_int100_win1d'),\n", + " # (66, 'yearbook_drifttrigger_mmd-0.15_int100_win1d'),\n", + " # (67, 'yearbook_drifttrigger_mmd-0.03_int100_win1d'),\n", + " # (68, 'yearbook_drifttrigger_mmd-0.05_int100_win1d'),\n", + " # (69, 'yearbook_drifttrigger_mmd-0.12_int100_win4d'),\n", + " # (70, 'yearbook_drifttrigger_mmd-0.2_int100_win1d'),\n", + " # (71, 'yearbook_drifttrigger_mmd-0.4_int100_win1d'),\n", + " # (72, 'yearbook_drifttrigger_mmd-0.15_int100_win4d'),\n", + " # (73, 'yearbook_drifttrigger_mmd-0.09_int100_win4d'),\n", + " # (74, 'yearbook_drifttrigger_mmd-0.07_int100_win4d'),\n", + " # (75, 'yearbook_drifttrigger_mmd-0.12_int100_win10d'),\n", + " # (76, 'yearbook_drifttrigger_mmd-0.03_int100_win4d'),\n", + " # (77, 'yearbook_drifttrigger_mmd-0.05_int100_win4d'),\n", + " # (78, 'yearbook_drifttrigger_mmd-0.15_int100_win10d'),\n", + " # (79, 'yearbook_drifttrigger_mmd-0.4_int100_win4d'),\n", + " # (80, 'yearbook_drifttrigger_mmd-0.2_int100_win4d'),\n", + " (81, \"yearbook_drifttrigger_mmd-0.12_int250_win1d\"),\n", + " # (82, 'yearbook_drifttrigger_mmd-0.09_int100_win10d'),\n", + " # (83, 'yearbook_drifttrigger_mmd-0.07_int100_win10d'),\n", + " (84, \"yearbook_drifttrigger_mmd-0.15_int250_win1d\"),\n", + " (85, \"yearbook_drifttrigger_mmd-0.12_int250_win4d\"),\n", + " # (86, 'yearbook_drifttrigger_mmd-0.03_int100_win10d'),\n", + " # (87, 'yearbook_drifttrigger_mmd-0.05_int100_win10d'),\n", + " (88, \"yearbook_drifttrigger_mmd-0.15_int250_win4d\"),\n", + " (89, \"yearbook_drifttrigger_mmd-0.12_int250_win10d\"),\n", + " # (90, 
'yearbook_drifttrigger_mmd-0.2_int100_win10d'),\n", + " (91, \"yearbook_drifttrigger_mmd-0.15_int250_win10d\"),\n", + " # (92, 'yearbook_drifttrigger_mmd-0.4_int100_win10d'),\n", + " # (93, 'yearbook_drifttrigger_mmd-0.12_int500_win1d'),\n", + " # (94, 'yearbook_drifttrigger_mmd-0.15_int500_win1d'),\n", + " (95, \"yearbook_drifttrigger_mmd-0.07_int250_win1d\"),\n", + " # (96, 'yearbook_drifttrigger_mmd-0.12_int500_win4d'),\n", + " (97, \"yearbook_drifttrigger_mmd-0.09_int250_win1d\"),\n", + " # (98, 'yearbook_drifttrigger_mmd-0.15_int500_win4d'),\n", + " (99, \"yearbook_drifttrigger_mmd-0.05_int250_win1d\"),\n", + " (100, \"yearbook_drifttrigger_mmd-0.03_int250_win1d\"),\n", + " # (101, 'yearbook_drifttrigger_mmd-0.12_int500_win10d'),\n", + " # (102, 'yearbook_drifttrigger_mmd-0.15_int500_win10d'),\n", + " (103, \"yearbook_drifttrigger_mmd-0.2_int250_win1d\"),\n", + " (104, \"yearbook_drifttrigger_mmd-0.4_int250_win1d\"),\n", + " # (105, 'yearbook_drifttrigger_mmd-0.12_int1000_win1d'),\n", + " # (106, 'yearbook_drifttrigger_mmd-0.15_int1000_win1d'),\n", + " (107, \"yearbook_drifttrigger_mmd-0.07_int250_win4d\"),\n", + " # (108, 'yearbook_drifttrigger_mmd-0.12_int1000_win4d'),\n", + " (109, \"yearbook_drifttrigger_mmd-0.09_int250_win4d\"),\n", + " # (110, 'yearbook_drifttrigger_mmd-0.15_int1000_win4d'),\n", + " # (111, 'yearbook_drifttrigger_mmd-0.12_int1000_win10d'),\n", + " (112, \"yearbook_drifttrigger_mmd-0.05_int250_win4d\"),\n", + " (113, \"yearbook_drifttrigger_mmd-0.03_int250_win4d\"),\n", + " # (114, 'yearbook_drifttrigger_mmd-0.15_int1000_win10d'),\n", + " (115, \"yearbook_drifttrigger_mmd-0.2_int250_win4d\"),\n", + " (116, \"yearbook_drifttrigger_mmd-0.4_int250_win4d\"),\n", + " (117, \"yearbook_drifttrigger_mmd-0.07_int250_win10d\"),\n", + " (118, \"yearbook_drifttrigger_mmd-0.09_int250_win10d\"),\n", + " (119, \"yearbook_drifttrigger_mmd-0.05_int250_win10d\"),\n", + " # (122, 'yearbook_drifttrigger_mmd-0.09_int500_win1d'),\n", + " (123, 
\"yearbook_drifttrigger_mmd-0.2_int250_win10d\"),\n", + " # (126, 'yearbook_drifttrigger_mmd-0.09_int500_win4d'),\n", + " # (127, 'yearbook_drifttrigger_mmd-0.07_int500_win1d'),\n", + " # (132, 'yearbook_drifttrigger_mmd-0.05_int500_win1d'),\n", + " # (133, 'yearbook_drifttrigger_mmd-0.4_int500_win10d'),\n", + " # (136, 'yearbook_drifttrigger_mmd-0.4_int1000_win1d'),\n", + " # (137, 'yearbook_drifttrigger_mmd-0.2_int500_win1d'),\n", + " # (138, 'yearbook_drifttrigger_mmd-0.09_int1000_win4d'),\n", + " # (139, 'yearbook_drifttrigger_mmd-0.07_int500_win4d'),\n", + " # (144, 'yearbook_drifttrigger_mmd-0.4_int1000_win10d'),\n", + " # (145, 'yearbook_drifttrigger_mmd-0.05_int500_win4d'),\n", + " # (146, 'yearbook_drifttrigger_mmd-0.2_int500_win4d'),\n", + " # (147, 'yearbook_drifttrigger_mmd-0.07_int500_win10d'),\n", + " # (148, 'yearbook_drifttrigger_mmd-0.05_int500_win10d'),\n", + " # (149, 'yearbook_drifttrigger_mmd-0.2_int500_win10d'),\n", + " # (150, 'yearbook_drifttrigger_mmd-0.07_int1000_win1d'),\n", + " # (151, 'yearbook_drifttrigger_mmd-0.05_int1000_win1d'),\n", + " # (152, 'yearbook_drifttrigger_mmd-0.2_int1000_win1d'),\n", + " # (153, 'yearbook_drifttrigger_mmd-0.07_int1000_win4d'),\n", + " # (154, 'yearbook_drifttrigger_mmd-0.05_int1000_win4d'),\n", + " # (155, 'yearbook_drifttrigger_mmd-0.2_int1000_win4d'),\n", + " # (156, 'yearbook_drifttrigger_mmd-0.07_int1000_win10d'),\n", + " # (157, 'yearbook_drifttrigger_mmd-0.05_int1000_win10d'),\n", + " # (158, 'yearbook_drifttrigger_mmd-0.2_int1000_win10d'),\n", + " (159, \"yearbook_drifttrigger_mmd-0.03_int250_win10d\"),\n", + " # (160, 'yearbook_drifttrigger_mmd-0.03_int1000_win1d'),\n", + " # (161, 'yearbook_drifttrigger_mmd-0.4_int500_win4d'),\n", + " # (162, 'yearbook_drifttrigger_mmd-0.03_int500_win10d'),\n", + " # (163, 'yearbook_drifttrigger_mmd-0.4_int1000_win4d'),\n", + " # (164, 'yearbook_drifttrigger_mmd-0.09_int1000_win10d'),\n", + " # (165, 'yearbook_drifttrigger_mmd-0.03_int500_win1d'),\n", + " (166, 
\"yearbook_drifttrigger_mmd-0.4_int250_win10d\"),\n", + " # (167, 'yearbook_drifttrigger_mmd-0.09_int1000_win1d'),\n", + " # (168, 'yearbook_drifttrigger_mmd-0.09_int500_win10d'),\n", + " # (169, 'yearbook_drifttrigger_mmd-0.03_int500_win4d'),\n", + " # (170, 'yearbook_drifttrigger_mmd-0.4_int500_win1d'),\n", + " # (171, 'yearbook_drifttrigger_mmd-0.03_int1000_win10d'),\n", + " # (172, 'yearbook_drifttrigger_mmd-0.03_int1000_win4d'),\n", + " # (329, 'yearbook_drifttrigger_mmd-quant-0.05-10_int500_win4d'),\n", + " # (330, 'yearbook_drifttrigger_mmd-quant-0.05-20_int500_win4d\\n'),\n", + " # (331, 'yearbook_drifttrigger_mmd-quant-0.05-30_int100_win4d'),\n", + " # (332, 'yearbook_drifttrigger_mmd-quant-0.1-10_int500_win4d'),\n", + " # (333, 'yearbook_drifttrigger_mmd-quant-0.15-20_int500_win4d'),\n", + " # (334, 'yearbook_drifttrigger_mmd-quant-0.15-10_int500_win4d'),\n", + " # (335, 'yearbook_drifttrigger_mmd-quant-0.1-20_int500_win4d'),\n", + " # (336, 'yearbook_drifttrigger_mmd-quant-0.3-20_int500_win4d'),\n", + " # (337, 'yearbook_drifttrigger_mmd-quant-0.1-30_int500_win4d'),\n", + " # (338, 'yearbook_drifttrigger_mmd-quant-0.3-10_int500_win4d'),\n", + " # (339, 'yearbook_drifttrigger_mmd-rollavg-0.05-20_int500_win4d'),\n", + " (340, \"yearbook_drifttrigger_mmd-quant-0.1-10_int250_win4d\"),\n", + " # (341, 'yearbook_drifttrigger_mmd-quant-0.15-30_int100_win4d'),\n", + " # (342, 'yearbook_drifttrigger_mmd-rollavg-0.05-10_int500_win4d'),\n", + " # (343, 'yearbook_drifttrigger_mmd-rollavg-0.2-20_int500_win4d'),\n", + " # (344, 'yearbook_drifttrigger_mmd-rollavg-0.2-10_int500_win4d'),\n", + " (345, \"yearbook_drifttrigger_mmd-quant-0.1-20_int250_win4d\"),\n", + " # (346, 'yearbook_drifttrigger_mmd-rollavg-0.5-20_int500_win4d'),\n", + " # (347, 'yearbook_drifttrigger_mmd-rollavg-0.5-10_int500_win4d'),\n", + " # (348, 'yearbook_drifttrigger_mmd-rollavg-1.0-20_int500_win4d'),\n", + " (349, \"yearbook_drifttrigger_mmd-quant-0.1-30_int250_win4d\"),\n", + " # (350, 
'yearbook_drifttrigger_mmd-rollavg-1.0-10_int500_win4d'),\n", + " # (351, 'yearbook_drifttrigger_mmd-rollavg-2.0-20_int500_win4d'),\n", + " # (352, 'yearbook_drifttrigger_mmd-quant-0.3-30_int100_win4d'),\n", + " (353, \"yearbook_drifttrigger_mmd-quant-0.05-20_int250_win4d\"),\n", + " # (354, 'yearbook_drifttrigger_mmd-rollavg-2.0-10_int500_win4d'),\n", + " # (355, 'yearbook_drifttrigger_mmd-quant-0.1-10_int100_win4d'),\n", + " (356, \"yearbook_drifttrigger_mmd-quant-0.05-10_int250_win4d\"),\n", + " (357, \"yearbook_drifttrigger_mmd-quant-0.15-20_int250_win4d\"),\n", + " (358, \"yearbook_drifttrigger_mmd-quant-0.15-10_int250_win4d\"),\n", + " (359, \"yearbook_drifttrigger_mmd-quant-0.3-20_int250_win4d\"),\n", + " # (360, 'yearbook_drifttrigger_mmd-rollavg-0.05-30_int100_win4d'),\n", + " # (361, 'yearbook_drifttrigger_mmd-quant-0.1-20_int100_win4d'),\n", + " (362, \"yearbook_drifttrigger_mmd-quant-0.3-10_int250_win4d\"),\n", + " (363, \"yearbook_drifttrigger_mmd-rollavg-0.05-20_int250_win4d\"),\n", + " (364, \"yearbook_drifttrigger_mmd-rollavg-0.05-10_int250_win4d\"),\n", + " (365, \"yearbook_drifttrigger_mmd-rollavg-0.2-20_int250_win4d\"),\n", + " # (366, 'yearbook_drifttrigger_mmd-quant-0.1-30_int100_win4d'),\n", + " # (367, 'yearbook_drifttrigger_mmd-rollavg-0.2-30_int100_win4d'),\n", + " (368, \"yearbook_drifttrigger_mmd-rollavg-0.2-10_int250_win4d\"),\n", + " (369, \"yearbook_drifttrigger_mmd-rollavg-0.5-20_int250_win4d\"),\n", + " (370, \"yearbook_drifttrigger_mmd-rollavg-1.0-20_int250_win4d\"),\n", + " (371, \"yearbook_drifttrigger_mmd-rollavg-0.5-10_int250_win4d\"),\n", + " (372, \"yearbook_drifttrigger_mmd-rollavg-2.0-20_int250_win4d\"),\n", + " (373, \"yearbook_drifttrigger_mmd-rollavg-1.0-10_int250_win4d\"),\n", + " # (374, 'yearbook_drifttrigger_mmd-rollavg-0.5-30_int100_win4d'),\n", + " # (375, 'yearbook_drifttrigger_mmd-quant-0.05-20_int100_win4d'),\n", + " (376, \"yearbook_drifttrigger_mmd-rollavg-2.0-10_int250_win4d\"),\n", + " # (377, 
'yearbook_drifttrigger_mmd-quant-0.05-10_int100_win4d'),\n", + " # (378, 'yearbook_drifttrigger_mmd-rollavg-1.0-30_int100_win4d'),\n", + " # (379, 'yearbook_drifttrigger_mmd-quant-0.15-20_int100_win4d'),\n", + " # (380, 'yearbook_drifttrigger_mmd-quant-0.15-10_int100_win4d'),\n", + " # (381, 'yearbook_drifttrigger_mmd-rollavg-2.0-30_int100_win4d'),\n", + " # (382, 'yearbook_drifttrigger_mmd-quant-0.3-20_int100_win4d'),\n", + " (383, \"yearbook_drifttrigger_mmd-quant-0.05-30_int250_win4d\"),\n", + " # (384, 'yearbook_drifttrigger_mmd-quant-0.3-10_int100_win4d'),\n", + " (385, \"yearbook_drifttrigger_mmd-quant-0.15-30_int250_win4d\"),\n", + " (386, \"yearbook_drifttrigger_mmd-quant-0.3-30_int250_win4d\"),\n", + " # (387, 'yearbook_drifttrigger_mmd-rollavg-0.05-20_int100_win4d'),\n", + " (388, \"yearbook_drifttrigger_mmd-rollavg-0.05-30_int250_win4d\"),\n", + " # (389, 'yearbook_drifttrigger_mmd-rollavg-0.05-10_int100_win4d'),\n", + " (390, \"yearbook_drifttrigger_mmd-rollavg-0.2-30_int250_win4d\"),\n", + " # (391, 'yearbook_drifttrigger_mmd-rollavg-0.2-20_int100_win4d'),\n", + " (392, \"yearbook_drifttrigger_mmd-rollavg-0.5-30_int250_win4d\"),\n", + " (393, \"yearbook_drifttrigger_mmd-rollavg-1.0-30_int250_win4d\"),\n", + " # (394, 'yearbook_drifttrigger_mmd-rollavg-0.2-10_int100_win4d'),\n", + " (395, \"yearbook_drifttrigger_mmd-rollavg-2.0-30_int250_win4d\"),\n", + " # (396, 'yearbook_drifttrigger_mmd-rollavg-0.5-20_int100_win4d'),\n", + " # (397, 'yearbook_drifttrigger_mmd-quant-0.05-30_int500_win4d'),\n", + " # (398, 'yearbook_drifttrigger_mmd-quant-0.15-30_int500_win4d'),\n", + " # (399, 'yearbook_drifttrigger_mmd-rollavg-0.5-10_int100_win4d'),\n", + " # (400, 'yearbook_drifttrigger_mmd-quant-0.3-30_int500_win4d'),\n", + " # (401, 'yearbook_drifttrigger_mmd-rollavg-1.0-20_int100_win4d'),\n", + " # (402, 'yearbook_drifttrigger_mmd-rollavg-0.05-30_int500_win4d'),\n", + " # (403, 'yearbook_drifttrigger_mmd-rollavg-0.2-30_int500_win4d'),\n", + " # (404, 
'yearbook_drifttrigger_mmd-rollavg-1.0-10_int100_win4d'),\n", + " # (405, 'yearbook_drifttrigger_mmd-rollavg-0.5-30_int500_win4d'),\n", + " # (406, 'yearbook_drifttrigger_mmd-rollavg-2.0-20_int100_win4d'),\n", + " # (407, 'yearbook_drifttrigger_mmd-rollavg-1.0-30_int500_win4d'),\n", + " # (408, 'yearbook_drifttrigger_mmd-rollavg-2.0-30_int500_win4d'),\n", + " # (409, 'yearbook_drifttrigger_mmd-rollavg-2.0-10_int100_win4d'),\n", + " # (683,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.85-red-True--int250y'),\n", + " # (685,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.85-red-False--int250y'),\n", + " (686, \"yearbook_performancetrigger_num_misclass-1500-exp-0.9-red-True--int250y\"),\n", + " (687, \"yearbook_performancetrigger_num_misclass-1500-exp-0.9-red-False--int250y\"),\n", + " # (688,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.95-red-True--int250y'),\n", + " # (704,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.85-red-False--int250y'),\n", + " # (727,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.95-red-False--int250y'),\n", + " # (728,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.85-red-True--int250y'),\n", + " # (729,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.85-red-False--int250y'),\n", + " # (730,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.85-red-True--int250y'),\n", + " (731, \"yearbook_performancetrigger_num_misclass-1000-exp-0.9-red-True--int250y\"),\n", + " # (732,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.85-red-False--int250y'),\n", + " (733, \"yearbook_performancetrigger_num_misclass-1000-exp-0.9-red-False--int250y\"),\n", + " (734, \"yearbook_performancetrigger_num_misclass-50-exp-0.9-red-True--int250y\"),\n", + " # (735,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.95-red-True--int250y'),\n", + " (736, 
\"yearbook_performancetrigger_num_misclass-50-exp-0.9-red-False--int250y\"),\n", + " # (737,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.95-red-False--int250y'),\n", + " # (738,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.95-red-True--int250y'),\n", + " # (739,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.85-red-True--int250y'),\n", + " # (740,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.95-red-False--int250y'),\n", + " # (741,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.85-red-False--int250y'),\n", + " (742, \"yearbook_performancetrigger_num_misclass-500-exp-0.9-red-True--int250y\"),\n", + " (743, \"yearbook_performancetrigger_num_misclass-500-exp-0.9-red-False--int250y\"),\n", + " # (744,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.95-red-True--int250y'),\n", + " # (746,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.95-red-False--int250y'),\n", + " # (747,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.85-red-True--int250y'),\n", + " (749, \"yearbook_performancetrigger_num_misclass-200-exp-0.9-red-True--int250y\"),\n", + " (751, \"yearbook_performancetrigger_num_misclass-200-exp-0.9-red-False--int250y\"),\n", + " # (753,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.95-red-True--int250y'),\n", + " # (754,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.95-red-False--int250y'),\n", + " # (755,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.85-red-True--int250y'),\n", + " # (757,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.85-red-False--int250y'),\n", + " (758, \"yearbook_performancetrigger_num_misclass-100-exp-0.9-red-True--int250y\"),\n", + " (759, \"yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\"),\n", + " # (760,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.95-red-True--int250y'),\n", + " # (761,\n", + " # 
'yearbook_performancetrigger_num_misclass-100-exp-0.95-red-False--int250y'),\n", + " # (410, 'yearbook_performancetrigger_static-0.7-int100y'),\n", + " (411, \"yearbook_performancetrigger_static-0.7-int250y\"),\n", + " # (412, 'yearbook_performancetrigger_static-0.7-int500y'),\n", + " # (413, 'yearbook_performancetrigger_static-0.75-int500y'),\n", + " (414, \"yearbook_performancetrigger_static-0.75-int250y\"),\n", + " # (416, 'yearbook_performancetrigger_static-0.8-int500y'),\n", + " # (417, 'yearbook_performancetrigger_static-0.75-int100y'),\n", + " (418, \"yearbook_performancetrigger_static-0.8-int250y\"),\n", + " # (419, 'yearbook_performancetrigger_static-0.85-int500y'),\n", + " (421, \"yearbook_performancetrigger_static-0.85-int250y\"),\n", + " # (422, 'yearbook_performancetrigger_static-0.875-int500y'),\n", + " # (423, 'yearbook_performancetrigger_static-0.8-int100y'),\n", + " # (424, 'yearbook_performancetrigger_static-0.9-int500y'),\n", + " (425, \"yearbook_performancetrigger_static-0.875-int250y\"),\n", + " # (427, 'yearbook_performancetrigger_static-0.925-int500y'),\n", + " # (428, 'yearbook_performancetrigger_static-0.85-int100y'),\n", + " (429, \"yearbook_performancetrigger_static-0.9-int250y\"),\n", + " # (430, 'yearbook_performancetrigger_static-0.95-int500y'),\n", + " (432, \"yearbook_performancetrigger_static-0.925-int250y\"),\n", + " # (433, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int500y'),\n", + " # (434, 'yearbook_performancetrigger_static-0.875-int100y'),\n", + " # (436, 'yearbook_performancetrigger_dynamic-quant-0.05-20-int500y'),\n", + " (437, \"yearbook_performancetrigger_static-0.95-int250y\"),\n", + " # (438, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int500y'),\n", + " # (440, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int500y'),\n", + " # (441, 'yearbook_performancetrigger_static-0.9-int100y'),\n", + " # (442, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int250y'),\n", + " # (443, 
'yearbook_performancetrigger_dynamic-quant-0.15-20-int500y'),\n", + " (445, \"yearbook_performancetrigger_dynamic-quant-0.05-20-int250y\"),\n", + " # (446, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int500y'),\n", + " # (447, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int500y'),\n", + " # (448, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int250y'),\n", + " # (450, 'yearbook_performancetrigger_static-0.925-int100y'),\n", + " # (451, 'yearbook_performancetrigger_dynamic-quant-0.3-20-int500y'),\n", + " # (452, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int250y'),\n", + " # (454, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int500y'),\n", + " (455, \"yearbook_performancetrigger_dynamic-quant-0.15-20-int250y\"),\n", + " # (458, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int250y'),\n", + " # (459, 'yearbook_performancetrigger_static-0.95-int100y'),\n", + " # (463, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int250y'),\n", + " # (464, 'yearbook_performancetrigger_dynamic-rollavg-0.05-10-int500y'),\n", + " # (465, 'yearbook_performancetrigger_dynamic-rollavg-0.05-20-int500y'),\n", + " (467, \"yearbook_performancetrigger_dynamic-quant-0.3-20-int250y\"),\n", + " # (468, 'yearbook_performancetrigger_dynamic-rollavg-0.05-30-int500y'),\n", + " # (469, 'yearbook_performancetrigger_dynamic-rollavg-0.1-10-int500y'),\n", + " # (471, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int250y'),\n", + " # (472, 'yearbook_performancetrigger_dynamic-rollavg-0.1-20-int500y'),\n", + " # (473, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int100y'),\n", + " # (474, 'yearbook_performancetrigger_dynamic-rollavg-0.1-30-int500y'),\n", + " # (475, 'yearbook_performancetrigger_dynamic-rollavg-0.2-10-int500y'),\n", + " # (478, 'yearbook_performancetrigger_dynamic-rollavg-0.2-20-int500y'),\n", + " # (479, 'yearbook_performancetrigger_dynamic-rollavg-0.2-30-int500y'),\n", + " (481, 
\"yearbook_performancetrigger_dynamic-quant-0.05-20-int100y\"),\n", + " # (483, 'yearbook_performancetrigger_dynamic-rollavg-0.3-10-int500y'),\n", + " # (484, 'yearbook_performancetrigger_dynamic-rollavg-0.3-20-int500y'),\n", + " # (486, 'yearbook_performancetrigger_dynamic-rollavg-0.3-30-int500y'),\n", + " # (489, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int100y'),\n", + " # (491, 'yearbook_performancetrigger_dynamic-rollavg-0.05-10-int250y'),\n", + " (494, \"yearbook_performancetrigger_dynamic-rollavg-0.05-20-int250y\"),\n", + " # (497, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int100y'),\n", + " # (499, 'yearbook_performancetrigger_dynamic-rollavg-0.05-30-int250y'),\n", + " # (503, 'yearbook_performancetrigger_dynamic-rollavg-0.1-10-int250y'),\n", + " (506, \"yearbook_performancetrigger_dynamic-rollavg-0.1-20-int250y\"),\n", + " (507, \"yearbook_performancetrigger_dynamic-quant-0.15-20-int100y\"),\n", + " # (509, 'yearbook_performancetrigger_dynamic-rollavg-0.1-30-int250y'),\n", + " # (513, 'yearbook_performancetrigger_dynamic-rollavg-0.2-10-int250y'),\n", + " (516, \"yearbook_performancetrigger_dynamic-rollavg-0.2-20-int250y\"),\n", + " # (519, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int100y'),\n", + " # (521, 'yearbook_performancetrigger_dynamic-rollavg-0.2-30-int250y'),\n", + " # (524, 'yearbook_performancetrigger_dynamic-rollavg-0.3-10-int250y'),\n", + " # (527, 'yearbook_performancetrigger_dynamic-rollavg-0.3-20-int250y'),\n", + " # (529, 'yearbook_performancetrigger_dynamic-rollavg-0.3-30-int250y'),\n", + " # (530, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int100y'),\n", + " (539, \"yearbook_performancetrigger_dynamic-quant-0.3-20-int100y\"),\n", + " # (548, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int100y'),\n", + " # (818, 'yearbook_costtrigger_data_inc_int250_exch15552000'),\n", + " # (819, 'yearbook_costtrigger_data_inc_int250_exch13824000'),\n", + " (821, 
\"yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse\"),\n", + " # (822, 'yearbook_costtrigger_data_inc_int250_exch12096000'),\n", + " # (825, 'yearbook_costtrigger_data_inc_int250_exch10368000'),\n", + " # (835, 'yearbook_costtrigger_data_inc_int250_exch129600000'),\n", + " # (836, 'yearbook_costtrigger_data_inc_int250_exch34560000'),\n", + " # (837, 'yearbook_costtrigger_data_inc_int250_exch25920000'),\n", + " # (838, 'yearbook_costtrigger_data_inc_int250_exch4320000'),\n", + " # (821, 'yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse'),\n", + " (839, \"yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse\"),\n", + " (840, \"yearbook_costtrigger_avoidable_misclass_int250_exch864000_redFalse\"),\n", + " (841, \"yearbook_costtrigger_avoidable_misclass_int250_exch864000_redFalse\"),\n", + " (842, \"yearbook_costtrigger_avoidable_misclass_int250_exch864.0_redFalse\"),\n", + " # (843, 'yearbook_costtrigger_avoidable_misclass_int250_exch8640000_redFalse'),\n", + " (844, \"yearbook_costtrigger_avoidable_misclass_int250_exch8640.0_redFalse\"),\n", + " # (846, 'yearbook_costtrigger_avoidable_misclass_int250_exch8640000000_redFalse'),\n", + " # (847, 'yearbook_costtrigger_avoidable_misclass_int250_exch864000000_redFalse'),\n", + " (848, \"yearbook_costtrigger_avoidable_misclass_int250_exch64800.0_redFalse\"),\n", + " # (849, 'yearbook_costtrigger_avoidable_misclass_int250_exch43200.0_redFalse'),\n", + " (850, \"yearbook_costtrigger_avoidable_misclass_int250_exch21600.0_redFalse\"),\n", + " # (851, 'yearbook_costtrigger_avoidable_misclass_int250_exch4320000000_redFalse')]\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: 
list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " \"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " 
patch_yearbook_time(df_leaf, \"sample_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "maximum_min = min_active_eval_center_per_pipeline.max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.Timestamp(\"1962-01-01\")\n", + "\n", + "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of triggers per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static 
thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger 
SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\", \"Cost\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.9,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_yearbook_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.9,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb b/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb new file mode 100644 index 000000000..efa21290b --- /dev/null +++ b/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", 
+ " # TODO\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [267, 269, 265] + [268, 271, 270]\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. 
duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [265, 269, 267],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 265: \"TimeTrigger 10 years\",\n", + " 269: \"TimeTrigger 2 years\",\n", + " 267: 
\"TimeTrigger 26 weeks\",\n", + " },\n", + " height_factor=1.8,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_time-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [268, 271, 270],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 268: \"AmountTrigger 500k samples\",\n", + " 271: \"AmountTrigger 100k samples\",\n", + " 270: \"AmountTrigger 25k samples\",\n", + " },\n", + " height_factor=1.8,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_amount-trigger-cost-matrix\")\n", + "# not interesting: note that for 250 samples we see multiple triggers at the same timestamp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plot 100k 
amount and 2y time trigger together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [269, 271],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 269: \"TimeTrigger 2 years\",\n", + " 271: \"AmountTrigger 100k samples\",\n", + " },\n", + " height_factor=1.2,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_timeamount-trigger-cost-matrix\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb b/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb index 100651d98..b8c9e6fa6 100644 --- a/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb @@ -57,13 +57,13 @@ "outputs": [], "source": [ "# mode:\n", - "pipeline_id = 771 # hp drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\n", + "pipeline_id = 782 # 
drifttrigger_mmd-quant-0.05-20_int20000_win1y\n", "\n", "# doesn't do anything unless include_composite_model = True\n", "composite_model_variant = \"currently_active_model\"\n", "\n", "patch_yearbook = True\n", - "dataset_id = \"huffpost_kaggle_test\"\n", + "dataset_id = \"arxiv_kaggle_test\"\n", "eval_handler = \"periodic-current\"\n", "metric = \"Accuracy\"\n", "include_composite_model = False" @@ -230,20 +230,21 @@ " heatmap_data,\n", " reverse_col=True,\n", " x_custom_ticks=[\n", - " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", - " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", + " if period in [pd.Period(\"Mar 2000\"), pd.Period(\"Mar 2009\"), pd.Period(\"Mar 2020\")]\n", " ],\n", " y_custom_ticks=[\n", - " (i + 0.5, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", " for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " if period in [pd.Period(\"Jun 2000\"), pd.Period(\"Jun 2009\"), pd.Period(\"May 2020\")]\n", " ],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"HuffPost Dynamic Threshold\\nRolling Average: Δ +200%\",\n", + " title_label=\"Arxiv Dynamic Drift Threshold: MMD Quantile: 0.05\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.6,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.55,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/drift/hp_cost.ipynb b/analytics/plotting/rh_thesis/drift/hp_cost.ipynb new file mode 100644 index 000000000..6fb266f85 --- /dev/null +++ b/analytics/plotting/rh_thesis/drift/hp_cost.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/21_datadrift_dynamic\"),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [771] # hp drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " 
\"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [771],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 771: \"HuffPost Dynamic Drift: Δ +200%\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2014-05-01\", \"2018-06-01\", \"2021-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"May\\n2014\", \"Jun\\n2018\", \"Jan\\n2021\"]]),\n", + " x_lim=(pd.Timestamp(\"2012-01-01\"), pd.Timestamp(\"2022-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 110, 25)],\n", + " y_lim_cumulative=(0, 100),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"huffpost_drift-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb 
b/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb index 26784853b..b0c43f313 100644 --- a/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb @@ -230,7 +230,7 @@ " heatmap_data,\n", " reverse_col=True,\n", " x_custom_ticks=[\n", - " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", " ],\n", @@ -240,10 +240,10 @@ " ],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"HuffPost Dynamic Threshold\\nRolling Average: Δ +200%\",\n", + " title_label=\"HuffPost Dynamic Drift Threshold: Rolling Average Δ +200%\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.6,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.5,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/drift/yb_cost.ipynb b/analytics/plotting/rh_thesis/drift/yb_cost.ipynb index b642f2841..a136ed341 100644 --- a/analytics/plotting/rh_thesis/drift/yb_cost.ipynb +++ b/analytics/plotting/rh_thesis/drift/yb_cost.ipynb @@ -160,21 +160,37 @@ " [107],\n", " grid_alpha=0.75,\n", " title_map={\n", - " 107: \"static MMD=0.07 threshold\",\n", + " 107: \"Static MMD Threshold=0.07\",\n", " },\n", - " height_factor=0.8,\n", + " height_factor=0.7,\n", " width_factor=1.0,\n", " duration_ylabel=\"Duration (sec)\",\n", " cumulative_ylabel=\"Cumulative Duration (min)\",\n", " x_ticks=[x for x in range(1940, 2010 + 1, 30)],\n", - " y_ticks_cumulative=[x for x in range(0, 60 + 1, 20)],\n", - " y_lim_cumulative=(0, 70),\n", + " y_ticks_cumulative=[x for x in range(0, 9 + 1, 3)],\n", + " y_lim_cumulative=(0, 10),\n", " y_minutes=False,\n", " 
y_minutes_cumulative=True,\n", ")\n", "\n", "save_plot(fig, \"yearbook_drift-trigger-cost-matrix\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# warmup noticeable where no detection is launched" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb index 379988a5f..c33a59917 100644 --- a/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb +++ b/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb @@ -211,7 +211,7 @@ "maximum_min = min_active_eval_center_per_pipeline.max()\n", "print(maximum_min, min_active_eval_center_per_pipeline)\n", "\n", - "assert maximum_min < pd.Timestamp(\"1950-01-01\")\n", + "assert maximum_min < pd.Timestamp(\"1940-01-01\")\n", "\n", "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", "df_adjusted[\"interval_center\"].unique()" diff --git a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb index ac8b6f040..b3390cbd6 100644 --- a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb +++ b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb @@ -222,10 +222,10 @@ " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"Pipeline with\\nWindow Size\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nWindow Sizes (MMD=0.07)\",\n", + " title_label=\"Yearbook Composite Models: Drift Window Sizes (MMD=0.07)\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.43,\n", + " width_factor=1,\n", + " height_factor=0.38,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # 
disable_horizontal_grid=True,\n", @@ -291,10 +291,10 @@ " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"MMD Threshold\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nStatic Thresholds\",\n", + " title_label=\"Yearbook Composite Models: Static Drift Thresholds\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.55,\n", + " width_factor=1,\n", + " height_factor=0.45,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", @@ -362,12 +362,12 @@ " reverse_col=True,\n", " x_ticks=[1950, 1975, 2000],\n", " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", - " y_label=\"Dynamic Quantile\",\n", + " y_label=\"Criterion\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nDynamic Thresholds\",\n", + " title_label=\"Yearbook Composite Models: Dynamic Drift Thresholds\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.55,\n", + " width_factor=1,\n", + " height_factor=0.47,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", @@ -381,6 +381,13 @@ ")\n", "save_plot(fig, \"yb_trigger_heatmap_drift_multi_dynamic_thresholds\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb index 015b7bc92..54bdc59eb 100644 --- a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb @@ -271,10 +271,10 @@ " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook 4y 
Windows\\nStatic Threshold: MMD=0.07\",\n", + " title_label=\"Yearbook 4y Drift Windows: Static MMD Threshold=0.07\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.55,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb index 51234c32f..5d0537799 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb @@ -292,16 +292,16 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\n", " \"Accuracy\",\n", " generate_heatmap_data_for_handler(\n", " df_merged, \"Accuracy\"\n", " ), # almost identical to F1-micro and F1-weighted; macro is broken\n", " ),\n", - " (1, \"Trained up to\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", - " (2, \"Trained up to\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", - " (3, \"Trained up to\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", + " (2, \"Evaluation Year\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", + " (3, \"Evaluation Year\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", " }\n", "}\n", "\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb 
index d1621419e..4c4c4e479 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb @@ -292,16 +292,16 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\n", " \"Accuracy\",\n", " generate_heatmap_data_for_handler(\n", " df_merged, \"Accuracy\"\n", " ), # almost identical to F1-micro and F1-weighted; macro is broken\n", " ),\n", - " (1, \"Trained up to\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", - " (2, \"Trained up to\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", - " (3, \"Trained up to\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", + " (2, \"Evaluation Year\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", + " (3, \"Evaluation Year\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", " }\n", "}\n", "\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb index 018ab16ed..6cdc52db8 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb @@ -302,9 +302,9 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\"Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Accuracy\")),\n", - " (1, \"Trained up to\"): (\"ROC-AUC\", generate_heatmap_data_for_handler(df_merged, \"ROC-AUC\")),\n", + " (0, 
\"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"ROC-AUC\", generate_heatmap_data_for_handler(df_merged, \"ROC-AUC\")),\n", " }\n", "}\n", "\n", @@ -326,7 +326,7 @@ " grid_alpha=0.5,\n", ")\n", "\n", - "save_plot(fig, \"evaluation_metrics_yb_1\")" + "save_plot(fig, \"evaluation_metrics_yb_one\")" ] }, { @@ -336,10 +336,10 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\"F1-micro\", generate_heatmap_data_for_handler(df_merged, \"F1-micro\")),\n", - " (1, \"Trained up to\"): (\"F1-macro\", generate_heatmap_data_for_handler(df_merged, \"F1-macro\")),\n", - " (2, \"Trained up to\"): (\"F1-weighted\", generate_heatmap_data_for_handler(df_merged, \"F1-weighted\")),\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"F1-micro\", generate_heatmap_data_for_handler(df_merged, \"F1-micro\")),\n", + " (1, \"Evaluation Year\"): (\"F1-macro\", generate_heatmap_data_for_handler(df_merged, \"F1-macro\")),\n", + " (2, \"Evaluation Year\"): (\"F1-weighted\", generate_heatmap_data_for_handler(df_merged, \"F1-weighted\")),\n", " }\n", "}\n", "\n", @@ -361,8 +361,15 @@ " grid_alpha=0.5,\n", ")\n", "\n", - "save_plot(fig, \"evaluation_metrics_yb_1\")" + "save_plot(fig, \"evaluation_metrics_yb_two\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb index 24593e11e..108303011 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb @@ -292,13 +292,13 @@ "\n", "\n", "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, 
\"Trained up to\"): (\"Same Year\", generate_heatmap_data_for_handler(df_merged, \"periodic-current\")),\n", - " (1, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"Same Year\", generate_heatmap_data_for_handler(df_merged, \"periodic-current\")),\n", + " (1, \"Evaluation Year\"): (\n", " \"3 Year Window (±1 yr.)\",\n", " generate_heatmap_data_for_handler(df_merged, \"periodic-delta+-1y\"),\n", " ),\n", - " (2, \"Trained up to\"): (\n", + " (2, \"Evaluation Year\"): (\n", " \"11 Year Window (±5 yr.)\",\n", " generate_heatmap_data_for_handler(df_merged, \"periodic-delta+-5y\"),\n", " ),\n", diff --git a/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb b/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb new file mode 100644 index 000000000..03a3f16f0 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\")\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = 
{}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [762] # performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. 
duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [762],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 762: \"arXiv PerformanceTrigger (NumMisclass.)\",\n", + " },\n", + " height_factor=0.7,\n", + " 
width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "save_plot(fig, \"arxiv_performance-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lower/insignificant policy eval costs compared to drift" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb new file mode 100644 index 000000000..44c4d2e7e --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, 
logs_dataframe\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 762 # performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " 
pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].dt.to_period(\"M\")\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = 
df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.to_period(\"M\")\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged.groupby([\"real_train_end\", \"interval_center\"]).size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "df_merged[\"real_train_end\"] = df_merged[\"real_train_end\"].apply(lambda x: pd.Period(x, freq=\"M\"))\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", + " if period in [pd.Period(\"Mar 2000\"), pd.Period(\"Mar 2009\"), pd.Period(\"Mar 2020\")]\n", + " ],\n", + " y_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " 
for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " if period in [pd.Period(\"Jun 2000\"), pd.Period(\"Jun 2009\"), pd.Period(\"May 2020\")]\n", + " ],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Arxiv PerformanceTrigger\\nExp. Acc=60% | NumMiscl=10k | No Reduction\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.7,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + " x_axis=\"period\",\n", + ")\n", + "save_plot(fig, \"arxiv_trigger_heatmap_performance_single_dynamic\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/hp_cost.ipynb b/analytics/plotting/rh_thesis/performance/hp_cost.ipynb new file mode 100644 index 000000000..a4401c8df --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/hp_cost.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import 
plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\")\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [639] # performancetrigger_static-0.5-int1500y\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = 
pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [639],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 639: \"HuffPost Static PerformanceTrigger\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2014-05-01\", \"2018-06-01\", \"2021-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"May\\n2014\", \"Jun\\n2018\", \"Jan\\n2021\"]]),\n", + " x_lim=(pd.Timestamp(\"2012-01-01\"), pd.Timestamp(\"2022-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 110, 25)],\n", + " y_lim_cumulative=(0, 100),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"huffpost_performance-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lower policy eval costs compared to drift" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb new file mode 100644 index 000000000..ad25baf58 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from 
analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 639 # performancetrigger_static-0.5-int1500y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " 
# subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].dt.to_period(\"M\")\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.to_period(\"M\")\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged.groupby([\"real_train_end\", \"interval_center\"]).size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "df_merged[\"real_train_end\"] = df_merged[\"real_train_end\"].apply(lambda x: pd.Period(x, freq=\"M\"))\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", + " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", + " ],\n", + " 
y_custom_ticks=[\n", + " (i + 0.5, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " ],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"HuffPost PerformanceTrigger: Static Accuracy Threshold=50%\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.5,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + " x_axis=\"period\",\n", + ")\n", + "save_plot(fig, \"hp_trigger_heatmap_performance_single_static\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yb_cost.ipynb b/analytics/plotting/rh_thesis/performance/yb_cost.ipynb new file mode 100644 index 000000000..acf1a8608 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yb_cost.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import patch_yearbook_time, pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from 
modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [759] # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")\n", + "\n", + "# Yearbook as a mapped time dimension (to display the correct timestamps we need to convert back from days to years)\n", + "if patch_yearbook:\n", + " patch_yearbook_time(df_adjusted, \"sample_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"].dt.year\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [759],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " # title_label=\"Yearbook PerformanceTrigger:\\n Exp. Acc=90% | NumMiscl=100 | No Reduction\",\n", + " 759: \"Yearbook PerformanceTrigger (NumMiscl)\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (sec)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_ticks=[x for x in range(1940, 2010 + 1, 30)],\n", + " y_ticks_cumulative=[x for x in range(0, 9 + 1, 3)],\n", + " y_lim_cumulative=(0, 10),\n", + " y_minutes=False,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"yearbook_performance-trigger-cost-matrix\")\n", + "# Lower policy costs than in drift case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb new file mode 100644 index 000000000..a227fb614 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " 
dfs_models_and_evals,\n", + " patch_yearbook_time,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn/\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_ids = list(pipelines.keys())\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: list[pd.DataFrame] = []\n", + "df_logs_models_list: list[pd.DataFrame] = []\n", + "\n", + "for 
pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=False, pipeline_id=pipeline_id)\n", + " df_logs_models_single, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " df_logs_models_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + " df_logs_models_list.append(df_logs_models_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_logs_models = pd.concat(df_logs_models_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "len(df_adjusted)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()\n", + "len(df_adjusted)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "\n", + "df_train_end_years_per_model = df_logs_models[[\"pipeline_id\", \"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted.groupby([\"pipeline_id\"]).size()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Static Performance Thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_pids = list(reversed([437, 432, 429, 425, 421, 418, 414, 411]))\n", + "\n", + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=[\"pipeline_id\", \"model_idx\"], how=\"left\")\n", + "# build heatmap matrix dataframe:\n", + "df_merged[\"pipeline_id\"] = df_merged[\"pipeline_id\"].astype(int)\n", + "df_merged = df_merged[df_merged[\"pipeline_id\"].isin(_pids)]\n", + "heatmap_data = df_merged.pivot(index=[\"pipeline_id\"], columns=\"interval_center\", values=\"value\")\n", + "\n", + "heatmap_data.index.min(), heatmap_data.index.max()\n", + "heatmap_data\n", + "\n", + "# sort index by pipeline_refs\n", + "heatmap_data = heatmap_data.reindex(_pids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "\n", + "pipelines_refs = {437: 
\"95%\", 432: \"92.5%\", 429: \"90%\", 425: \"87.5%\", 421: \"85%\", 418: \"80%\", 414: \"75%\", 411: \"70%\"}\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Pipeline with\\nAccuracy Threshold\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook Composite Models: Static Accuracy Thresholds\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.58,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " triggers={\n", + " i: df_logs_models[df_logs_models[\"pipeline_id\"] == p_id][\n", + " [\"train_start\", \"train_end\", \"usage_start\", \"usage_end\"]\n", + " ]\n", + " for i, p_id in enumerate(heatmap_data.index)\n", + " },\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_multi_static_thresholds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dynamic Thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_pids = (\n", + " # Num misclass: with reduction\n", + " list(reversed([734, 758, 749]))\n", + " +\n", + " # Num misclass: without reduction\n", + " list(reversed([736, 759, 751, 743]))\n", + " +\n", + " # roll avg\n", + " list([527, 516, 506, 494])\n", + " +\n", + " # quantile\n", + " [445]\n", + ")\n", + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=[\"pipeline_id\", \"model_idx\"], how=\"left\")\n", + "# build heatmap matrix dataframe:\n", + "df_merged[\"pipeline_id\"] = df_merged[\"pipeline_id\"].astype(int)\n", + "df_merged = df_merged[df_merged[\"pipeline_id\"].isin(_pids)]\n", + "heatmap_data = df_merged.pivot(index=[\"pipeline_id\"], columns=\"interval_center\", values=\"value\")\n", + "\n", + "heatmap_data.index.min(), 
heatmap_data.index.max()\n", + "\n", + "# sort index by pipeline_refs\n", + "heatmap_data = heatmap_data.reindex(_pids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "pipelines_refs = {\n", + " # Roll Avg\n", + " 527: \"Δ 0.3\",\n", + " 516: \"Δ 0.2\",\n", + " 506: \"Δ 0.1\",\n", + " 494: \"Δ 0.05\",\n", + " # Quantile\n", + " 445: \"% 0.05\",\n", + " # Num misclass: without reduction\n", + " 736: \"X 50, noRed\",\n", + " 759: \"X 100, noRed\",\n", + " 751: \"X 200, noRed\",\n", + " 743: \"X 500, noRed\",\n", + " # Num misclass: with reduction\n", + " 734: \"X 50, Red\",\n", + " 758: \"X 100, Red\",\n", + " 749: \"X 200, Red\",\n", + "}\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Criterion\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook Composite Models:\\nDynamic Performance Thresholds & Num. 
Misclassifications\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.75,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " triggers={\n", + " i: df_logs_models[df_logs_models[\"pipeline_id\"] == p_id][\n", + " [\"train_start\", \"train_end\", \"usage_start\", \"usage_end\"]\n", + " ]\n", + " for i, p_id in enumerate(heatmap_data.index)\n", + " },\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_multi_dyn_thresholds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb new file mode 100644 index 000000000..934586937 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " 
\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn\"\n", + ")\n", + "assert pipelines_dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 418 # 250 0.8 static\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & 
(df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in 
df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import 
save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook PerformanceTrigger: Static Accuracy Threshold=70%\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.55,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_single_static\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb new file mode 100644 index 000000000..e39e4764a --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + ")\n", + "assert pipelines_dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 759 # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False\n", + "\n", + "print(f\"Pipeline ID: {pipeline_id}, name: {pipelines[pipeline_id][0]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with 
yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which 
model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + " title_label=\"Yearbook PerformanceTrigger:\\n Exp. Acc=90% | NumMiscl=100 | No Reduction\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.55,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_single_num_misclass\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}