From d542debd038eca1843ecbae014ab1af4d937f920 Mon Sep 17 00:00:00 2001 From: Robin Holzinger Date: Sat, 28 Sep 2024 16:04:40 +0200 Subject: [PATCH] Finalize all plots? --- analytics/plotting/common/color.py | 2 +- .../plotting/common/tradeoff_scatterplot.py | 20 +- analytics/plotting/rh_thesis/TODO.md | 17 - .../compare_all/arxiv_perf_tradeoff.ipynb | 542 +++++++++++ .../compare_all/hp_perf_tradeoff.ipynb | 559 +++++++++++ .../compare_all/yb_perf_tradeoff.ipynb | 891 ++++++++++++++++++ .../plotting/rh_thesis/drift/arxiv_cost.ipynb | 282 ++++++ .../drift/arxiv_heatmap_single.ipynb | 17 +- .../plotting/rh_thesis/drift/hp_cost.ipynb | 219 +++++ .../rh_thesis/drift/hp_heatmap_single.ipynb | 8 +- .../plotting/rh_thesis/drift/yb_cost.ipynb | 24 +- .../drift/yb_cost_perf_tradeoff.ipynb | 2 +- .../drift/yearbook_heatmap_multi.ipynb | 27 +- .../drift/yearbook_heatmap_single.ipynb | 6 +- .../arxiv_heatmap_metrics.ipynb | 10 +- .../evaluation_setup/hp_heatmap_metrics.ipynb | 10 +- .../evaluation_setup/yb_heatmap_metrics.ipynb | 25 +- .../yb_heatmap_window_size.ipynb | 8 +- .../rh_thesis/performance/arxiv_cost.ipynb | 227 +++++ .../performance/arxiv_heatmap_single.ipynb | 285 ++++++ .../rh_thesis/performance/hp_cost.ipynb | 221 +++++ .../performance/hp_heatmap_single.ipynb | 286 ++++++ .../rh_thesis/performance/yb_cost.ipynb | 212 +++++ .../performance/yearbook_heatmap_multi.ipynb | 363 +++++++ .../performance/yearbook_heatmap_single.ipynb | 307 ++++++ .../yearbook_heatmap_single_num_miclass.ipynb | 310 ++++++ 26 files changed, 4807 insertions(+), 73 deletions(-) delete mode 100644 analytics/plotting/rh_thesis/TODO.md create mode 100644 analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb create mode 100644 analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb create mode 100644 
analytics/plotting/rh_thesis/drift/hp_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/hp_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yb_cost.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb create mode 100644 analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb diff --git a/analytics/plotting/common/color.py b/analytics/plotting/common/color.py index 042945553..14b864026 100644 --- a/analytics/plotting/common/color.py +++ b/analytics/plotting/common/color.py @@ -63,8 +63,8 @@ def main_colors(light: bool = False) -> list[tuple[float, float, float]]: colorblind_palette[-2], colorblind_palette[1], colorblind_palette[2], - colorblind_palette[3], colorblind_palette[4], + colorblind_palette[5], ] diff --git a/analytics/plotting/common/tradeoff_scatterplot.py b/analytics/plotting/common/tradeoff_scatterplot.py index 0e003df53..ddb5f30d5 100644 --- a/analytics/plotting/common/tradeoff_scatterplot.py +++ b/analytics/plotting/common/tradeoff_scatterplot.py @@ -20,6 +20,8 @@ def plot_tradeoff_scatter( height_factor: float = 1.0, width_factor: float = 1.0, target_ax: Axes | None = None, + manual_legend_title: bool = True, + legend_ncol: int = 1, ) -> Figure: sns.set_theme(style="whitegrid") init_plot() @@ -46,7 +48,13 @@ def plot_tradeoff_scatter( hue=hue, style=style, # style="pipeline_ref", - palette=[main_color(0), main_color(1), main_color(3)], + palette=[ + main_color(0), + main_color(1), + main_color(3), + main_color(4), + main_color(5), + ], # palette={"drift": main_color(3), "yearly": main_color(0), "amount": 
main_color(1)}, s=300, # legend=False, @@ -56,10 +64,18 @@ def plot_tradeoff_scatter( # ax.set(xlim=(-4, 85)) ax.legend( - title=hue, fontsize="small", title_fontsize="medium", # title="Pipeline", + **( + { + "title": hue, + } + if manual_legend_title + else {} + ), + # 2 columns + ncol=legend_ncol, ) # Adjust x-axis tick labels diff --git a/analytics/plotting/rh_thesis/TODO.md b/analytics/plotting/rh_thesis/TODO.md deleted file mode 100644 index 27e973a65..000000000 --- a/analytics/plotting/rh_thesis/TODO.md +++ /dev/null @@ -1,17 +0,0 @@ -drift: - -- plot arxiv - -performance: - -- 1 cost plot -- 1 single pipeline heatmap -- 1 multi pipeline heatmap for every dataset (including best of every subtype) - -cost: - -- 1 dummy plot - -discussion: - -- tradeoff plot: 1 per dataset diff --git a/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb new file mode 100644 index 000000000..033b7328a --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/arxiv_perf_tradeoff.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/10_baselines_time\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/11_baselines_amount\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/21_datadrift_dynamic\"\n", + " ), # TODO\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\"),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "\n", + "pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " (263, \"timetrigger_5y\"),\n", + " (265, \"timetrigger_10y\"),\n", + " # (267, 'timetrigger_26w'),\n", + " (269, \"timetrigger_2y\"),\n", + " (272, \"timetrigger_1y\"),\n", + " # (264, 'dataamount_1000000'),\n", + 
" (266, \"dataamount_50000\"),\n", + " # (268, 'dataamount_500000'),\n", + " (270, \"dataamount_25000\"),\n", + " (271, \"dataamount_100000\"),\n", + " (782, \"drifttrigger_mmd-quant-0.05-20_int20000_win1y\"),\n", + " (783, \"drifttrigger_mmd-rollavg-0.5-20_int20000_win1y\"),\n", + " (784, \"drifttrigger_mmd-rollavg-5.0-20_int20000_win1y\"),\n", + " (785, \"drifttrigger_mmd-quant-0.15-20_int20000_win1y\"),\n", + " (786, \"drifttrigger_mmd-rollavg-0.2-20_int20000_win1y\"),\n", + " (787, \"drifttrigger_mmd-quant-0.1-20_int20000_win1y\"),\n", + " (788, \"drifttrigger_mmd-rollavg-1.0-20_int20000_win1y\"),\n", + " (789, \"drifttrigger_mmd-quant-0.3-20_int20000_win1y\"),\n", + " (790, \"drifttrigger_mmd-rollavg-2.0-20_int20000_win1y\"),\n", + " (674, \"performancetrigger_static-0.45-int20000\"),\n", + " (675, \"performancetrigger_dynamic-quant-0.05-20-int20000\"),\n", + " (676, \"performancetrigger_dynamic-rollavg-0.3-20-int20000\"),\n", + " (677, \"performancetrigger_num_misclass-100000-exp-0.6-red-False--int20000\"),\n", + " (678, \"performancetrigger_dynamic-rollavg-0.2-20-int20000\"),\n", + " (679, \"performancetrigger_dynamic-rollavg-0.1-20-int20000\"),\n", + " (680, \"performancetrigger_static-0.5-int20000\"),\n", + " (681, \"performancetrigger_dynamic-quant-0.15-20-int20000\"),\n", + " (682, \"performancetrigger_num_misclass-50000-exp-0.6-red-False--int20000\"),\n", + " (723, \"performancetrigger_num_misclass-30000-exp-0.6-red-False--int20000\"),\n", + " (756, \"performancetrigger_num_misclass-15000-exp-0.6-red-False--int20000\"),\n", + " (762, \"performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\"),\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: 
list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " \"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "\n", + "maximum_min = pd.to_datetime(min_active_eval_center_per_pipeline).max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.to_datetime(\"2006-01-01\")\n", + "\n", + "df_adjusted = df_adjusted[pd.to_datetime(df_adjusted[\"interval_center\"]) >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = 
df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of trigger per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. 
avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "# assert no cost triggers\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"Cost\").any()\n", + "\n", + 
"renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_arxiv_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.7,\n", + " width_factor=0.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " 
style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb new file mode 100644 index 000000000..ba2991250 --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/hp_perf_tradeoff.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/10_baselines_time\"),\n", + " 
Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/11_baselines_amount\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/21_datadrift_dynamic\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/num_misclass\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "\n", + "# pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " (273, \"timetrigger_26w\"),\n", + " (275, \"timetrigger_13w\"),\n", + " (278, \"timetrigger_1y\"),\n", + " # (280, 'timetrigger_4y'),\n", + " # (282, 'timetrigger_2y'),\n", + " (274, 
\"dataamount_10000\"),\n", + " (276, \"dataamount_5000\"),\n", + " (277, \"dataamount_20000\"),\n", + " # (279, 'dataamount_80000'),\n", + " # (281, 'dataamount_40000'),\n", + " (745, \"dataamount_15000\"),\n", + " # (750, 'dataamount_30000'),\n", + " (763, \"drifttrigger_mmd-quant-0.05-20_int1500_win1y\"),\n", + " (769, \"drifttrigger_mmd-quant-0.15-20_int1500_win1y\"),\n", + " (770, \"drifttrigger_mmd-rollavg-5.0-20_int1500_win1y\"),\n", + " (771, \"drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\"),\n", + " (772, \"drifttrigger_mmd-rollavg-1.0-20_int1500_win1y\"),\n", + " (774, \"drifttrigger_mmd-rollavg-0.5-20_int1500_win1y\"),\n", + " (689, \"performancetrigger_num_misclass-8000-exp-0.5-red-False--int1500y\"),\n", + " (705, \"performancetrigger_num_misclass-8000-exp-0.6-red-False--int1500y\"),\n", + " (722, \"performancetrigger_num_misclass-4000-exp-0.5-red-False--int1500y\"),\n", + " (724, \"performancetrigger_num_misclass-4000-exp-0.6-red-False--int1500y\"),\n", + " (725, \"performancetrigger_num_misclass-1000-exp-0.5-red-False--int1500y\"),\n", + " (726, \"performancetrigger_num_misclass-1000-exp-0.6-red-False--int1500y\"),\n", + " (773, \"performancetrigger_num_misclass-500-exp-0.5-red-False--int1500y\"),\n", + " (775, \"performancetrigger_num_misclass-250-exp-0.6-red-False--int1500y\"),\n", + " (776, \"performancetrigger_num_misclass-500-exp-0.6-red-False--int1500y\"),\n", + " (778, \"performancetrigger_num_misclass-250-exp-0.5-red-False--int1500y\"),\n", + " (635, \"performancetrigger_static-0.45-int1500y\"),\n", + " (636, \"performancetrigger_dynamic-quant-0.05-15-int1500y\"),\n", + " (637, \"performancetrigger_dynamic-rollavg-0.3-15-int1500y\"),\n", + " (639, \"performancetrigger_static-0.5-int1500y\"),\n", + " (640, \"performancetrigger_dynamic-rollavg-0.3-30-int1500y\"),\n", + " (642, \"performancetrigger_dynamic-quant-0.05-30-int1500y\"),\n", + " (643, \"performancetrigger_static-0.55-int1500y\"),\n", + " (645, 
\"performancetrigger_dynamic-rollavg-0.2-15-int1500y\"),\n", + " (646, \"performancetrigger_dynamic-quant-0.15-15-int1500y\"),\n", + " (647, \"performancetrigger_static-0.6-int1500y\"),\n", + " (649, \"performancetrigger_dynamic-rollavg-0.2-30-int1500y\"),\n", + " (650, \"performancetrigger_dynamic-quant-0.15-30-int1500y\"),\n", + " (651, \"performancetrigger_dynamic-rollavg-0.1-15-int1500y\"),\n", + " (653, \"performancetrigger_dynamic-quant-0.3-15-int1500y\"),\n", + " (654, \"performancetrigger_dynamic-rollavg-0.1-30-int1500y\"),\n", + " (656, \"performancetrigger_dynamic-quant-0.3-30-int1500y\"),\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " 
\"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "maximum_min = min_active_eval_center_per_pipeline.max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.Timestamp(\"2013-05-01\")\n", + "\n", + "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = 
df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of trigger per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " 
)\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "# assert no cost triggers\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"Cost\").any()\n", + "\n", + "renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=1,\n", + " width_factor=1,\n", + ")\n", + "\n", + "# save_plot(fig, \"_all_tradeoff_yearbook_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=1,\n", + " width_factor=1,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb new file mode 100644 index 000000000..811a5f083 --- /dev/null +++ b/analytics/plotting/rh_thesis/compare_all/yb_perf_tradeoff.ipynb @@ -0,0 +1,891 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", 
+ "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " df_aggregate_eval_metric,\n", + " dfs_models_and_evals,\n", + " patch_yearbook_time,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter\n", + "from modyn.supervisor.internal.grpc.enums import PipelineStage\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/10_baselines_time\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/11_baselines_amount\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/20_datadrift_static\"),\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/21_datadrift_dynamic\"),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/40_cost_dataincorporation_latency\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/41_avoidable_miclass_cost\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + 
"for dir in pipelines_dirs:\n", + " print(\"Reading\", dir)\n", + " dir_pipelines = list_pipelines(dir)\n", + " print(dir_pipelines)\n", + " pipelines.update(dir_pipelines)\n", + "\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = {p_id: (pname, p_path) for p_id, (pname, p_path) in pipelines.items()}\n", + "pipeline_ids = pipelines.keys()\n", + "pipeline_ids = [\n", + " y\n", + " for y, _ in [\n", + " # (20, 'yearbook_timetrigger_40y'),\n", + " (23, \"yearbook_timetrigger_25y\"),\n", + " (24, \"yearbook_timetrigger_15y\"),\n", + " (25, \"yearbook_timetrigger_10y\"),\n", + " (26, \"yearbook_timetrigger_5y\"),\n", + " (27, \"yearbook_timetrigger_4y\"),\n", + " (29, \"yearbook_timetrigger_3y\"),\n", + " (31, \"yearbook_timetrigger_2y\"),\n", + " (33, \"yearbook_timetrigger_1y\"),\n", + " (21, \"yearbook_dataamount_250\"),\n", + " (30, \"yearbook_dataamount_500\"),\n", + " (32, \"yearbook_dataamount_1000\"),\n", + " (35, \"yearbook_dataamount_2500\"),\n", + " (36, \"yearbook_dataamount_5000\"),\n", + " (37, \"yearbook_dataamount_10000\"),\n", + " # (38, 'yearbook_dataamount_15000'),\n", + " # (39, 'yearbook_dataamount_30000'),\n", + " # duplicates\n", + " # (666, 'yearbook_dataamount_1000'),\n", + " # (667, 'yearbook_dataamount_250'),\n", + " # (668, 
'yearbook_dataamount_2500'),\n", + " # (669, 'yearbook_dataamount_5000'),\n", + " # (670, 'yearbook_dataamount_10000'),\n", + " # (671, 'yearbook_dataamount_500'),\n", + " # (672, 'yearbook_dataamount_15000'),\n", + " # (673, 'yearbook_dataamount_30000'),\n", + " # (63, 'yearbook_drifttrigger_mmd-0.09_int100_win1d'),\n", + " # (64, 'yearbook_drifttrigger_mmd-0.07_int100_win1d'),\n", + " # (65, 'yearbook_drifttrigger_mmd-0.12_int100_win1d'),\n", + " # (66, 'yearbook_drifttrigger_mmd-0.15_int100_win1d'),\n", + " # (67, 'yearbook_drifttrigger_mmd-0.03_int100_win1d'),\n", + " # (68, 'yearbook_drifttrigger_mmd-0.05_int100_win1d'),\n", + " # (69, 'yearbook_drifttrigger_mmd-0.12_int100_win4d'),\n", + " # (70, 'yearbook_drifttrigger_mmd-0.2_int100_win1d'),\n", + " # (71, 'yearbook_drifttrigger_mmd-0.4_int100_win1d'),\n", + " # (72, 'yearbook_drifttrigger_mmd-0.15_int100_win4d'),\n", + " # (73, 'yearbook_drifttrigger_mmd-0.09_int100_win4d'),\n", + " # (74, 'yearbook_drifttrigger_mmd-0.07_int100_win4d'),\n", + " # (75, 'yearbook_drifttrigger_mmd-0.12_int100_win10d'),\n", + " # (76, 'yearbook_drifttrigger_mmd-0.03_int100_win4d'),\n", + " # (77, 'yearbook_drifttrigger_mmd-0.05_int100_win4d'),\n", + " # (78, 'yearbook_drifttrigger_mmd-0.15_int100_win10d'),\n", + " # (79, 'yearbook_drifttrigger_mmd-0.4_int100_win4d'),\n", + " # (80, 'yearbook_drifttrigger_mmd-0.2_int100_win4d'),\n", + " (81, \"yearbook_drifttrigger_mmd-0.12_int250_win1d\"),\n", + " # (82, 'yearbook_drifttrigger_mmd-0.09_int100_win10d'),\n", + " # (83, 'yearbook_drifttrigger_mmd-0.07_int100_win10d'),\n", + " (84, \"yearbook_drifttrigger_mmd-0.15_int250_win1d\"),\n", + " (85, \"yearbook_drifttrigger_mmd-0.12_int250_win4d\"),\n", + " # (86, 'yearbook_drifttrigger_mmd-0.03_int100_win10d'),\n", + " # (87, 'yearbook_drifttrigger_mmd-0.05_int100_win10d'),\n", + " (88, \"yearbook_drifttrigger_mmd-0.15_int250_win4d\"),\n", + " (89, \"yearbook_drifttrigger_mmd-0.12_int250_win10d\"),\n", + " # (90, 
'yearbook_drifttrigger_mmd-0.2_int100_win10d'),\n", + " (91, \"yearbook_drifttrigger_mmd-0.15_int250_win10d\"),\n", + " # (92, 'yearbook_drifttrigger_mmd-0.4_int100_win10d'),\n", + " # (93, 'yearbook_drifttrigger_mmd-0.12_int500_win1d'),\n", + " # (94, 'yearbook_drifttrigger_mmd-0.15_int500_win1d'),\n", + " (95, \"yearbook_drifttrigger_mmd-0.07_int250_win1d\"),\n", + " # (96, 'yearbook_drifttrigger_mmd-0.12_int500_win4d'),\n", + " (97, \"yearbook_drifttrigger_mmd-0.09_int250_win1d\"),\n", + " # (98, 'yearbook_drifttrigger_mmd-0.15_int500_win4d'),\n", + " (99, \"yearbook_drifttrigger_mmd-0.05_int250_win1d\"),\n", + " (100, \"yearbook_drifttrigger_mmd-0.03_int250_win1d\"),\n", + " # (101, 'yearbook_drifttrigger_mmd-0.12_int500_win10d'),\n", + " # (102, 'yearbook_drifttrigger_mmd-0.15_int500_win10d'),\n", + " (103, \"yearbook_drifttrigger_mmd-0.2_int250_win1d\"),\n", + " (104, \"yearbook_drifttrigger_mmd-0.4_int250_win1d\"),\n", + " # (105, 'yearbook_drifttrigger_mmd-0.12_int1000_win1d'),\n", + " # (106, 'yearbook_drifttrigger_mmd-0.15_int1000_win1d'),\n", + " (107, \"yearbook_drifttrigger_mmd-0.07_int250_win4d\"),\n", + " # (108, 'yearbook_drifttrigger_mmd-0.12_int1000_win4d'),\n", + " (109, \"yearbook_drifttrigger_mmd-0.09_int250_win4d\"),\n", + " # (110, 'yearbook_drifttrigger_mmd-0.15_int1000_win4d'),\n", + " # (111, 'yearbook_drifttrigger_mmd-0.12_int1000_win10d'),\n", + " (112, \"yearbook_drifttrigger_mmd-0.05_int250_win4d\"),\n", + " (113, \"yearbook_drifttrigger_mmd-0.03_int250_win4d\"),\n", + " # (114, 'yearbook_drifttrigger_mmd-0.15_int1000_win10d'),\n", + " (115, \"yearbook_drifttrigger_mmd-0.2_int250_win4d\"),\n", + " (116, \"yearbook_drifttrigger_mmd-0.4_int250_win4d\"),\n", + " (117, \"yearbook_drifttrigger_mmd-0.07_int250_win10d\"),\n", + " (118, \"yearbook_drifttrigger_mmd-0.09_int250_win10d\"),\n", + " (119, \"yearbook_drifttrigger_mmd-0.05_int250_win10d\"),\n", + " # (122, 'yearbook_drifttrigger_mmd-0.09_int500_win1d'),\n", + " (123, 
\"yearbook_drifttrigger_mmd-0.2_int250_win10d\"),\n", + " # (126, 'yearbook_drifttrigger_mmd-0.09_int500_win4d'),\n", + " # (127, 'yearbook_drifttrigger_mmd-0.07_int500_win1d'),\n", + " # (132, 'yearbook_drifttrigger_mmd-0.05_int500_win1d'),\n", + " # (133, 'yearbook_drifttrigger_mmd-0.4_int500_win10d'),\n", + " # (136, 'yearbook_drifttrigger_mmd-0.4_int1000_win1d'),\n", + " # (137, 'yearbook_drifttrigger_mmd-0.2_int500_win1d'),\n", + " # (138, 'yearbook_drifttrigger_mmd-0.09_int1000_win4d'),\n", + " # (139, 'yearbook_drifttrigger_mmd-0.07_int500_win4d'),\n", + " # (144, 'yearbook_drifttrigger_mmd-0.4_int1000_win10d'),\n", + " # (145, 'yearbook_drifttrigger_mmd-0.05_int500_win4d'),\n", + " # (146, 'yearbook_drifttrigger_mmd-0.2_int500_win4d'),\n", + " # (147, 'yearbook_drifttrigger_mmd-0.07_int500_win10d'),\n", + " # (148, 'yearbook_drifttrigger_mmd-0.05_int500_win10d'),\n", + " # (149, 'yearbook_drifttrigger_mmd-0.2_int500_win10d'),\n", + " # (150, 'yearbook_drifttrigger_mmd-0.07_int1000_win1d'),\n", + " # (151, 'yearbook_drifttrigger_mmd-0.05_int1000_win1d'),\n", + " # (152, 'yearbook_drifttrigger_mmd-0.2_int1000_win1d'),\n", + " # (153, 'yearbook_drifttrigger_mmd-0.07_int1000_win4d'),\n", + " # (154, 'yearbook_drifttrigger_mmd-0.05_int1000_win4d'),\n", + " # (155, 'yearbook_drifttrigger_mmd-0.2_int1000_win4d'),\n", + " # (156, 'yearbook_drifttrigger_mmd-0.07_int1000_win10d'),\n", + " # (157, 'yearbook_drifttrigger_mmd-0.05_int1000_win10d'),\n", + " # (158, 'yearbook_drifttrigger_mmd-0.2_int1000_win10d'),\n", + " (159, \"yearbook_drifttrigger_mmd-0.03_int250_win10d\"),\n", + " # (160, 'yearbook_drifttrigger_mmd-0.03_int1000_win1d'),\n", + " # (161, 'yearbook_drifttrigger_mmd-0.4_int500_win4d'),\n", + " # (162, 'yearbook_drifttrigger_mmd-0.03_int500_win10d'),\n", + " # (163, 'yearbook_drifttrigger_mmd-0.4_int1000_win4d'),\n", + " # (164, 'yearbook_drifttrigger_mmd-0.09_int1000_win10d'),\n", + " # (165, 'yearbook_drifttrigger_mmd-0.03_int500_win1d'),\n", + " (166, 
\"yearbook_drifttrigger_mmd-0.4_int250_win10d\"),\n", + " # (167, 'yearbook_drifttrigger_mmd-0.09_int1000_win1d'),\n", + " # (168, 'yearbook_drifttrigger_mmd-0.09_int500_win10d'),\n", + " # (169, 'yearbook_drifttrigger_mmd-0.03_int500_win4d'),\n", + " # (170, 'yearbook_drifttrigger_mmd-0.4_int500_win1d'),\n", + " # (171, 'yearbook_drifttrigger_mmd-0.03_int1000_win10d'),\n", + " # (172, 'yearbook_drifttrigger_mmd-0.03_int1000_win4d'),\n", + " # (329, 'yearbook_drifttrigger_mmd-quant-0.05-10_int500_win4d'),\n", + " # (330, 'yearbook_drifttrigger_mmd-quant-0.05-20_int500_win4d\\n'),\n", + " # (331, 'yearbook_drifttrigger_mmd-quant-0.05-30_int100_win4d'),\n", + " # (332, 'yearbook_drifttrigger_mmd-quant-0.1-10_int500_win4d'),\n", + " # (333, 'yearbook_drifttrigger_mmd-quant-0.15-20_int500_win4d'),\n", + " # (334, 'yearbook_drifttrigger_mmd-quant-0.15-10_int500_win4d'),\n", + " # (335, 'yearbook_drifttrigger_mmd-quant-0.1-20_int500_win4d'),\n", + " # (336, 'yearbook_drifttrigger_mmd-quant-0.3-20_int500_win4d'),\n", + " # (337, 'yearbook_drifttrigger_mmd-quant-0.1-30_int500_win4d'),\n", + " # (338, 'yearbook_drifttrigger_mmd-quant-0.3-10_int500_win4d'),\n", + " # (339, 'yearbook_drifttrigger_mmd-rollavg-0.05-20_int500_win4d'),\n", + " (340, \"yearbook_drifttrigger_mmd-quant-0.1-10_int250_win4d\"),\n", + " # (341, 'yearbook_drifttrigger_mmd-quant-0.15-30_int100_win4d'),\n", + " # (342, 'yearbook_drifttrigger_mmd-rollavg-0.05-10_int500_win4d'),\n", + " # (343, 'yearbook_drifttrigger_mmd-rollavg-0.2-20_int500_win4d'),\n", + " # (344, 'yearbook_drifttrigger_mmd-rollavg-0.2-10_int500_win4d'),\n", + " (345, \"yearbook_drifttrigger_mmd-quant-0.1-20_int250_win4d\"),\n", + " # (346, 'yearbook_drifttrigger_mmd-rollavg-0.5-20_int500_win4d'),\n", + " # (347, 'yearbook_drifttrigger_mmd-rollavg-0.5-10_int500_win4d'),\n", + " # (348, 'yearbook_drifttrigger_mmd-rollavg-1.0-20_int500_win4d'),\n", + " (349, \"yearbook_drifttrigger_mmd-quant-0.1-30_int250_win4d\"),\n", + " # (350, 
'yearbook_drifttrigger_mmd-rollavg-1.0-10_int500_win4d'),\n", + " # (351, 'yearbook_drifttrigger_mmd-rollavg-2.0-20_int500_win4d'),\n", + " # (352, 'yearbook_drifttrigger_mmd-quant-0.3-30_int100_win4d'),\n", + " (353, \"yearbook_drifttrigger_mmd-quant-0.05-20_int250_win4d\"),\n", + " # (354, 'yearbook_drifttrigger_mmd-rollavg-2.0-10_int500_win4d'),\n", + " # (355, 'yearbook_drifttrigger_mmd-quant-0.1-10_int100_win4d'),\n", + " (356, \"yearbook_drifttrigger_mmd-quant-0.05-10_int250_win4d\"),\n", + " (357, \"yearbook_drifttrigger_mmd-quant-0.15-20_int250_win4d\"),\n", + " (358, \"yearbook_drifttrigger_mmd-quant-0.15-10_int250_win4d\"),\n", + " (359, \"yearbook_drifttrigger_mmd-quant-0.3-20_int250_win4d\"),\n", + " # (360, 'yearbook_drifttrigger_mmd-rollavg-0.05-30_int100_win4d'),\n", + " # (361, 'yearbook_drifttrigger_mmd-quant-0.1-20_int100_win4d'),\n", + " (362, \"yearbook_drifttrigger_mmd-quant-0.3-10_int250_win4d\"),\n", + " (363, \"yearbook_drifttrigger_mmd-rollavg-0.05-20_int250_win4d\"),\n", + " (364, \"yearbook_drifttrigger_mmd-rollavg-0.05-10_int250_win4d\"),\n", + " (365, \"yearbook_drifttrigger_mmd-rollavg-0.2-20_int250_win4d\"),\n", + " # (366, 'yearbook_drifttrigger_mmd-quant-0.1-30_int100_win4d'),\n", + " # (367, 'yearbook_drifttrigger_mmd-rollavg-0.2-30_int100_win4d'),\n", + " (368, \"yearbook_drifttrigger_mmd-rollavg-0.2-10_int250_win4d\"),\n", + " (369, \"yearbook_drifttrigger_mmd-rollavg-0.5-20_int250_win4d\"),\n", + " (370, \"yearbook_drifttrigger_mmd-rollavg-1.0-20_int250_win4d\"),\n", + " (371, \"yearbook_drifttrigger_mmd-rollavg-0.5-10_int250_win4d\"),\n", + " (372, \"yearbook_drifttrigger_mmd-rollavg-2.0-20_int250_win4d\"),\n", + " (373, \"yearbook_drifttrigger_mmd-rollavg-1.0-10_int250_win4d\"),\n", + " # (374, 'yearbook_drifttrigger_mmd-rollavg-0.5-30_int100_win4d'),\n", + " # (375, 'yearbook_drifttrigger_mmd-quant-0.05-20_int100_win4d'),\n", + " (376, \"yearbook_drifttrigger_mmd-rollavg-2.0-10_int250_win4d\"),\n", + " # (377, 
'yearbook_drifttrigger_mmd-quant-0.05-10_int100_win4d'),\n", + " # (378, 'yearbook_drifttrigger_mmd-rollavg-1.0-30_int100_win4d'),\n", + " # (379, 'yearbook_drifttrigger_mmd-quant-0.15-20_int100_win4d'),\n", + " # (380, 'yearbook_drifttrigger_mmd-quant-0.15-10_int100_win4d'),\n", + " # (381, 'yearbook_drifttrigger_mmd-rollavg-2.0-30_int100_win4d'),\n", + " # (382, 'yearbook_drifttrigger_mmd-quant-0.3-20_int100_win4d'),\n", + " (383, \"yearbook_drifttrigger_mmd-quant-0.05-30_int250_win4d\"),\n", + " # (384, 'yearbook_drifttrigger_mmd-quant-0.3-10_int100_win4d'),\n", + " (385, \"yearbook_drifttrigger_mmd-quant-0.15-30_int250_win4d\"),\n", + " (386, \"yearbook_drifttrigger_mmd-quant-0.3-30_int250_win4d\"),\n", + " # (387, 'yearbook_drifttrigger_mmd-rollavg-0.05-20_int100_win4d'),\n", + " (388, \"yearbook_drifttrigger_mmd-rollavg-0.05-30_int250_win4d\"),\n", + " # (389, 'yearbook_drifttrigger_mmd-rollavg-0.05-10_int100_win4d'),\n", + " (390, \"yearbook_drifttrigger_mmd-rollavg-0.2-30_int250_win4d\"),\n", + " # (391, 'yearbook_drifttrigger_mmd-rollavg-0.2-20_int100_win4d'),\n", + " (392, \"yearbook_drifttrigger_mmd-rollavg-0.5-30_int250_win4d\"),\n", + " (393, \"yearbook_drifttrigger_mmd-rollavg-1.0-30_int250_win4d\"),\n", + " # (394, 'yearbook_drifttrigger_mmd-rollavg-0.2-10_int100_win4d'),\n", + " (395, \"yearbook_drifttrigger_mmd-rollavg-2.0-30_int250_win4d\"),\n", + " # (396, 'yearbook_drifttrigger_mmd-rollavg-0.5-20_int100_win4d'),\n", + " # (397, 'yearbook_drifttrigger_mmd-quant-0.05-30_int500_win4d'),\n", + " # (398, 'yearbook_drifttrigger_mmd-quant-0.15-30_int500_win4d'),\n", + " # (399, 'yearbook_drifttrigger_mmd-rollavg-0.5-10_int100_win4d'),\n", + " # (400, 'yearbook_drifttrigger_mmd-quant-0.3-30_int500_win4d'),\n", + " # (401, 'yearbook_drifttrigger_mmd-rollavg-1.0-20_int100_win4d'),\n", + " # (402, 'yearbook_drifttrigger_mmd-rollavg-0.05-30_int500_win4d'),\n", + " # (403, 'yearbook_drifttrigger_mmd-rollavg-0.2-30_int500_win4d'),\n", + " # (404, 
'yearbook_drifttrigger_mmd-rollavg-1.0-10_int100_win4d'),\n", + " # (405, 'yearbook_drifttrigger_mmd-rollavg-0.5-30_int500_win4d'),\n", + " # (406, 'yearbook_drifttrigger_mmd-rollavg-2.0-20_int100_win4d'),\n", + " # (407, 'yearbook_drifttrigger_mmd-rollavg-1.0-30_int500_win4d'),\n", + " # (408, 'yearbook_drifttrigger_mmd-rollavg-2.0-30_int500_win4d'),\n", + " # (409, 'yearbook_drifttrigger_mmd-rollavg-2.0-10_int100_win4d'),\n", + " # (683,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.85-red-True--int250y'),\n", + " # (685,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.85-red-False--int250y'),\n", + " (686, \"yearbook_performancetrigger_num_misclass-1500-exp-0.9-red-True--int250y\"),\n", + " (687, \"yearbook_performancetrigger_num_misclass-1500-exp-0.9-red-False--int250y\"),\n", + " # (688,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.95-red-True--int250y'),\n", + " # (704,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.85-red-False--int250y'),\n", + " # (727,\n", + " # 'yearbook_performancetrigger_num_misclass-1500-exp-0.95-red-False--int250y'),\n", + " # (728,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.85-red-True--int250y'),\n", + " # (729,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.85-red-False--int250y'),\n", + " # (730,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.85-red-True--int250y'),\n", + " (731, \"yearbook_performancetrigger_num_misclass-1000-exp-0.9-red-True--int250y\"),\n", + " # (732,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.85-red-False--int250y'),\n", + " (733, \"yearbook_performancetrigger_num_misclass-1000-exp-0.9-red-False--int250y\"),\n", + " (734, \"yearbook_performancetrigger_num_misclass-50-exp-0.9-red-True--int250y\"),\n", + " # (735,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.95-red-True--int250y'),\n", + " (736, 
\"yearbook_performancetrigger_num_misclass-50-exp-0.9-red-False--int250y\"),\n", + " # (737,\n", + " # 'yearbook_performancetrigger_num_misclass-1000-exp-0.95-red-False--int250y'),\n", + " # (738,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.95-red-True--int250y'),\n", + " # (739,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.85-red-True--int250y'),\n", + " # (740,\n", + " # 'yearbook_performancetrigger_num_misclass-50-exp-0.95-red-False--int250y'),\n", + " # (741,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.85-red-False--int250y'),\n", + " (742, \"yearbook_performancetrigger_num_misclass-500-exp-0.9-red-True--int250y\"),\n", + " (743, \"yearbook_performancetrigger_num_misclass-500-exp-0.9-red-False--int250y\"),\n", + " # (744,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.95-red-True--int250y'),\n", + " # (746,\n", + " # 'yearbook_performancetrigger_num_misclass-500-exp-0.95-red-False--int250y'),\n", + " # (747,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.85-red-True--int250y'),\n", + " (749, \"yearbook_performancetrigger_num_misclass-200-exp-0.9-red-True--int250y\"),\n", + " (751, \"yearbook_performancetrigger_num_misclass-200-exp-0.9-red-False--int250y\"),\n", + " # (753,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.95-red-True--int250y'),\n", + " # (754,\n", + " # 'yearbook_performancetrigger_num_misclass-200-exp-0.95-red-False--int250y'),\n", + " # (755,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.85-red-True--int250y'),\n", + " # (757,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.85-red-False--int250y'),\n", + " (758, \"yearbook_performancetrigger_num_misclass-100-exp-0.9-red-True--int250y\"),\n", + " (759, \"yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\"),\n", + " # (760,\n", + " # 'yearbook_performancetrigger_num_misclass-100-exp-0.95-red-True--int250y'),\n", + " # (761,\n", + " # 
'yearbook_performancetrigger_num_misclass-100-exp-0.95-red-False--int250y'),\n", + " # (410, 'yearbook_performancetrigger_static-0.7-int100y'),\n", + " (411, \"yearbook_performancetrigger_static-0.7-int250y\"),\n", + " # (412, 'yearbook_performancetrigger_static-0.7-int500y'),\n", + " # (413, 'yearbook_performancetrigger_static-0.75-int500y'),\n", + " (414, \"yearbook_performancetrigger_static-0.75-int250y\"),\n", + " # (416, 'yearbook_performancetrigger_static-0.8-int500y'),\n", + " # (417, 'yearbook_performancetrigger_static-0.75-int100y'),\n", + " (418, \"yearbook_performancetrigger_static-0.8-int250y\"),\n", + " # (419, 'yearbook_performancetrigger_static-0.85-int500y'),\n", + " (421, \"yearbook_performancetrigger_static-0.85-int250y\"),\n", + " # (422, 'yearbook_performancetrigger_static-0.875-int500y'),\n", + " # (423, 'yearbook_performancetrigger_static-0.8-int100y'),\n", + " # (424, 'yearbook_performancetrigger_static-0.9-int500y'),\n", + " (425, \"yearbook_performancetrigger_static-0.875-int250y\"),\n", + " # (427, 'yearbook_performancetrigger_static-0.925-int500y'),\n", + " # (428, 'yearbook_performancetrigger_static-0.85-int100y'),\n", + " (429, \"yearbook_performancetrigger_static-0.9-int250y\"),\n", + " # (430, 'yearbook_performancetrigger_static-0.95-int500y'),\n", + " (432, \"yearbook_performancetrigger_static-0.925-int250y\"),\n", + " # (433, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int500y'),\n", + " # (434, 'yearbook_performancetrigger_static-0.875-int100y'),\n", + " # (436, 'yearbook_performancetrigger_dynamic-quant-0.05-20-int500y'),\n", + " (437, \"yearbook_performancetrigger_static-0.95-int250y\"),\n", + " # (438, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int500y'),\n", + " # (440, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int500y'),\n", + " # (441, 'yearbook_performancetrigger_static-0.9-int100y'),\n", + " # (442, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int250y'),\n", + " # (443, 
'yearbook_performancetrigger_dynamic-quant-0.15-20-int500y'),\n", + " (445, \"yearbook_performancetrigger_dynamic-quant-0.05-20-int250y\"),\n", + " # (446, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int500y'),\n", + " # (447, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int500y'),\n", + " # (448, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int250y'),\n", + " # (450, 'yearbook_performancetrigger_static-0.925-int100y'),\n", + " # (451, 'yearbook_performancetrigger_dynamic-quant-0.3-20-int500y'),\n", + " # (452, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int250y'),\n", + " # (454, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int500y'),\n", + " (455, \"yearbook_performancetrigger_dynamic-quant-0.15-20-int250y\"),\n", + " # (458, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int250y'),\n", + " # (459, 'yearbook_performancetrigger_static-0.95-int100y'),\n", + " # (463, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int250y'),\n", + " # (464, 'yearbook_performancetrigger_dynamic-rollavg-0.05-10-int500y'),\n", + " # (465, 'yearbook_performancetrigger_dynamic-rollavg-0.05-20-int500y'),\n", + " (467, \"yearbook_performancetrigger_dynamic-quant-0.3-20-int250y\"),\n", + " # (468, 'yearbook_performancetrigger_dynamic-rollavg-0.05-30-int500y'),\n", + " # (469, 'yearbook_performancetrigger_dynamic-rollavg-0.1-10-int500y'),\n", + " # (471, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int250y'),\n", + " # (472, 'yearbook_performancetrigger_dynamic-rollavg-0.1-20-int500y'),\n", + " # (473, 'yearbook_performancetrigger_dynamic-quant-0.05-10-int100y'),\n", + " # (474, 'yearbook_performancetrigger_dynamic-rollavg-0.1-30-int500y'),\n", + " # (475, 'yearbook_performancetrigger_dynamic-rollavg-0.2-10-int500y'),\n", + " # (478, 'yearbook_performancetrigger_dynamic-rollavg-0.2-20-int500y'),\n", + " # (479, 'yearbook_performancetrigger_dynamic-rollavg-0.2-30-int500y'),\n", + " (481, 
\"yearbook_performancetrigger_dynamic-quant-0.05-20-int100y\"),\n", + " # (483, 'yearbook_performancetrigger_dynamic-rollavg-0.3-10-int500y'),\n", + " # (484, 'yearbook_performancetrigger_dynamic-rollavg-0.3-20-int500y'),\n", + " # (486, 'yearbook_performancetrigger_dynamic-rollavg-0.3-30-int500y'),\n", + " # (489, 'yearbook_performancetrigger_dynamic-quant-0.05-30-int100y'),\n", + " # (491, 'yearbook_performancetrigger_dynamic-rollavg-0.05-10-int250y'),\n", + " (494, \"yearbook_performancetrigger_dynamic-rollavg-0.05-20-int250y\"),\n", + " # (497, 'yearbook_performancetrigger_dynamic-quant-0.15-10-int100y'),\n", + " # (499, 'yearbook_performancetrigger_dynamic-rollavg-0.05-30-int250y'),\n", + " # (503, 'yearbook_performancetrigger_dynamic-rollavg-0.1-10-int250y'),\n", + " (506, \"yearbook_performancetrigger_dynamic-rollavg-0.1-20-int250y\"),\n", + " (507, \"yearbook_performancetrigger_dynamic-quant-0.15-20-int100y\"),\n", + " # (509, 'yearbook_performancetrigger_dynamic-rollavg-0.1-30-int250y'),\n", + " # (513, 'yearbook_performancetrigger_dynamic-rollavg-0.2-10-int250y'),\n", + " (516, \"yearbook_performancetrigger_dynamic-rollavg-0.2-20-int250y\"),\n", + " # (519, 'yearbook_performancetrigger_dynamic-quant-0.15-30-int100y'),\n", + " # (521, 'yearbook_performancetrigger_dynamic-rollavg-0.2-30-int250y'),\n", + " # (524, 'yearbook_performancetrigger_dynamic-rollavg-0.3-10-int250y'),\n", + " # (527, 'yearbook_performancetrigger_dynamic-rollavg-0.3-20-int250y'),\n", + " # (529, 'yearbook_performancetrigger_dynamic-rollavg-0.3-30-int250y'),\n", + " # (530, 'yearbook_performancetrigger_dynamic-quant-0.3-10-int100y'),\n", + " (539, \"yearbook_performancetrigger_dynamic-quant-0.3-20-int100y\"),\n", + " # (548, 'yearbook_performancetrigger_dynamic-quant-0.3-30-int100y'),\n", + " # (818, 'yearbook_costtrigger_data_inc_int250_exch15552000'),\n", + " # (819, 'yearbook_costtrigger_data_inc_int250_exch13824000'),\n", + " (821, 
\"yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse\"),\n", + " # (822, 'yearbook_costtrigger_data_inc_int250_exch12096000'),\n", + " # (825, 'yearbook_costtrigger_data_inc_int250_exch10368000'),\n", + " # (835, 'yearbook_costtrigger_data_inc_int250_exch129600000'),\n", + " # (836, 'yearbook_costtrigger_data_inc_int250_exch34560000'),\n", + " # (837, 'yearbook_costtrigger_data_inc_int250_exch25920000'),\n", + " # (838, 'yearbook_costtrigger_data_inc_int250_exch4320000'),\n", + " # (821, 'yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse'),\n", + " (839, \"yearbook_costtrigger_avoidable_misclass_int250_exch86400000_redFalse\"),\n", + " (840, \"yearbook_costtrigger_avoidable_misclass_int250_exch864000_redFalse\"),\n", + " (841, \"yearbook_costtrigger_avoidable_misclass_int250_exch864000_redFalse\"),\n", + " (842, \"yearbook_costtrigger_avoidable_misclass_int250_exch864.0_redFalse\"),\n", + " # (843, 'yearbook_costtrigger_avoidable_misclass_int250_exch8640000_redFalse'),\n", + " (844, \"yearbook_costtrigger_avoidable_misclass_int250_exch8640.0_redFalse\"),\n", + " # (846, 'yearbook_costtrigger_avoidable_misclass_int250_exch8640000000_redFalse'),\n", + " # (847, 'yearbook_costtrigger_avoidable_misclass_int250_exch864000000_redFalse'),\n", + " (848, \"yearbook_costtrigger_avoidable_misclass_int250_exch64800.0_redFalse\"),\n", + " # (849, 'yearbook_costtrigger_avoidable_misclass_int250_exch43200.0_redFalse'),\n", + " (850, \"yearbook_costtrigger_avoidable_misclass_int250_exch21600.0_redFalse\"),\n", + " # (851, 'yearbook_costtrigger_avoidable_misclass_int250_exch4320000000_redFalse')]\n", + " ]\n", + "]\n", + "\n", + "[(p_id, pname) for p_id, (pname, _) in pipelines.items() if p_id in pipeline_ids]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: 
list[pd.DataFrame] = []\n", + "df_leaf_list: list[pd.DataFrame] = []\n", + "\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_single[\"pipeline_id\"] = pipeline_id\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + " _, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_leaf[\"id\"].unique())\n", + "assert set(df_leaf[\"id\"].unique()) == {\n", + " \"TRAIN\",\n", + " \"INIT_CLUSTER_CONNECTION\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"TRAINING_COMPLETED\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"EVALUATE\",\n", + " \"DONE\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " 
patch_yearbook_time(df_leaf, \"sample_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reduce evaluation interval to interval where all policies have evaluations\n", + "min_active_eval_center_per_pipeline = (\n", + " df_adjusted[df_adjusted[composite_model_variant]].groupby(\"pipeline_ref\")[\"interval_center\"].min()\n", + ")\n", + "maximum_min = min_active_eval_center_per_pipeline.max()\n", + "print(maximum_min, min_active_eval_center_per_pipeline)\n", + "\n", + "assert maximum_min < pd.Timestamp(\"1962-01-01\")\n", + "\n", + "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", + "df_adjusted[\"interval_center\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate metrics to a scalar value per pipeline\n", + "mean_accuracies = df_aggregate_eval_metric(\n", + " df_adjusted,\n", + " group_by=[\"pipeline_id\", \"pipeline_ref\", \"metric\"],\n", + " in_col=\"value\",\n", + " out_col=\"metric_value\",\n", + " aggregate_func=\"mean\",\n", + ")\n", + "mean_accuracies" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_triggers = df_leaf[df_leaf[\"id\"] == PipelineStage.TRAIN.name]\n", + "df_triggers = df_triggers[df_triggers[\"sample_time\"] > maximum_min]\n", + "df_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find number of triggers per pipeline that are after maximum_min\n", + "\n", + "# before the cutoff there was one trigger (equivalent to start of our reduced dataset): +1\n", + "num_triggers = df_triggers.groupby(\"pipeline_id\").aggregate(count=(\"id\", \"count\"), sum_duration=(\"duration\", \"sum\"))\n", + "num_triggers[\"count\"] += 1\n", + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = num_triggers.merge(mean_accuracies, on=\"pipeline_id\", how=\"inner\")\n", + "assert num_triggers.shape[0] == merged.shape[0]\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_type(x: str):\n", + " if \"year\" in x:\n", + " return \"time\"\n", + " elif \"samples\" in x:\n", + " return \"amount\"\n", + " elif \"d\" in x:\n", + " return \"drift\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "\n", + "merged[\"type\"] = merged[\"pipeline_ref\"].apply(lambda x: create_type(x))\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "renamed = merged.copy()\n", + "\n", + "# renamed = merged[\n", + "# merged[\"pipeline_id\"].isin(\n", + "# [\n", + "# # # static 
thresholds\n", + "# # 113, # 0.03\n", + "# # 112, # 0.05\n", + "# # 107, # 0.07\n", + "# # 109, # 0.09\n", + "# # 85, # 0.12\n", + "# # # dyn quantile\n", + "# # 353, # % 0.05\n", + "# # 345, # % 0.10\n", + "# # 357, # % 0.15\n", + "# # # dyn roll. avg\n", + "# # 372, # Δ 2.0\n", + "# # 370, # Δ 1.0\n", + "# # 369, # Δ 0.5\n", + "# # 363, # Δ 0.05\n", + "# ]\n", + "# )\n", + "# ].copy()\n", + "renamed[\"Trigger SubType\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"DataAmount\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Time\"\n", + " if \"time\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"_mmd-0\" in x\n", + " else (\"Quantile\" if \"quant\" in x else (\"Rolling Avg.\" if \"roll\" in x else (\"unknown\")))\n", + " )\n", + " if \"drift\" in x\n", + " else (\n", + " (\n", + " \"Static\"\n", + " if \"static\" in x\n", + " else (\n", + " \"Quantile\"\n", + " if \"quant\" in x\n", + " else (\n", + " \"Rolling Avg.\"\n", + " if \"roll\" in x\n", + " else (\"AvoidableMisclass\" if \"num_misclass\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " if \"performancetrigger\" in x\n", + " else (\n", + " \"DataIncorporationLatency\"\n", + " if \"data_inc\" in x\n", + " else (\"AvoidableMisclass\" if \"avoidable\" in x else (\"unknown\"))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")\n", + "renamed[\"Trigger Type\"] = renamed[\"pipeline_ref\"].apply(\n", + " lambda x: (\n", + " \"Simple\"\n", + " if \"dataamount\" in x\n", + " else (\n", + " \"Simple\"\n", + " if \"time\" in x\n", + " else (\n", + " \"DataDrift\"\n", + " if \"drift\" in x\n", + " else (\"Performance\" if \"performancetrigger\" in x else (\"Cost\" if \"costtrigger\" in x else (\"unknown\")))\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "# assert no unknowns and DataIncorporationLatency\n", + "assert not renamed[\"Trigger Type\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger 
SubType\"].str.contains(\"unknown\").any()\n", + "assert not renamed[\"Trigger SubType\"].str.contains(\"DataIncorporationLatency\").any()\n", + "\n", + "renamed[\"Trigger Type\"] = pd.Categorical(\n", + " renamed[\"Trigger Type\"], categories=[\"Simple\", \"DataDrift\", \"Performance\", \"Cost\"], ordered=True\n", + ")\n", + "\n", + "renamed[\"Trigger SubType\"] = pd.Categorical(\n", + " renamed[\"Trigger SubType\"],\n", + " categories=[\"DataAmount\", \"Time\", \"Static\", \"Quantile\", \"Rolling Avg.\", \"AvoidableMisclass\"],\n", + " ordered=True,\n", + ")\n", + "\n", + "renamed = renamed.sort_values(by=[\"Trigger Type\", \"Trigger SubType\", \"pipeline_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.9,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_yearbook_triggers_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "in_minutes = renamed.copy()\n", + "in_minutes[\"sum_duration\"] = in_minutes[\"sum_duration\"] / 60\n", + "\n", + "fig = plot_tradeoff_scatter(\n", + " in_minutes,\n", + " x=\"sum_duration\",\n", + " y=\"metric_value\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Total Cost (Minutes)\",\n", + " y_label=\"Mean Accuracy %\",\n", + " height_factor=0.8,\n", + " width_factor=0.9,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "save_plot(fig, \"_all_tradeoff_yearbook_cost_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [ + "fig = plot_tradeoff_scatter(\n", + " renamed,\n", + " x=\"count\",\n", + " y=\"sum_duration\",\n", + " hue=\"Trigger Type\",\n", + " style=\"Trigger SubType\",\n", + " x_label=\"Number of Triggers\",\n", + " y_label=\"Total Cost (seconds)\",\n", + " height_factor=1.5,\n", + " width_factor=1.8,\n", + " manual_legend_title=False,\n", + " legend_ncol=2,\n", + ")\n", + "\n", + "# save_plot(fig, \"tradeoff_drift_yearbook_triggers_cost\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb b/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb new file mode 100644 index 000000000..efa21290b --- /dev/null +++ b/analytics/plotting/rh_thesis/drift/arxiv_cost.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", 
+ " # TODO\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [267, 269, 265] + [268, 271, 270]\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. 
duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [265, 269, 267],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 265: \"TimeTrigger 10 years\",\n", + " 269: \"TimeTrigger 2 years\",\n", + " 267: 
\"TimeTrigger 26 weeks\",\n", + " },\n", + " height_factor=1.8,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_time-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [268, 271, 270],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 268: \"AmountTrigger 500k samples\",\n", + " 271: \"AmountTrigger 100k samples\",\n", + " 270: \"AmountTrigger 25k samples\",\n", + " },\n", + " height_factor=1.8,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_amount-trigger-cost-matrix\")\n", + "# not interesting: note that for 250 samples we see multiple triggers at the same timestamp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plot 100k 
amount and 2y time trigger together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [269, 271],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 269: \"TimeTrigger 2 years\",\n", + " 271: \"AmountTrigger 100k samples\",\n", + " },\n", + " height_factor=1.2,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"arxiv_timeamount-trigger-cost-matrix\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb b/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb index 100651d98..b8c9e6fa6 100644 --- a/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/arxiv_heatmap_single.ipynb @@ -57,13 +57,13 @@ "outputs": [], "source": [ "# mode:\n", - "pipeline_id = 771 # hp drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\n", + "pipeline_id = 782 # 
drifttrigger_mmd-quant-0.05-20_int20000_win1y\n", "\n", "# doesn't do anything unless include_composite_model = True\n", "composite_model_variant = \"currently_active_model\"\n", "\n", "patch_yearbook = True\n", - "dataset_id = \"huffpost_kaggle_test\"\n", + "dataset_id = \"arxiv_kaggle_test\"\n", "eval_handler = \"periodic-current\"\n", "metric = \"Accuracy\"\n", "include_composite_model = False" @@ -230,20 +230,21 @@ " heatmap_data,\n", " reverse_col=True,\n", " x_custom_ticks=[\n", - " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", - " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", + " if period in [pd.Period(\"Mar 2000\"), pd.Period(\"Mar 2009\"), pd.Period(\"Mar 2020\")]\n", " ],\n", " y_custom_ticks=[\n", - " (i + 0.5, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", " for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " if period in [pd.Period(\"Jun 2000\"), pd.Period(\"Jun 2009\"), pd.Period(\"May 2020\")]\n", " ],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"HuffPost Dynamic Threshold\\nRolling Average: Δ +200%\",\n", + " title_label=\"Arxiv Dynamic Drift Threshold: MMD Quantile: 0.05\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.6,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.55,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/drift/hp_cost.ipynb b/analytics/plotting/rh_thesis/drift/hp_cost.ipynb new file mode 100644 index 000000000..6fb266f85 --- /dev/null +++ b/analytics/plotting/rh_thesis/drift/hp_cost.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/21_datadrift_dynamic\"),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [771] # hp drifttrigger_mmd-rollavg-2.0-20_int1500_win1y\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " 
\"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [771],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 771: \"HuffPost Dynamic Drift: Δ +200%\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2014-05-01\", \"2018-06-01\", \"2021-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"May\\n2014\", \"Jun\\n2018\", \"Jan\\n2021\"]]),\n", + " x_lim=(pd.Timestamp(\"2012-01-01\"), pd.Timestamp(\"2022-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 110, 25)],\n", + " y_lim_cumulative=(0, 100),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"huffpost_drift-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb 
b/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb index 26784853b..b0c43f313 100644 --- a/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/hp_heatmap_single.ipynb @@ -230,7 +230,7 @@ " heatmap_data,\n", " reverse_col=True,\n", " x_custom_ticks=[\n", - " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", " ],\n", @@ -240,10 +240,10 @@ " ],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"HuffPost Dynamic Threshold\\nRolling Average: Δ +200%\",\n", + " title_label=\"HuffPost Dynamic Drift Threshold: Rolling Average Δ +200%\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.6,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.5,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/drift/yb_cost.ipynb b/analytics/plotting/rh_thesis/drift/yb_cost.ipynb index b642f2841..a136ed341 100644 --- a/analytics/plotting/rh_thesis/drift/yb_cost.ipynb +++ b/analytics/plotting/rh_thesis/drift/yb_cost.ipynb @@ -160,21 +160,37 @@ " [107],\n", " grid_alpha=0.75,\n", " title_map={\n", - " 107: \"static MMD=0.07 threshold\",\n", + " 107: \"Static MMD Threshold=0.07\",\n", " },\n", - " height_factor=0.8,\n", + " height_factor=0.7,\n", " width_factor=1.0,\n", " duration_ylabel=\"Duration (sec)\",\n", " cumulative_ylabel=\"Cumulative Duration (min)\",\n", " x_ticks=[x for x in range(1940, 2010 + 1, 30)],\n", - " y_ticks_cumulative=[x for x in range(0, 60 + 1, 20)],\n", - " y_lim_cumulative=(0, 70),\n", + " y_ticks_cumulative=[x for x in range(0, 9 + 1, 3)],\n", + " y_lim_cumulative=(0, 10),\n", " y_minutes=False,\n", " 
y_minutes_cumulative=True,\n", ")\n", "\n", "save_plot(fig, \"yearbook_drift-trigger-cost-matrix\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# warmup noticeable where no detection is launched" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb b/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb index 379988a5f..c33a59917 100644 --- a/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb +++ b/analytics/plotting/rh_thesis/drift/yb_cost_perf_tradeoff.ipynb @@ -211,7 +211,7 @@ "maximum_min = min_active_eval_center_per_pipeline.max()\n", "print(maximum_min, min_active_eval_center_per_pipeline)\n", "\n", - "assert maximum_min < pd.Timestamp(\"1950-01-01\")\n", + "assert maximum_min < pd.Timestamp(\"1940-01-01\")\n", "\n", "df_adjusted = df_adjusted[df_adjusted[\"interval_center\"] >= maximum_min]\n", "df_adjusted[\"interval_center\"].unique()" diff --git a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb index ac8b6f040..b3390cbd6 100644 --- a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb +++ b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_multi.ipynb @@ -222,10 +222,10 @@ " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"Pipeline with\\nWindow Size\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nWindow Sizes (MMD=0.07)\",\n", + " title_label=\"Yearbook Composite Models: Drift Window Sizes (MMD=0.07)\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.43,\n", + " width_factor=1,\n", + " height_factor=0.38,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # 
disable_horizontal_grid=True,\n", @@ -291,10 +291,10 @@ " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"MMD Threshold\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nStatic Thresholds\",\n", + " title_label=\"Yearbook Composite Models: Static Drift Thresholds\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.55,\n", + " width_factor=1,\n", + " height_factor=0.45,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", @@ -362,12 +362,12 @@ " reverse_col=True,\n", " x_ticks=[1950, 1975, 2000],\n", " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", - " y_label=\"Dynamic Quantile\",\n", + " y_label=\"Criterion\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook Composite Models:\\nDynamic Thresholds\",\n", + " title_label=\"Yearbook Composite Models: Dynamic Drift Thresholds\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.55,\n", + " width_factor=1,\n", + " height_factor=0.47,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", @@ -381,6 +381,13 @@ ")\n", "save_plot(fig, \"yb_trigger_heatmap_drift_multi_dynamic_thresholds\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb index 015b7bc92..54bdc59eb 100644 --- a/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb +++ b/analytics/plotting/rh_thesis/drift/yearbook_heatmap_single.ipynb @@ -271,10 +271,10 @@ " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", " y_label=\"Trained up to\",\n", " x_label=\"Evaluation Year\",\n", - " title_label=\"Yearbook 4y 
Windows\\nStatic Threshold: MMD=0.07\",\n", + " title_label=\"Yearbook 4y Drift Windows: Static MMD Threshold=0.07\",\n", " color_label=\"Accuracy %\",\n", - " width_factor=0.5,\n", - " height_factor=0.61,\n", + " width_factor=1,\n", + " height_factor=0.55,\n", " # grid_alpha=0.4,\n", " grid_alpha=0.0,\n", " # disable_horizontal_grid=True,\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb index 51234c32f..5d0537799 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/arxiv_heatmap_metrics.ipynb @@ -292,16 +292,16 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\n", " \"Accuracy\",\n", " generate_heatmap_data_for_handler(\n", " df_merged, \"Accuracy\"\n", " ), # almost identical to F1-micro and F1-weighted; macro is broken\n", " ),\n", - " (1, \"Trained up to\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", - " (2, \"Trained up to\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", - " (3, \"Trained up to\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", + " (2, \"Evaluation Year\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", + " (3, \"Evaluation Year\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", " }\n", "}\n", "\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb 
index d1621419e..4c4c4e479 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/hp_heatmap_metrics.ipynb @@ -292,16 +292,16 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\n", " \"Accuracy\",\n", " generate_heatmap_data_for_handler(\n", " df_merged, \"Accuracy\"\n", " ), # almost identical to F1-micro and F1-weighted; macro is broken\n", " ),\n", - " (1, \"Trained up to\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", - " (2, \"Trained up to\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", - " (3, \"Trained up to\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"Top-2-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-2-Accuracy\")),\n", + " (2, \"Evaluation Year\"): (\"Top-5-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-5-Accuracy\")),\n", + " (3, \"Evaluation Year\"): (\"Top-10-Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Top-10-Accuracy\")),\n", " }\n", "}\n", "\n", diff --git a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb index 018ab16ed..6cdc52db8 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_metrics.ipynb @@ -302,9 +302,9 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\"Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Accuracy\")),\n", - " (1, \"Trained up to\"): (\"ROC-AUC\", generate_heatmap_data_for_handler(df_merged, \"ROC-AUC\")),\n", + " (0, 
\"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"Accuracy\", generate_heatmap_data_for_handler(df_merged, \"Accuracy\")),\n", + " (1, \"Evaluation Year\"): (\"ROC-AUC\", generate_heatmap_data_for_handler(df_merged, \"ROC-AUC\")),\n", " }\n", "}\n", "\n", @@ -326,7 +326,7 @@ " grid_alpha=0.5,\n", ")\n", "\n", - "save_plot(fig, \"evaluation_metrics_yb_1\")" + "save_plot(fig, \"evaluation_metrics_yb_one\")" ] }, { @@ -336,10 +336,10 @@ "outputs": [], "source": [ "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, \"Trained up to\"): (\"F1-micro\", generate_heatmap_data_for_handler(df_merged, \"F1-micro\")),\n", - " (1, \"Trained up to\"): (\"F1-macro\", generate_heatmap_data_for_handler(df_merged, \"F1-macro\")),\n", - " (2, \"Trained up to\"): (\"F1-weighted\", generate_heatmap_data_for_handler(df_merged, \"F1-weighted\")),\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"F1-micro\", generate_heatmap_data_for_handler(df_merged, \"F1-micro\")),\n", + " (1, \"Evaluation Year\"): (\"F1-macro\", generate_heatmap_data_for_handler(df_merged, \"F1-macro\")),\n", + " (2, \"Evaluation Year\"): (\"F1-weighted\", generate_heatmap_data_for_handler(df_merged, \"F1-weighted\")),\n", " }\n", "}\n", "\n", @@ -361,8 +361,15 @@ " grid_alpha=0.5,\n", ")\n", "\n", - "save_plot(fig, \"evaluation_metrics_yb_1\")" + "save_plot(fig, \"evaluation_metrics_yb_two\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb index 24593e11e..108303011 100644 --- a/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb +++ b/analytics/plotting/rh_thesis/evaluation_setup/yb_heatmap_window_size.ipynb @@ -292,13 +292,13 @@ "\n", "\n", "plot_content = {\n", - " (0, \"Evaluation Year\"): {\n", - " (0, 
\"Trained up to\"): (\"Same Year\", generate_heatmap_data_for_handler(df_merged, \"periodic-current\")),\n", - " (1, \"Trained up to\"): (\n", + " (0, \"Trained up to\"): {\n", + " (0, \"Evaluation Year\"): (\"Same Year\", generate_heatmap_data_for_handler(df_merged, \"periodic-current\")),\n", + " (1, \"Evaluation Year\"): (\n", " \"3 Year Window (±1 yr.)\",\n", " generate_heatmap_data_for_handler(df_merged, \"periodic-delta+-1y\"),\n", " ),\n", - " (2, \"Trained up to\"): (\n", + " (2, \"Evaluation Year\"): (\n", " \"11 Year Window (±5 yr.)\",\n", " generate_heatmap_data_for_handler(df_merged, \"periodic-delta+-5y\"),\n", " ),\n", diff --git a/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb b/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb new file mode 100644 index 000000000..03a3f16f0 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/arxiv_cost.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\")\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = 
{}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [762] # performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. 
duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [762],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 762: \"arXiv PerformanceTrigger (NumMisclass.)\",\n", + " },\n", + " height_factor=0.7,\n", + " 
width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2000-01-01\", \"2009-01-01\", \"2018-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"Jan 2000\", \"Jan 2009\", \"Jan 2018\"]]),\n", + " x_lim=(pd.Timestamp(\"1995-01-01\"), pd.Timestamp(\"2024-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 1000, 200)],\n", + " y_lim_cumulative=(0, 1000),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "save_plot(fig, \"arxiv_performance-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lower/insignificant policy eval costs compared to drift" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb new file mode 100644 index 000000000..44c4d2e7e --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/arxiv_heatmap_single.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, 
logs_dataframe\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/30_performance\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 762 # performancetrigger_num_misclass-10000-exp-0.6-red-False--int20000\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"arxiv_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " 
pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].dt.to_period(\"M\")\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = 
df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.to_period(\"M\")\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged.groupby([\"real_train_end\", \"interval_center\"]).size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "df_merged[\"real_train_end\"] = df_merged[\"real_train_end\"].apply(lambda x: pd.Period(x, freq=\"M\"))\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", + " if period in [pd.Period(\"Mar 2000\"), pd.Period(\"Mar 2009\"), pd.Period(\"Mar 2020\")]\n", + " ],\n", + " y_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\".replace(\" \", \"\\n\"))\n", + " 
for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " if period in [pd.Period(\"Jun 2000\"), pd.Period(\"Jun 2009\"), pd.Period(\"May 2020\")]\n", + " ],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Arxiv PerformanceTrigger\\nExp. Acc=60% | NumMiscl=10k | No Reduction\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.7,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + " x_axis=\"period\",\n", + ")\n", + "save_plot(fig, \"arxiv_trigger_heatmap_performance_single_dynamic\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/hp_cost.ipynb b/analytics/plotting/rh_thesis/performance/hp_cost.ipynb new file mode 100644 index 000000000..a4401c8df --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/hp_cost.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.dates as mdates\n", + "import pandas as pd\n", + "from matplotlib.ticker import FixedFormatter, FixedLocator\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import 
plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\")\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [639] # performancetrigger_static-0.5-int1500y\n", + "\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = 
pd.concat(df_leaf_list)\n", + "df_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"]\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [639],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " 639: \"HuffPost Static PerformanceTrigger\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (min)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_date_locator=FixedLocator([mdates.date2num(pd.Timestamp(d)) for d in [\"2014-05-01\", \"2018-06-01\", \"2021-01-01\"]]),\n", + " x_date_formatter=FixedFormatter([str(year) for year in [\"May\\n2014\", \"Jun\\n2018\", \"Jan\\n2021\"]]),\n", + " x_lim=(pd.Timestamp(\"2012-01-01\"), pd.Timestamp(\"2022-09-01\")),\n", + " y_ticks_cumulative=[x for x in range(0, 110, 25)],\n", + " y_lim_cumulative=(0, 100),\n", + " y_minutes=True,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"huffpost_performance-trigger-cost-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lower policy eval costs compared to drift" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb new file mode 100644 index 000000000..ad25baf58 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/hp_heatmap_single.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from 
analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/30_performance/static_dyn\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 639 # performancetrigger_static-0.5-int1500y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"huffpost_kaggle_test\"\n", + "eval_handler = \"periodic-current\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " 
# subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].dt.to_period(\"M\")\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.to_period(\"M\")\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged.groupby([\"real_train_end\", \"interval_center\"]).size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "df_merged[\"real_train_end\"] = df_merged[\"real_train_end\"].apply(lambda x: pd.Period(x, freq=\"M\"))\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_custom_ticks=[\n", + " (i, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.columns))[::1]\n", + " if period in [pd.Period(\"Apr 2014\"), pd.Period(\"Jul 2018\"), pd.Period(\"Jan 2022\")]\n", + " ],\n", + " 
y_custom_ticks=[\n", + " (i + 0.5, f\"{period.to_timestamp().strftime('%b %Y')}\")\n", + " for i, period in list(enumerate(heatmap_data.index))[::1]\n", + " ],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"HuffPost PerformanceTrigger: Static Accuracy Threshold=50%\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.5,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + " x_axis=\"period\",\n", + ")\n", + "save_plot(fig, \"hp_trigger_heatmap_performance_single_static\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yb_cost.ipynb b/analytics/plotting/rh_thesis/performance/yb_cost.ipynb new file mode 100644 index 000000000..acf1a8608 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yb_cost.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import patch_yearbook_time, pipeline_leaf_times_df\n", + "from analytics.plotting.common.cost_matrix import plot_cost_matrix\n", + "from analytics.plotting.common.save import save_plot\n", + "from 
modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode: time + amount\n", + "pipeline_ids = [759] # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_leaf_list = []\n", + "for pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)\n", + " df_leaf_list.append(df_leaf_single)\n", + "\n", + "df_leaf = pd.concat(df_leaf_list)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_leaf.copy()\n", + "\n", + "# coloring in order of decreasing avg. duration\n", + "avg_duration_per_stage = df_adjusted.groupby([\"pipeline_ref\", \"id\"])[\"duration\"].mean().sort_values(ascending=False)\n", + "df_adjusted = df_adjusted.merge(avg_duration_per_stage, on=[\"pipeline_ref\", \"id\"], suffixes=(\"\", \"_avg\")).sort_values(\n", + " \"duration_avg\", ascending=False\n", + ")\n", + "\n", + "# Yearbook as a mapped time dimension (to display the correct timestamps we need to convert back from days to years)\n", + "if patch_yearbook:\n", + " patch_yearbook_time(df_adjusted, \"sample_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted\n", + "df_adjusted[\"sample_time_year\"] = df_adjusted[\"sample_time\"].dt.year\n", + "df_adjusted[\"sample_time_year_bin\"] = pd.cut(df_adjusted[\"sample_time_year\"], bins=10, labels=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_new = df_adjusted[\n", + " (\n", + " df_adjusted[\"id\"].isin(\n", + " [\n", + " \"TRAIN\",\n", + " \"STORE_TRAINED_MODEL\",\n", + " \"INFORM_SELECTOR_REMAINING_DATA\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\",\n", + " \"EVALUATE_TRIGGER_POLICY\",\n", + " ]\n", + " )\n", + " )\n", + "][[\"pipeline_ref\", \"id\", \"sample_time_year\", \"duration\"]].copy()\n", + "df_new = df_new.sort_values(\"sample_time_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_rename = {\n", + " \"INFORM_SELECTOR_REMAINING_DATA\": \"inform remaining data\",\n", + " \"INFORM_SELECTOR_ABOUT_TRIGGER\": \"inform trigger\",\n", + "}\n", + "\n", + "df_new[\"id\"] = df_new[\"id\"].replace(state_rename).str.lower().str.replace(\"_\", \" \")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cost_matrix(\n", + " df_new,\n", + " [759],\n", + " grid_alpha=0.75,\n", + " title_map={\n", + " # title_label=\"Yearbook PerformanceTrigger:\\n Exp. Acc=90% | NumMiscl=100 | No Reduction\",\n", + " 759: \"Yearbook PerformanceTrigger (NumMiscl)\",\n", + " },\n", + " height_factor=0.7,\n", + " width_factor=1.0,\n", + " duration_ylabel=\"Duration (sec)\",\n", + " cumulative_ylabel=\"Cumulative Duration (min)\",\n", + " x_ticks=[x for x in range(1940, 2010 + 1, 30)],\n", + " y_ticks_cumulative=[x for x in range(0, 9 + 1, 3)],\n", + " y_lim_cumulative=(0, 10),\n", + " y_minutes=False,\n", + " y_minutes_cumulative=True,\n", + ")\n", + "\n", + "save_plot(fig, \"yearbook_performance-trigger-cost-matrix\")\n", + "# Lower policy costs than in drift case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb new file mode 100644 index 000000000..a227fb614 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_multi.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines, load_pipeline_logs\n", + "from analytics.app.data.transform import (\n", + " 
dfs_models_and_evals,\n", + " patch_yearbook_time,\n", + " pipeline_leaf_times_df,\n", + ")\n", + "from analytics.plotting.common.save import save_plot\n", + "from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dirs = [\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn/\"\n", + " ),\n", + " Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + " ),\n", + "]\n", + "\n", + "pipeline_logs: dict[int, PipelineLogs] = {}\n", + "pipelines: dict[int, tuple[str, Path]] = {}\n", + "\n", + "for dir in pipelines_dirs:\n", + " dir_pipelines = list_pipelines(dir)\n", + " pipelines.update(dir_pipelines)\n", + " max_pipeline_id = max(dir_pipelines.keys())\n", + " print(pipelines)\n", + " pipeline_logs.update({p_id: load_pipeline_logs(p_id, dir) for (p_id, (_, p_path)) in dir_pipelines.items()})\n", + " assert dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_ids = list(pipelines.keys())\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list_df_eval_single: list[pd.DataFrame] = []\n", + "df_logs_models_list: list[pd.DataFrame] = []\n", + "\n", + "for 
pipeline_id in pipeline_ids:\n", + " logs = pipeline_logs[pipeline_id]\n", + " df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=False, pipeline_id=pipeline_id)\n", + " df_logs_models_single, _, df_eval_single = dfs_models_and_evals(\n", + " pipeline_logs[pipeline_id], df_leaf_single[\"sample_time\"].max(), pipelines[pipeline_id][0]\n", + " )\n", + " df_eval_single[\"pipeline_id\"] = pipeline_id\n", + " df_logs_models_single[\"pipeline_id\"] = pipeline_id\n", + " list_df_eval_single.append(df_eval_single)\n", + " df_logs_models_list.append(df_logs_models_single)\n", + "\n", + "df_adjusted = pd.concat(list_df_eval_single)\n", + "df_adjusted\n", + "\n", + "df_logs_models = pd.concat(df_logs_models_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "len(df_adjusted)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reduce to composite models\n", + "df_adjusted = df_adjusted[df_adjusted[composite_model_variant]]\n", + "df_adjusted[composite_model_variant].unique()\n", + "len(df_adjusted)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "\n", + "df_train_end_years_per_model = df_logs_models[[\"pipeline_id\", \"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted.groupby([\"pipeline_id\"]).size()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Static Performance Thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_pids = list(reversed([437, 432, 429, 425, 421, 418, 414, 411]))\n", + "\n", + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=[\"pipeline_id\", \"model_idx\"], how=\"left\")\n", + "# build heatmap matrix dataframe:\n", + "df_merged[\"pipeline_id\"] = df_merged[\"pipeline_id\"].astype(int)\n", + "df_merged = df_merged[df_merged[\"pipeline_id\"].isin(_pids)]\n", + "heatmap_data = df_merged.pivot(index=[\"pipeline_id\"], columns=\"interval_center\", values=\"value\")\n", + "\n", + "heatmap_data.index.min(), heatmap_data.index.max()\n", + "heatmap_data\n", + "\n", + "# sort index by pipeline_refs\n", + "heatmap_data = heatmap_data.reindex(_pids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "\n", + "pipelines_refs = {437: 
\"95%\", 432: \"92.5%\", 429: \"90%\", 425: \"87.5%\", 421: \"85%\", 418: \"80%\", 414: \"75%\", 411: \"70%\"}\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Pipeline with\\nAccuracy Threshold\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook Composite Models: Static Accuracy Thresholds\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.58,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " triggers={\n", + " i: df_logs_models[df_logs_models[\"pipeline_id\"] == p_id][\n", + " [\"train_start\", \"train_end\", \"usage_start\", \"usage_end\"]\n", + " ]\n", + " for i, p_id in enumerate(heatmap_data.index)\n", + " },\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_multi_static_thresholds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dynamic Thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_pids = (\n", + " # Num misclass: with reduction\n", + " list(reversed([734, 758, 749]))\n", + " +\n", + " # Num misclass: without reduction\n", + " list(reversed([736, 759, 751, 743]))\n", + " +\n", + " # roll avg\n", + " list([527, 516, 506, 494])\n", + " +\n", + " # quantile\n", + " [445]\n", + ")\n", + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=[\"pipeline_id\", \"model_idx\"], how=\"left\")\n", + "# build heatmap matrix dataframe:\n", + "df_merged[\"pipeline_id\"] = df_merged[\"pipeline_id\"].astype(int)\n", + "df_merged = df_merged[df_merged[\"pipeline_id\"].isin(_pids)]\n", + "heatmap_data = df_merged.pivot(index=[\"pipeline_id\"], columns=\"interval_center\", values=\"value\")\n", + "\n", + "heatmap_data.index.min(), 
heatmap_data.index.max()\n", + "\n", + "# sort index by pipeline_refs\n", + "heatmap_data = heatmap_data.reindex(_pids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "pipelines_refs = {\n", + " # Roll Avg\n", + " 527: \"Δ 0.3\",\n", + " 516: \"Δ 0.2\",\n", + " 506: \"Δ 0.1\",\n", + " 494: \"Δ 0.05\",\n", + " # Quantile\n", + " 445: \"% 0.05\",\n", + " # Num misclass: without reduction\n", + " 736: \"X 50, noRed\",\n", + " 759: \"X 100, noRed\",\n", + " 751: \"X 200, noRed\",\n", + " 743: \"X 500, noRed\",\n", + " # Num misclass: with reduction\n", + " 734: \"X 50, Red\",\n", + " 758: \"X 100, Red\",\n", + " 749: \"X 200, Red\",\n", + "}\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, pipelines_refs[y]) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Criterion\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook Composite Models:\\nDynamic Performance Thresholds & Num. 
Misclassifications\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.75,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " triggers={\n", + " i: df_logs_models[df_logs_models[\"pipeline_id\"] == p_id][\n", + " [\"train_start\", \"train_end\", \"usage_start\", \"usage_end\"]\n", + " ]\n", + " for i, p_id in enumerate(heatmap_data.index)\n", + " },\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_multi_dyn_thresholds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb new file mode 100644 index 000000000..934586937 --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " 
\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/static_dyn\"\n", + ")\n", + "assert pipelines_dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 418 # 250 0.8 static\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & 
(df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in 
df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import 
save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " title_label=\"Yearbook PerformanceTrigger: Static Accuracy Threshold=70%\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.55,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_single_static\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb new file mode 100644 index 000000000..e39e4764a --- /dev/null +++ b/analytics/plotting/rh_thesis/performance/yearbook_heatmap_single_num_miclass.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from analytics.app.data.load import list_pipelines\n", + "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines_dir = Path(\n", + " \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/30_performance/num_misclass/\"\n", + ")\n", + "assert pipelines_dir.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelines = list_pipelines(pipelines_dir)\n", + "max_pipeline_id = max(pipelines.keys())\n", + "pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.app.data.load import load_pipeline_logs\n", + "\n", + "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mode:\n", + "pipeline_id = 759 # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + "\n", + "# doesn't do anything unless include_composite_model = True\n", + "composite_model_variant = \"currently_active_model\"\n", + "\n", + "patch_yearbook = True\n", + "dataset_id = \"yearbook_test\"\n", + "eval_handler = \"periodic-delta+-1y\"\n", + "metric = \"Accuracy\"\n", + "include_composite_model = False\n", + "\n", + "print(f\"Pipeline ID: {pipeline_id}, name: {pipelines[pipeline_id][0]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_log = pipeline_logs[pipeline_id]\n", + "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n", + "\n", + "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n", + "\n", + "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n", + " # subtracting would interfere with 
yearbook patching\n", + " pipeline_log,\n", + " df_all[\"sample_time\"].max(),\n", + " pipeline_ref,\n", + ")\n", + "\n", + "df_adjusted = df_eval_single\n", + "\n", + "\n", + "df_adjusted = df_adjusted[\n", + " (df_adjusted[\"dataset_id\"] == dataset_id)\n", + " & (df_adjusted[\"eval_handler\"] == eval_handler)\n", + " & (df_adjusted[\"metric\"] == metric)\n", + "]\n", + "\n", + "# in percent (0-100)\n", + "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if patch_yearbook:\n", + " for column in [\"interval_start\", \"interval_center\", \"interval_end\"]:\n", + " patch_yearbook_time(df_adjusted, column)\n", + " for column in [\"train_start\", \"train_end\", \"real_train_end\", \"usage_start\", \"usage_end\"]:\n", + " patch_yearbook_time(df_logs_models, column)\n", + "\n", + " # correction for -1 second in timestamp format before patching\n", + " df_logs_models[\"usage_end\"] = (\n", + " df_logs_models[\"usage_end\"].dt.to_period(\"M\") + 1\n", + " ).dt.to_timestamp() # december (because of -1 second in timestamp format) -> start of year\n", + "\n", + "df_logs_models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add composite model\n", + "\n", + "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n", + "# add the pipeline time series which is the performance of different models stitched together dep.\n", + "# w.r.t which 
model was active\n", + "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n", + "pipeline_composite_model[\"model_idx\"] = 0\n", + "pipeline_composite_model[\"id_model\"] = 0\n", + "\n", + "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n", + "label_map[0] = \"Pipeline composite model\"\n", + "\n", + "if include_composite_model:\n", + " df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n", + "else:\n", + " df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"].astype(str).str.split(\"-\").str[0]\n", + "df_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n", + "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"].dt.year\n", + "df_train_end_years_per_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build heatmap matrix dataframe:\n", + "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heatmap_data.index.min(), heatmap_data.index.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"heatmap_data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics.plotting.common.heatmap import build_heatmap\n", + "from analytics.plotting.common.save import save_plot\n", + "\n", + "fig = build_heatmap(\n", + " heatmap_data,\n", + " reverse_col=True,\n", + " x_ticks=[1950, 1975, 2000],\n", + " y_custom_ticks=[(i + 0.5, str(y)) for i, y in enumerate(heatmap_data.index)],\n", + " y_label=\"Trained up to\",\n", + " x_label=\"Evaluation Year\",\n", + " # yearbook_performancetrigger_num_misclass-100-exp-0.9-red-False--int250y\n", + " title_label=\"Yearbook PerformanceTrigger:\\n Exp. Acc=90% | NumMiscl=100 | No Reduction\",\n", + " color_label=\"Accuracy %\",\n", + " width_factor=1,\n", + " height_factor=0.55,\n", + " # grid_alpha=0.4,\n", + " grid_alpha=0.0,\n", + " # disable_horizontal_grid=True,\n", + " # cbar=False,\n", + " df_logs_models=df_logs_models,\n", + ")\n", + "save_plot(fig, \"yb_trigger_heatmap_performance_single_num_misclass\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}