From 17058cfa002ac5a535833bd1061d31fe14417e4d Mon Sep 17 00:00:00 2001
From: Robin Holzinger <robin.holzinger@tum.de>
Date: Tue, 24 Sep 2024 15:02:42 +0200
Subject: [PATCH] More plots

---
 analytics/plotting/common/heatmap.py          |  50 +--
 .../common/linear_regression_scatterplot.py   | 100 ++++++
 .../yb_traintime_dataamount.ipynb             | 115 ++++---
 .../yb_triggering/arxiv_heatmap.ipynb         | 293 ++++++++++++++++++
 .../rh_thesis/yb_triggering/hp_heatmap.ipynb  | 287 +++++++++++++++++
 .../rh_thesis/yb_triggering/yb_heatmap.ipynb  |  23 +-
 6 files changed, 778 insertions(+), 90 deletions(-)
 create mode 100644 analytics/plotting/common/linear_regression_scatterplot.py
 create mode 100644 analytics/plotting/rh_thesis/yb_triggering/arxiv_heatmap.ipynb
 create mode 100644 analytics/plotting/rh_thesis/yb_triggering/hp_heatmap.ipynb

diff --git a/analytics/plotting/common/heatmap.py b/analytics/plotting/common/heatmap.py
index 2f75acd1a..e769be5c4 100644
--- a/analytics/plotting/common/heatmap.py
+++ b/analytics/plotting/common/heatmap.py
@@ -16,9 +16,11 @@
 
 def build_heatmap(
     heatmap_data: pd.DataFrame,
-    y_ticks: list[int] | None = None,
+    y_ticks: list[int] | list[str] | None = None,
     y_ticks_bins: int | None = None,
     x_ticks: list[int] | None = None,
+    x_custom_ticks: list[tuple[int, str]] | None = None,  # (position, label)
+    y_custom_ticks: list[tuple[int, str]] | None = None,  # (position, label)
     reverse_col: bool = False,
     y_label: str = "Reference Year",
     x_label: str = "Current Year",
@@ -34,6 +36,7 @@ def build_heatmap(
     policy: list[tuple[int, int, int]] = [],
     cmap: Any | None = None,
     linewidth: int = 2,
+    grid_alpha: float = 0.0,
 ) -> Figure | Axes:
     init_plot()
     setup_font(small_label=True, small_title=True)
@@ -53,7 +56,7 @@ def build_heatmap(
         heatmap_data,
         cmap=("RdBu" + ("_r" if reverse_col else "")) if not cmap else cmap,
         linewidths=0.0,
-        linecolor="black",
+        linecolor="white",
         # color bar from 0 to 1
         cbar_kws={
             "label": color_label,
@@ -84,7 +87,7 @@ def build_heatmap(
 
     # Adjust x-axis tick labels
     ax.set_xlabel(x_label)
-    if not x_ticks:
+    if not x_ticks and not x_custom_ticks:
         ax.set_xticks(
             ticks=[x + 0.5 for x in range(0, 2010 - 1930 + 1, 20)],
             labels=[x for x in range(1930, 2010 + 1, 20)],
@@ -92,14 +95,26 @@ def build_heatmap(
             # ha='right'
         )
     else:
-        ax.set_xticks(
-            ticks=[x - 1930 + 0.5 for x in x_ticks],
-            labels=[x for x in x_ticks],
-            rotation=0,
-            # ha='right'
-        )
+        if x_custom_ticks:
+            ax.set_xticks(
+                ticks=[x[0] for x in x_custom_ticks],
+                labels=[x[1] for x in x_custom_ticks],
+                rotation=0,
+                # ha='right'
+            )
+        else:
+            assert x_ticks is not None
+            ax.set_xticks(
+                ticks=[x - 1930 + 0.5 for x in x_ticks],
+                labels=[x for x in x_ticks],
+                rotation=0,
+                # ha='right'
+            )
     ax.invert_yaxis()
 
+    ax.grid(axis="y", linestyle="--", alpha=grid_alpha, color="white")
+    ax.grid(axis="x", linestyle="--", alpha=grid_alpha, color="white")
+
     if y_ticks is not None:
         ax.set_yticks(
             ticks=[y + 0.5 - 1930 for y in y_ticks],
@@ -109,21 +124,20 @@ def build_heatmap(
     elif y_ticks_bins is not None:
         ax.yaxis.set_major_locator(MaxNLocator(nbins=y_ticks_bins))
         ax.set_yticklabels([int(i) + min(heatmap_data.index) for i in ax.get_yticks()], rotation=0)
+    else:
+        if y_custom_ticks:
+            ax.set_yticks(
+                ticks=[y[0] for y in y_custom_ticks],
+                labels=[y[1] for y in y_custom_ticks],
+                rotation=0,
+                # ha='right'
+            )
 
     ax.set_ylabel(y_label)
 
     if title_label:
         ax.set_title(title_label)
 
-        # drift_pipeline = []
-
-    # TODO visualize policy
-    # Draft training boxes
-    # if drift_pipeline:
-    # x_start = active_[1][f"_start"].year - 1930
-    # x_end = active_[1][f"{type_}_end"].year - 1930
-    # y = active_[1]["model_idx"]
-
     previous_y = 0
     for x_start, x_end, y in policy:
         # main box
diff --git a/analytics/plotting/common/linear_regression_scatterplot.py b/analytics/plotting/common/linear_regression_scatterplot.py
new file mode 100644
index 000000000..c4237b9e9
--- /dev/null
+++ b/analytics/plotting/common/linear_regression_scatterplot.py
@@ -0,0 +1,100 @@
+from typing import Any
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from matplotlib.axes import Axes
+from matplotlib.figure import Figure
+
+from analytics.plotting.common.color import main_color
+from analytics.plotting.common.common import init_plot
+from analytics.plotting.common.font import setup_font
+
+# Scatter plot overlaid with a linear regression fit
+
+
+def scatter_linear_regression(
+    data: pd.DataFrame,
+    x: str,
+    y: str,
+    hue: str,
+    y_ticks: list[int] | list[str] | None = None,
+    x_ticks: list[int] | None = None,
+    y_label: str = "Reference Year",
+    x_label: str = "Current Year",
+    height_factor: float = 1.0,
+    width_factor: float = 1.0,
+    legend_label: str = "Number Samples",
+    title_label: str = "",
+    target_ax: Axes | None = None,
+    palette: Any = None,
+) -> Figure | tuple[Axes, Axes]:
+    sns.set_style("whitegrid")
+
+    init_plot()
+    setup_font(small_label=True, small_title=True)
+
+    DOUBLE_FIG_WIDTH = 10
+    DOUBLE_FIG_HEIGHT = 3.5
+
+    if not target_ax:
+        fig = plt.figure(
+            edgecolor="black",
+            frameon=True,
+            figsize=(
+                DOUBLE_FIG_WIDTH * width_factor,
+                2 * DOUBLE_FIG_HEIGHT * height_factor,
+            ),
+            dpi=300,
+        )
+
+    ax1 = sns.regplot(
+        data,
+        x=x,
+        y=y,  # duration
+        color=main_color(0),
+    )
+
+    ax2 = sns.scatterplot(
+        data,
+        x=x,
+        y=y,  # duration
+        hue=hue,
+        palette=palette,
+        s=200,
+        legend=True,
+        marker="X",
+    )
+
+    ax2.legend(title=legend_label, ncol=2, handletextpad=0, columnspacing=0.5, fontsize="x-small")
+    # ax2.legend().set_title(legend_label)
+
+    # Adjust x-axis tick labels
+    ax2.set_xlabel(x_label)
+    if x_ticks is not None:
+        ax2.set_xticks(
+            ticks=x_ticks,
+            labels=x_ticks,
+            rotation=0,
+            # ha='right'
+        )
+
+    if y_ticks is not None:
+        ax2.set_yticks(
+            ticks=y_ticks,
+            labels=y_ticks,
+            rotation=0,
+        )
+
+    ax2.set_ylabel(y_label)
+
+    if title_label:
+        ax2.set_title(title_label)
+
+    print("Number of plotted items", data.shape[0])
+
+    # Display the plot
+    plt.tight_layout()
+    # plt.show()
+
+    return fig if not target_ax else (ax1, ax2)
diff --git a/analytics/plotting/rh_thesis/traintime_vs_dataamount/yb_traintime_dataamount.ipynb b/analytics/plotting/rh_thesis/traintime_vs_dataamount/yb_traintime_dataamount.ipynb
index 36f5a67c4..c8b836199 100644
--- a/analytics/plotting/rh_thesis/traintime_vs_dataamount/yb_traintime_dataamount.ipynb
+++ b/analytics/plotting/rh_thesis/traintime_vs_dataamount/yb_traintime_dataamount.ipynb
@@ -8,13 +8,11 @@
    "source": [
     "from pathlib import Path\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
     "import pandas as pd\n",
-    "import seaborn as sns\n",
     "\n",
     "from analytics.app.data.load import list_pipelines\n",
-    "from analytics.plotting.common.common import init_plot\n",
-    "from analytics.plotting.common.font import setup_font\n",
+    "from analytics.plotting.common.color import discrete_colors\n",
+    "from analytics.plotting.common.linear_regression_scatterplot import scatter_linear_regression\n",
     "from modyn.supervisor.internal.grpc.enums import PipelineStage\n",
     "from modyn.supervisor.internal.pipeline_executor.models import StageLog\n",
     "\n",
@@ -30,11 +28,13 @@
    "source": [
     "# INPUTS\n",
     "\n",
+    "# pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/11_baselines_amount\")\n",
     "pipelines_dir = Path(\n",
-    "    \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/11_baselines_amount\"\n",
+    "    \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/11_baselines_amount\"\n",
     ")\n",
-    "# pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/11_baselines_amount\")\n",
-    "# pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/11_baselines_amount\")\n",
+    "# pipelines_dir = Path(\n",
+    "#     \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/11_baselines_amount\"\n",
+    "# )\n",
     "output_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-2/.analytics.log/.data/_plots\")\n",
     "assert pipelines_dir.exists()\n",
     "assert output_dir.exists()"
@@ -62,6 +62,25 @@
     "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# extract number of epochs\n",
+    "num_epochs: int | None = None\n",
+    "\n",
+    "for p_id, logs in pipeline_logs.items():\n",
+    "    for log in logs:\n",
+    "        if num_epochs is None:\n",
+    "            num_epochs = logs.config.pipeline.training.epochs_per_trigger\n",
+    "        else:\n",
+    "            assert num_epochs == logs.config.pipeline.training.epochs_per_trigger\n",
+    "\n",
+    "assert num_epochs"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -107,7 +126,7 @@
     "\n",
     "\n",
     "def pipeline_name_cleaner(name: str):\n",
-    "    return re.sub(r\".*_dataamount_(\\d+)\", \"trigger every \\\\1 samples\", name)\n",
+    "    return re.sub(r\".*dataamount_(\\d+)\", r\"\\1\", name)\n",
     "\n",
     "\n",
     "df_train[\"pipeline_id\"] = df_train[\"pipeline_id\"].apply(pipeline_name_cleaner)\n",
@@ -125,6 +144,18 @@
     "# df_train[\"duration\"] = df_train[\"duration\"].dt.total_seconds()\n",
     "# df_train[\"train_time_at_trainer\"] = df_train[\"train_time_at_trainer\"] / 1_000  # millis to seconds\n",
     "df_train[\"train_time_at_trainer\"] = df_train[\"train_time_at_trainer\"] / 1_000 / 60  # millis to minutes\n",
+    "\n",
+    "# input samples (as opposed to the passed samples in num_samples): num_samples / num_epochs\n",
+    "df_train[\"num_input_samples\"] = df_train[\"num_samples\"] / num_epochs\n",
+    "\n",
+    "\n",
+    "dataset = pipelines_dir.parent.name\n",
+    "\n",
+    "if dataset != \"yearbook\":\n",
+    "    df_train[\"num_input_samples\"] = df_train[\"num_input_samples\"] / 1_000\n",
+    "    df_train[\"pipeline_id\"] = (df_train[\"pipeline_id\"].astype(int) // 1_000).astype(str) + \"k\"\n",
+    "\n",
+    "\n",
     "df_train"
    ]
   },
@@ -151,61 +182,36 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from analytics.plotting.common.color import discrete_colors, main_color\n",
+    "from analytics.plotting.common.save import save_plot\n",
     "\n",
-    "sns.set_style(\"whitegrid\")\n",
-    "\n",
-    "init_plot()\n",
-    "setup_font(small_label=True, small_title=True)\n",
-    "\n",
-    "\n",
-    "FONTSIZE = 20\n",
-    "DOUBLE_FIG_WIDTH = 10\n",
-    "DOUBLE_FIG_HEIGHT = 3.5\n",
-    "DOUBLE_FIG_SIZE = (DOUBLE_FIG_WIDTH, 1.5 * DOUBLE_FIG_HEIGHT)\n",
-    "\n",
-    "width_factor = 0.5\n",
-    "height_factor = 0.5\n",
-    "\n",
-    "fig = plt.figure(\n",
-    "    edgecolor=\"black\",\n",
-    "    frameon=True,\n",
-    "    figsize=(\n",
-    "        DOUBLE_FIG_WIDTH * width_factor,\n",
-    "        2 * DOUBLE_FIG_HEIGHT * height_factor,\n",
-    "    ),\n",
-    "    dpi=300,\n",
-    ")\n",
-    "\n",
-    "ax1 = sns.regplot(\n",
+    "fig = scatter_linear_regression(\n",
     "    df_train,\n",
-    "    x=\"num_samples\",\n",
-    "    y=\"train_time_at_trainer\",  # duration\n",
-    "    color=main_color(0),\n",
-    ")\n",
-    "\n",
-    "ax2 = sns.scatterplot(\n",
-    "    df_train,\n",
-    "    x=\"num_samples\",\n",
-    "    y=\"train_time_at_trainer\",  # duration\n",
+    "    x=\"num_input_samples\",\n",
+    "    y=\"train_time_at_trainer\",  # duration is broken due to bug in grpc interface\n",
     "    hue=\"pipeline_id\",\n",
     "    palette=(\n",
-    "        discrete_colors(14)[0:5] + discrete_colors(14)[9:14]\n",
+    "        discrete_colors(14)[0:4] + discrete_colors(14)[10:14]\n",
     "        if \"yearbook\" in str(pipelines_dir)\n",
     "        else (\n",
-    "            discrete_colors(8)[0:3] + discrete_colors(8)[6:8]\n",
+    "            discrete_colors(12)[0:4] + discrete_colors(12)[9:12]\n",
     "            if \"huffpost\" in str(pipelines_dir)\n",
     "            else discrete_colors(8)[0:3] + discrete_colors(8)[6:8]\n",
     "        )\n",
     "    ),\n",
-    "    s=200,\n",
-    "    legend=True,\n",
-    "    marker=\"X\",\n",
+    "    title_label=\"Training Size (Samples) vs. Cost (Time)\",\n",
+    "    x_label=\"#Trained Samples (k) / #Epochs\",\n",
+    "    y_label=\"Duration (min)\",\n",
+    "    legend_label=\"Trigger every\",\n",
+    "    height_factor=0.5,\n",
+    "    width_factor=0.575,\n",
+    "    # x_ticks=[],\n",
+    "    # y_ticks=[],\n",
     ")\n",
     "\n",
-    "# Display the plot\n",
-    "plt.tight_layout()\n",
-    "plt.show()"
+    "save_plot(\n",
+    "    fig=fig,\n",
+    "    name=dataset + \"_training_size_vs_cost\",\n",
+    ")"
    ]
   },
   {
@@ -218,6 +224,13 @@
     "# TODO: plot / add number of datapoints to thesis so that the signicance of regression line is clear\n",
     "# State in thesis that there are no outliers to be expected!"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/analytics/plotting/rh_thesis/yb_triggering/arxiv_heatmap.ipynb b/analytics/plotting/rh_thesis/yb_triggering/arxiv_heatmap.ipynb
new file mode 100644
index 000000000..db936afc1
--- /dev/null
+++ b/analytics/plotting/rh_thesis/yb_triggering/arxiv_heatmap.ipynb
@@ -0,0 +1,293 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from analytics.app.data.load import list_pipelines\n",
+    "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# INPUTS\n",
+    "\n",
+    "drift_pipeline = False\n",
+    "if drift_pipeline:\n",
+    "    pipelines_dir = Path(\n",
+    "        \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/10_baselines_time\"\n",
+    "    )\n",
+    "else:\n",
+    "    pipelines_dir = Path(\n",
+    "        \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/10_baselines_time\"\n",
+    "    )\n",
+    "output_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-2/.data/plots/triggering\")\n",
+    "assert pipelines_dir.exists()\n",
+    "assert output_dir.exists()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipelines = list_pipelines(pipelines_dir)\n",
+    "max_pipeline_id = max(pipelines.keys())\n",
+    "pipelines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from analytics.app.data.load import load_pipeline_logs\n",
+    "\n",
+    "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(pipeline_logs[267 if not drift_pipeline else 267])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mode:\n",
+    "pipeline_id = 267 if not drift_pipeline else 267\n",
+    "\n",
+    "# doesn't do anything unless include_composite_model = True\n",
+    "composite_model_variant = \"currently_active_model\"\n",
+    "\n",
+    "patch_yearbook = True\n",
+    "dataset_id = \"arxiv_kaggle_test\"\n",
+    "eval_handler = \"periodic-current\"\n",
+    "metric = \"Accuracy\"\n",
+    "include_composite_model = False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Wrangle data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_log = pipeline_logs[pipeline_id]\n",
+    "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n",
+    "\n",
+    "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n",
+    "\n",
+    "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n",
+    "    # subtracting would interfere with yearbook patching\n",
+    "    pipeline_log,\n",
+    "    df_all[\"sample_time\"].max(),\n",
+    "    pipeline_ref,\n",
+    ")\n",
+    "\n",
+    "df_adjusted = df_eval_single\n",
+    "\n",
+    "df_adjusted = df_adjusted[\n",
+    "    (df_adjusted[\"dataset_id\"] == dataset_id)\n",
+    "    & (df_adjusted[\"eval_handler\"] == eval_handler)\n",
+    "    & (df_adjusted[\"metric\"] == metric)\n",
+    "]\n",
+    "\n",
+    "# in percent (0-100)\n",
+    "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n",
+    "df_adjusted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add composite model\n",
+    "\n",
+    "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n",
+    "# add the pipeline time series, i.e. the performance of the different models stitched together\n",
+    "# according to which model was active\n",
+    "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n",
+    "pipeline_composite_model[\"model_idx\"] = 0\n",
+    "pipeline_composite_model[\"id_model\"] = 0\n",
+    "\n",
+    "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n",
+    "label_map[0] = \"Pipeline composite model\"\n",
+    "\n",
+    "if include_composite_model:\n",
+    "    df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n",
+    "else:\n",
+    "    df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create Plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"]\n",
+    "# .astype(str).str.split(\"-\").str[0]\n",
+    "df_adjusted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n",
+    "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"]  # .dt.year  # TODO\n",
+    "df_train_end_years_per_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n",
+    "df_merged"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_merged.groupby([\"interval_center\", \"real_train_end\"]).size()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build heatmap matrix dataframe:\n",
+    "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "heatmap_data.index.min(), heatmap_data.index.max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from analytics.plotting.common.heatmap import build_heatmap\n",
+    "from analytics.plotting.common.save import save_plot\n",
+    "\n",
+    "fig = build_heatmap(\n",
+    "    heatmap_data,\n",
+    "    reverse_col=True,\n",
+    "    x_custom_ticks=[\n",
+    "        (2 * (x - 1995), str(x))\n",
+    "        for x in [2000, 2009, 2020]  # twice a year\n",
+    "    ],\n",
+    "    y_custom_ticks=[\n",
+    "        (2 * (x - 1995), str(x))\n",
+    "        for x in [2000, 2009, 2020]  # twice a year\n",
+    "    ],\n",
+    "    y_label=\"Trained up to\",\n",
+    "    x_label=\"Evaluation Year\",\n",
+    "    title_label=\"ArticleNet Performance\\nEvaluation Heatmap\",\n",
+    "    color_label=\"Accuracy %\",\n",
+    "    width_factor=0.5,\n",
+    "    height_factor=0.6,\n",
+    "    square=True,\n",
+    "    grid_alpha=0.55,\n",
+    ")\n",
+    "save_plot(fig, \"arxiv_trigger_heatmap_every_6_months\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(df_logs_models.iterrows())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/analytics/plotting/rh_thesis/yb_triggering/hp_heatmap.ipynb b/analytics/plotting/rh_thesis/yb_triggering/hp_heatmap.ipynb
new file mode 100644
index 000000000..6dda87255
--- /dev/null
+++ b/analytics/plotting/rh_thesis/yb_triggering/hp_heatmap.ipynb
@@ -0,0 +1,287 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from analytics.app.data.load import list_pipelines\n",
+    "from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# INPUTS\n",
+    "\n",
+    "drift_pipeline = False\n",
+    "if drift_pipeline:\n",
+    "    pipelines_dir = Path(\n",
+    "        \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/10_baselines_time\"\n",
+    "    )\n",
+    "else:\n",
+    "    pipelines_dir = Path(\n",
+    "        \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/10_baselines_time\"\n",
+    "    )\n",
+    "output_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-2/.data/plots/triggering\")\n",
+    "assert pipelines_dir.exists()\n",
+    "assert output_dir.exists()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipelines = list_pipelines(pipelines_dir)\n",
+    "max_pipeline_id = max(pipelines.keys())\n",
+    "pipelines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from analytics.app.data.load import load_pipeline_logs\n",
+    "\n",
+    "pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(pipeline_logs[275 if not drift_pipeline else 275])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mode:\n",
+    "pipeline_id = 275 if not drift_pipeline else 275\n",
+    "\n",
+    "# doesn't do anything unless include_composite_model = True\n",
+    "composite_model_variant = \"currently_active_model\"\n",
+    "\n",
+    "patch_yearbook = True\n",
+    "dataset_id = \"huffpost_kaggle_test\"\n",
+    "eval_handler = \"periodic-current\"\n",
+    "metric = \"Accuracy\"\n",
+    "include_composite_model = False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Wrangle data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_log = pipeline_logs[pipeline_id]\n",
+    "pipeline_ref = f\"{pipeline_id}\".zfill(len(str(max_pipeline_id))) + f\" - {pipelines[pipeline_id][0]}\"\n",
+    "\n",
+    "df_all = logs_dataframe(pipeline_log, pipeline_ref)\n",
+    "\n",
+    "df_logs_models, _, df_eval_single = dfs_models_and_evals(\n",
+    "    # subtracting would interfere with yearbook patching\n",
+    "    pipeline_log,\n",
+    "    df_all[\"sample_time\"].max(),\n",
+    "    pipeline_ref,\n",
+    ")\n",
+    "\n",
+    "df_adjusted = df_eval_single\n",
+    "\n",
+    "df_adjusted = df_adjusted[\n",
+    "    (df_adjusted[\"dataset_id\"] == dataset_id)\n",
+    "    & (df_adjusted[\"eval_handler\"] == eval_handler)\n",
+    "    & (df_adjusted[\"metric\"] == metric)\n",
+    "]\n",
+    "\n",
+    "# in percent (0-100)\n",
+    "df_adjusted[\"value\"] = df_adjusted[\"value\"] * 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_adjusted = df_adjusted.sort_values(by=[\"interval_center\"])\n",
+    "df_adjusted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add composite model\n",
+    "\n",
+    "assert df_adjusted[\"pipeline_ref\"].nunique() <= 1\n",
+    "# add the pipeline time series, i.e. the performance of the different models stitched together\n",
+    "# according to which model was active\n",
+    "pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]]\n",
+    "pipeline_composite_model[\"model_idx\"] = 0\n",
+    "pipeline_composite_model[\"id_model\"] = 0\n",
+    "\n",
+    "label_map = {k: f\"{k}\" for k, v in df_adjusted[[\"model_idx\", \"id_model\"]].values}\n",
+    "label_map[0] = \"Pipeline composite model\"\n",
+    "\n",
+    "if include_composite_model:\n",
+    "    df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])\n",
+    "else:\n",
+    "    df_adjusted[\"model_idx\"] = df_adjusted[\"model_idx\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create Plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_adjusted[\"interval_center\"] = df_adjusted[\"interval_center\"]\n",
+    "# .astype(str).str.split(\"-\").str[0]\n",
+    "df_adjusted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_train_end_years_per_model = df_logs_models[[\"model_idx\", \"real_train_end\"]]\n",
+    "df_train_end_years_per_model[\"real_train_end\"] = df_train_end_years_per_model[\"real_train_end\"]  # .dt.year  # TODO\n",
+    "df_train_end_years_per_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_merged = df_adjusted.merge(df_train_end_years_per_model, on=\"model_idx\", how=\"left\")\n",
+    "df_merged"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_merged.groupby([\"interval_center\", \"real_train_end\"]).size()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build heatmap matrix dataframe:\n",
+    "heatmap_data = df_merged.pivot(index=[\"real_train_end\"], columns=\"interval_center\", values=\"value\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "heatmap_data.index.min(), heatmap_data.index.max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from analytics.plotting.common.heatmap import build_heatmap\n",
+    "from analytics.plotting.common.save import save_plot\n",
+    "\n",
+    "fig = build_heatmap(\n",
+    "    heatmap_data,\n",
+    "    reverse_col=True,\n",
+    "    x_custom_ticks=[(4 * (x - 2012) + 0.5, str(x)) for x in [2014, 2018, 2021]],\n",
+    "    y_custom_ticks=[(4 * (x - 2012), str(x)) for x in [2014, 2018, 2021]],\n",
+    "    y_label=\"Trained up to\",\n",
+    "    x_label=\"Evaluation Year\",\n",
+    "    title_label=\"ArticleNet Performance\\nEvaluation Heatmap\",\n",
+    "    color_label=\"Accuracy %\",\n",
+    "    width_factor=0.5,\n",
+    "    height_factor=0.6,\n",
+    "    square=True,\n",
+    "    grid_alpha=0.55,\n",
+    ")\n",
+    "save_plot(fig, \"hp_trigger_heatmap_quarterly\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(df_logs_models.iterrows())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/analytics/plotting/rh_thesis/yb_triggering/yb_heatmap.ipynb b/analytics/plotting/rh_thesis/yb_triggering/yb_heatmap.ipynb
index 3bcc003c5..d810fa86e 100644
--- a/analytics/plotting/rh_thesis/yb_triggering/yb_heatmap.ipynb
+++ b/analytics/plotting/rh_thesis/yb_triggering/yb_heatmap.ipynb
@@ -17,15 +17,6 @@
     "%autoreload 2"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: move to leader function"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -281,11 +272,12 @@
     "    y_ticks=[1950, 1975, 2000],\n",
     "    y_label=\"Trained up to\",\n",
     "    x_label=\"Evaluation Year\",\n",
-    "    title_label=\"Model Performance\\nEvaluation Heatmap\",\n",
+    "    title_label=\"YearbookNet Performance\\nEvaluation Heatmap\",\n",
     "    color_label=\"Accuracy %\",\n",
     "    width_factor=0.5,\n",
     "    height_factor=0.6,\n",
     "    square=True,\n",
+    "    grid_alpha=0.4,\n",
     ")\n",
     "save_plot(fig, \"yb_trigger_heatmap_yearly\")"
    ]
@@ -298,17 +290,6 @@
    "source": [
     "list(df_logs_models.iterrows())"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for img_type in [\"png\", \"svg\"]:\n",
-    "    img_path = output_dir / f\"yearbook_heatmap{'_trigger' if drift_pipeline else ''}.{img_type}\"\n",
-    "    fig.savefig(img_path, bbox_inches=\"tight\", transparent=True)"
-   ]
   }
  ],
  "metadata": {