Commit

More plots
robinholzi committed Sep 22, 2024
1 parent 85e88a1 commit bec88be
Showing 3 changed files with 806 additions and 12 deletions.
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from analytics.app.data.load import list_pipelines\n",
"from analytics.plotting.common.common import init_plot\n",
"from analytics.plotting.common.font import setup_font\n",
"from modyn.supervisor.internal.grpc.enums import PipelineStage\n",
"from modyn.supervisor.internal.pipeline_executor.models import StageLog\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# INPUTS\n",
"\n",
"pipelines_dir = Path(\n",
" \"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/arxiv/11_baselines_amount\"\n",
")\n",
"# pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/11_baselines_amount\")\n",
"# pipelines_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/11_baselines_amount\")\n",
"output_dir = Path(\"/Users/robinholzinger/robin/dev/eth/modyn-2/.analytics.log/.data/_plots\")\n",
"assert pipelines_dir.exists()\n",
"assert output_dir.exists()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipelines = list_pipelines(pipelines_dir)\n",
"max_pipeline_id = max(pipelines.keys())\n",
"pipelines"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from analytics.app.data.load import load_pipeline_logs\n",
"\n",
"pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for (p_id, (_, p_path)) in pipelines.items()}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Wrangle data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list_df_train: list[pd.DataFrame] = []\n",
"\n",
"for pipeline_id in pipelines:\n",
" logs = pipeline_logs[pipeline_id]\n",
" train_logs = [record for record in logs.supervisor_logs.stage_runs if record.id == PipelineStage.TRAIN.name]\n",
" df_train = StageLog.df(stage_logs=train_logs, extended=True)\n",
" df_train[\"pipeline_id\"] = pipelines[pipeline_id][0]\n",
" list_df_train.append(df_train)\n",
"\n",
"df_train = pd.concat(list_df_train)\n",
"df_train.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Conversion"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Clean pipeline name\n",
"\n",
"import re\n",
"\n",
"\n",
"def pipeline_name_cleaner(name: str):\n",
" return re.sub(r\".*_dataamount_(\\d+)\", \"trigger every \\\\1 samples\", name)\n",
"\n",
"\n",
"df_train[\"pipeline_id\"] = df_train[\"pipeline_id\"].apply(pipeline_name_cleaner)\n",
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to seconds\n",
"df_train[\"duration\"] = df_train[\"duration\"].dt.total_seconds() / 60\n",
"# df_train[\"duration\"] = df_train[\"duration\"].dt.total_seconds()\n",
"# df_train[\"train_time_at_trainer\"] = df_train[\"train_time_at_trainer\"] / 1_000 # millis to seconds\n",
"df_train[\"train_time_at_trainer\"] = df_train[\"train_time_at_trainer\"] / 1_000 / 60 # millis to minutes\n",
"df_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sort by number of samples\n",
"df_train = df_train.sort_values(by=\"num_samples\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from analytics.plotting.common.color import discrete_colors, main_color\n",
"\n",
"sns.set_style(\"whitegrid\")\n",
"\n",
"init_plot()\n",
"setup_font(small_label=True, small_title=True)\n",
"\n",
"\n",
"FONTSIZE = 20\n",
"DOUBLE_FIG_WIDTH = 10\n",
"DOUBLE_FIG_HEIGHT = 3.5\n",
"DOUBLE_FIG_SIZE = (DOUBLE_FIG_WIDTH, 1.5 * DOUBLE_FIG_HEIGHT)\n",
"\n",
"width_factor = 0.5\n",
"height_factor = 0.5\n",
"\n",
"fig = plt.figure(\n",
" edgecolor=\"black\",\n",
" frameon=True,\n",
" figsize=(\n",
" DOUBLE_FIG_WIDTH * width_factor,\n",
" 2 * DOUBLE_FIG_HEIGHT * height_factor,\n",
" ),\n",
" dpi=300,\n",
")\n",
"\n",
"ax1 = sns.regplot(\n",
" df_train,\n",
" x=\"num_samples\",\n",
" y=\"train_time_at_trainer\", # duration\n",
" color=main_color(0),\n",
")\n",
"\n",
"ax2 = sns.scatterplot(\n",
" df_train,\n",
" x=\"num_samples\",\n",
" y=\"train_time_at_trainer\", # duration\n",
" hue=\"pipeline_id\",\n",
" palette=(\n",
" discrete_colors(14)[0:5] + discrete_colors(14)[9:14]\n",
" if \"yearbook\" in str(pipelines_dir)\n",
" else (\n",
" discrete_colors(8)[0:3] + discrete_colors(8)[6:8]\n",
" if \"huffpost\" in str(pipelines_dir)\n",
" else discrete_colors(8)[0:3] + discrete_colors(8)[6:8]\n",
" )\n",
" ),\n",
" s=200,\n",
" legend=True,\n",
" marker=\"X\",\n",
")\n",
"\n",
"# Display the plot\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO: run more variants of in less dense areas\n",
"# TODO: plot / add number of datapoints to thesis so that the signicance of regression line is clear\n",
"# State in thesis that there are no outliers to be expected!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}