From c93a6cf84984e692f08f36ed9629d0c57a3330a2 Mon Sep 17 00:00:00 2001 From: Adam Janovsky Date: Fri, 1 Dec 2023 15:04:28 +0100 Subject: [PATCH] add some dataset paths to notebook --- notebooks/cc/references.ipynb | 64 +++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/notebooks/cc/references.ipynb b/notebooks/cc/references.ipynb index 74e50dc5..ff05ea3d 100644 --- a/notebooks/cc/references.ipynb +++ b/notebooks/cc/references.ipynb @@ -30,6 +30,7 @@ "from collections.abc import Iterable\n", "from pathlib import Path\n", "\n", + "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "import networkx.algorithms.community as nx_comm\n", @@ -46,7 +47,14 @@ "\n", "%matplotlib inline\n", "\n", + "# LaTeX plotting\n", "# matplotlib.use(\"pgf\")\n", + "# plt.rcParams[\"pgf.texsystem\"] = \"pdflatex\"\n", + "# plt.rcParams[\"font.family\"] = \"serif\"\n", + "# plt.rcParams[\"text.usetex\"] = True\n", + "# plt.rcParams[\"pgf.rcfonts\"] = False\n", + "\n", + "\n", "sns.set_theme(style=\"white\")\n", "plt.rcParams[\"axes.linewidth\"] = 0.5\n", "plt.rcParams[\"legend.fontsize\"] = 6.5\n", @@ -60,10 +68,7 @@ "plt.rcParams[\"xtick.major.size\"] = 5\n", "plt.rcParams[\"xtick.major.width\"] = 0.5\n", "plt.rcParams[\"xtick.major.pad\"] = 0\n", - "# plt.rcParams[\"pgf.texsystem\"] = \"pdflatex\"\n", - "# plt.rcParams[\"font.family\"] = \"serif\"\n", - "# plt.rcParams[\"text.usetex\"] = True\n", - "# plt.rcParams[\"pgf.rcfonts\"] = False\n", + "\n", "plt.rcParams[\"axes.titlesize\"] = 8\n", "plt.rcParams[\"legend.handletextpad\"] = 0.3\n", "plt.rcParams[\"lines.markersize\"] = 4\n", @@ -76,9 +81,12 @@ "\n", "# plt.rcParams['figure.figsize'] = (10, 6)\n", "\n", + "REPO_ROOT = Path().resolve()\n", "RESULTS_DIR = Path(\"./results/references\")\n", "RESULTS_DIR.mkdir(exist_ok=True, parents=True)\n", - "SMARTCARD_CATEGORY = \"ICs, Smart Cards and Smart Card-Related Devices and Systems\"\n" + "SMARTCARD_CATEGORY = \"ICs, Smart Cards and Smart Card-Related Devices and Systems\"\n", + "DATASET_PATH = REPO_ROOT / \"dataset/cc_november_23/dataset.json\"\n", + "PREDICTIONS_PATH = REPO_ROOT / \"dataset/reference_prediction/predictions.csv\"\n" ] }, { @@ -193,9 +201,9 @@ "metadata": {}, "outputs": [], "source": [ - "dset = CCDataset.from_json(\"/var/tmp/xjanovsk/certs/sec-certs/dataset/cc_november_23/dataset.json\")\n", + "dset = CCDataset.from_json(DATASET_PATH)\n", "cc_df = preprocess_cc_df(dset.to_pandas())\n", - "refs_df = preprocess_refs_df(\"/var/tmp/xjanovsk/certs/sec-certs/dataset/reference_prediction/predictions.csv\", cc_df)\n", + "refs_df = preprocess_refs_df(PREDICTIONS_PATH, cc_df)\n", "unique_labels = refs_df.reference_label.unique().tolist()\n", "\n", "# Load labeled reference graph as networkx directed graph\n", @@ -269,8 +277,9 @@ "\n", " df_melted = df[[\"n_refs\", \"n_trans_refs\", \"n_in_refs\", \"n_in_trans_refs\"]].melt()\n", " df_melted[\"incoming\"] = df_melted.variable.map(lambda x: bool(x.endswith(\"by\")))\n", - " sns.catplot(data=df_melted, kind=\"boxen\", x=\"variable\", y=\"value\", col=\"variable\", sharex=False, sharey=False)\n", - " plt.savefig(RESULTS_DIR / \"boxen_plot_references.pdf\", bbox_inches=\"tight\")\n", + " g = sns.catplot(data=df_melted, kind=\"boxen\", x=\"variable\", y=\"value\", col=\"variable\", sharex=False, sharey=False)\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"boxen_plot_references.pdf\", bbox_inches=\"tight\")\n", "\n", " plt.show()\n", "\n", @@ -327,7 +336,8 @@ "\n", " g = sns.lineplot(data=df_reach_evolution_melted, x=\"date\", y=\"reach\", hue=\"certificate\")\n", " g.set(title=\"Reach of top-10 certificates in time\", xlabel=\"Time\", ylabel=\"Certificate reach\")\n", - " plt.savefig(RESULTS_DIR / \"lineplot_top_certificate_reach.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lineplot_top_certificate_reach.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", "\n", " return top_10_certs.index.tolist()\n", @@ -385,7 +395,8 @@ "\n", " g = sns.lineplot(data=df_avg_num_refs_melted, x=\"date\", y=\"n_references\", hue=\"category\")\n", " g.set(title=\"Average number of references in certificates\", xlabel=\"Time\", ylabel=\"Number of references\")\n", - " plt.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", "\n", " return {}\n", @@ -416,7 +427,8 @@ " xlabel=\"Time\",\n", " ylabel=\"Number of (transitively) referencing certificates\",\n", " )\n", - " plt.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lineplot_avg_reach.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", "\n", " return {}\n", @@ -426,15 +438,6 @@ "compute_avg_reach_over_time(cc_df_comp)\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cc_df_comp.head()\n" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -490,7 +493,8 @@ " xlabel=\"Time\",\n", " ylabel=\"Number of certificates\",\n", " )\n", - " plt.savefig(RESULTS_DIR / \"lienplot_n_active_certs_vs_n_references.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lienplot_n_active_certs_vs_n_references.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", " return {}\n", "\n", @@ -536,7 +540,8 @@ " xlabel=\"Time\",\n", " ylabel=\"Number of certificates\",\n", " )\n", - " plt.savefig(RESULTS_DIR / \"lineplot_references_summary.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lineplot_references_summary.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", "\n", " df_ratios = df_summary_references.copy()\n", @@ -630,7 +635,8 @@ " xlabel=\"Time\",\n", " ylabel=\"Number of certificates\",\n", " )\n", - " plt.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_archived.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_archived.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", "\n", " return {}\n", @@ -704,7 +710,8 @@ " xlabel=\"Time\",\n", " ylabel=\"Number of certificates\",\n", " )\n", - " plt.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_vulnerable.pdf\", bbox_inches=\"tight\")\n", + " fig = g.get_figure()\n", + " fig.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_vulnerable.pdf\", bbox_inches=\"tight\")\n", " plt.show()\n", " return {}\n", "\n", @@ -780,8 +787,6 @@ " exploded[\"ref_category\"] = exploded.refs.map(lambda x: cert_id_to_category_mapping[x] if pd.notnull(x) else np.nan)\n", " exploded = exploded.loc[exploded.ref_category.notnull()]\n", "\n", - " exploded_with_refs = exploded.loc[exploded.ref_category != \"No references\"]\n", - "\n", " all_categories = set(exploded.category.unique()) | set(exploded.ref_category.unique())\n", " colors = list(sns.color_palette(\"hls\", len(all_categories), as_cmap=False).as_hex())\n", " color_dict = dict(zip(all_categories, colors))\n", @@ -800,7 +805,8 @@ " ax=axes,\n", " )\n", "\n", - " plt.show()\n", + " # plt.show()\n", + " plt.savefig(RESULTS_DIR / \"sankey_references_categories.pdf\", bbox_inches=\"tight\")\n", "\n", " return {}\n", "\n", @@ -1215,7 +1221,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.13" }, "vscode": { "interpreter": {