Skip to content

Commit

Permalink
add some dataset paths to notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjanovsky committed Dec 1, 2023
1 parent f2feaa5 commit c93a6cf
Showing 1 changed file with 35 additions and 29 deletions.
64 changes: 35 additions & 29 deletions notebooks/cc/references.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"from collections.abc import Iterable\n",
"from pathlib import Path\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import networkx as nx\n",
"import networkx.algorithms.community as nx_comm\n",
Expand All @@ -46,7 +47,14 @@
"\n",
"%matplotlib inline\n",
"\n",
"# LaTeX plotting\n",
"# matplotlib.use(\"pgf\")\n",
"# plt.rcParams[\"pgf.texsystem\"] = \"pdflatex\"\n",
"# plt.rcParams[\"font.family\"] = \"serif\"\n",
"# plt.rcParams[\"text.usetex\"] = True\n",
"# plt.rcParams[\"pgf.rcfonts\"] = False\n",
"\n",
"\n",
"sns.set_theme(style=\"white\")\n",
"plt.rcParams[\"axes.linewidth\"] = 0.5\n",
"plt.rcParams[\"legend.fontsize\"] = 6.5\n",
Expand All @@ -60,10 +68,7 @@
"plt.rcParams[\"xtick.major.size\"] = 5\n",
"plt.rcParams[\"xtick.major.width\"] = 0.5\n",
"plt.rcParams[\"xtick.major.pad\"] = 0\n",
"# plt.rcParams[\"pgf.texsystem\"] = \"pdflatex\"\n",
"# plt.rcParams[\"font.family\"] = \"serif\"\n",
"# plt.rcParams[\"text.usetex\"] = True\n",
"# plt.rcParams[\"pgf.rcfonts\"] = False\n",
"\n",
"plt.rcParams[\"axes.titlesize\"] = 8\n",
"plt.rcParams[\"legend.handletextpad\"] = 0.3\n",
"plt.rcParams[\"lines.markersize\"] = 4\n",
Expand All @@ -76,9 +81,12 @@
"\n",
"# plt.rcParams['figure.figsize'] = (10, 6)\n",
"\n",
"REPO_ROOT = Path().resolve()\n",
"RESULTS_DIR = Path(\"./results/references\")\n",
"RESULTS_DIR.mkdir(exist_ok=True, parents=True)\n",
"SMARTCARD_CATEGORY = \"ICs, Smart Cards and Smart Card-Related Devices and Systems\"\n"
"SMARTCARD_CATEGORY = \"ICs, Smart Cards and Smart Card-Related Devices and Systems\"\n",
"DATASET_PATH = REPO_ROOT / \"dataset/cc_november_23/dataset.json\"\n",
"PREDICTIONS_PATH = REPO_ROOT / \"dataset/reference_prediction/predictions.csv\"\n"
]
},
{
Expand Down Expand Up @@ -193,9 +201,9 @@
"metadata": {},
"outputs": [],
"source": [
"dset = CCDataset.from_json(\"/var/tmp/xjanovsk/certs/sec-certs/dataset/cc_november_23/dataset.json\")\n",
"dset = CCDataset.from_json(DATASET_PATH)\n",
"cc_df = preprocess_cc_df(dset.to_pandas())\n",
"refs_df = preprocess_refs_df(\"/var/tmp/xjanovsk/certs/sec-certs/dataset/reference_prediction/predictions.csv\", cc_df)\n",
"refs_df = preprocess_refs_df(PREDICTIONS_PATH, cc_df)\n",
"unique_labels = refs_df.reference_label.unique().tolist()\n",
"\n",
"# Load labeled reference graph as networkx directed graph\n",
Expand Down Expand Up @@ -269,8 +277,9 @@
"\n",
" df_melted = df[[\"n_refs\", \"n_trans_refs\", \"n_in_refs\", \"n_in_trans_refs\"]].melt()\n",
"    # The melted `variable` column holds the four names selected above; none ends with \"by\",\n",
"    # so the incoming-reference columns are recognised by their `n_in_` prefix instead.\n",
"    df_melted[\"incoming\"] = df_melted.variable.map(lambda x: x.startswith(\"n_in_\"))\n",
" sns.catplot(data=df_melted, kind=\"boxen\", x=\"variable\", y=\"value\", col=\"variable\", sharex=False, sharey=False)\n",
" plt.savefig(RESULTS_DIR / \"boxen_plot_references.pdf\", bbox_inches=\"tight\")\n",
"    g = sns.catplot(data=df_melted, kind=\"boxen\", x=\"variable\", y=\"value\", col=\"variable\", sharex=False, sharey=False)\n",
"    # catplot returns a FacetGrid (not an Axes); it exposes its Figure as `.figure` and has no get_figure().\n",
"    fig = g.figure\n",
"    fig.savefig(RESULTS_DIR / \"boxen_plot_references.pdf\", bbox_inches=\"tight\")\n",
"\n",
" plt.show()\n",
"\n",
Expand Down Expand Up @@ -327,7 +336,8 @@
"\n",
" g = sns.lineplot(data=df_reach_evolution_melted, x=\"date\", y=\"reach\", hue=\"certificate\")\n",
" g.set(title=\"Reach of top-10 certificates in time\", xlabel=\"Time\", ylabel=\"Certificate reach\")\n",
" plt.savefig(RESULTS_DIR / \"lineplot_top_certificate_reach.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lineplot_top_certificate_reach.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
"\n",
" return top_10_certs.index.tolist()\n",
Expand Down Expand Up @@ -385,7 +395,8 @@
"\n",
" g = sns.lineplot(data=df_avg_num_refs_melted, x=\"date\", y=\"n_references\", hue=\"category\")\n",
" g.set(title=\"Average number of references in certificates\", xlabel=\"Time\", ylabel=\"Number of references\")\n",
" plt.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
"\n",
" return {}\n",
Expand Down Expand Up @@ -416,7 +427,8 @@
" xlabel=\"Time\",\n",
" ylabel=\"Number of (transitively) referencing certificates\",\n",
" )\n",
" plt.savefig(RESULTS_DIR / \"lineplot_avg_n_references.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lineplot_avg_reach.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
"\n",
" return {}\n",
Expand All @@ -426,15 +438,6 @@
"compute_avg_reach_over_time(cc_df_comp)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cc_df_comp.head()\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand Down Expand Up @@ -490,7 +493,8 @@
" xlabel=\"Time\",\n",
" ylabel=\"Number of certificates\",\n",
" )\n",
" plt.savefig(RESULTS_DIR / \"lienplot_n_active_certs_vs_n_references.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lienplot_n_active_certs_vs_n_references.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
" return {}\n",
"\n",
Expand Down Expand Up @@ -536,7 +540,8 @@
" xlabel=\"Time\",\n",
" ylabel=\"Number of certificates\",\n",
" )\n",
" plt.savefig(RESULTS_DIR / \"lineplot_references_summary.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lineplot_references_summary.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
"\n",
" df_ratios = df_summary_references.copy()\n",
Expand Down Expand Up @@ -630,7 +635,8 @@
" xlabel=\"Time\",\n",
" ylabel=\"Number of certificates\",\n",
" )\n",
" plt.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_archived.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_archived.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
"\n",
" return {}\n",
Expand Down Expand Up @@ -704,7 +710,8 @@
" xlabel=\"Time\",\n",
" ylabel=\"Number of certificates\",\n",
" )\n",
" plt.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_vulnerable.pdf\", bbox_inches=\"tight\")\n",
" fig = g.get_figure()\n",
" fig.savefig(RESULTS_DIR / \"lienplot_active_certs_referencing_vulnerable.pdf\", bbox_inches=\"tight\")\n",
" plt.show()\n",
" return {}\n",
"\n",
Expand Down Expand Up @@ -780,8 +787,6 @@
" exploded[\"ref_category\"] = exploded.refs.map(lambda x: cert_id_to_category_mapping[x] if pd.notnull(x) else np.nan)\n",
" exploded = exploded.loc[exploded.ref_category.notnull()]\n",
"\n",
" exploded_with_refs = exploded.loc[exploded.ref_category != \"No references\"]\n",
"\n",
" all_categories = set(exploded.category.unique()) | set(exploded.ref_category.unique())\n",
" colors = list(sns.color_palette(\"hls\", len(all_categories), as_cmap=False).as_hex())\n",
" color_dict = dict(zip(all_categories, colors))\n",
Expand All @@ -800,7 +805,8 @@
" ax=axes,\n",
" )\n",
"\n",
" plt.show()\n",
" # plt.show()\n",
" plt.savefig(RESULTS_DIR / \"sankey_references_categories.pdf\", bbox_inches=\"tight\")\n",
"\n",
" return {}\n",
"\n",
Expand Down Expand Up @@ -1215,7 +1221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.10.13"
},
"vscode": {
"interpreter": {
Expand Down

0 comments on commit c93a6cf

Please sign in to comment.