",
sizex, sizey];
- gridster.add_widget.apply(gridster, nw);
+ gridster.add_widget.apply(gridster, nw_additionalInformation);
});
$('body').on("click", ".gridster ul > li .remove", function () {
diff --git a/frontend/metrics_program.html b/frontend/metrics_program.html
index d54d04a..9cdfc18 100644
--- a/frontend/metrics_program.html
+++ b/frontend/metrics_program.html
@@ -1,13 +1,10 @@
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
@@ -20,9 +17,10 @@
-
-
-
+
+
+
+
diff --git a/frontend/metrics_program_withoutEnergyMetrics.html b/frontend/metrics_program_withoutEnergyMetrics.html
index e820cc9..7916b4b 100644
--- a/frontend/metrics_program_withoutEnergyMetrics.html
+++ b/frontend/metrics_program_withoutEnergyMetrics.html
@@ -1,14 +1,11 @@
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
@@ -20,9 +17,10 @@
-
-
-
+
+
+
+
diff --git a/frontend/metrics_study.html b/frontend/metrics_study.html
index 79bf1f0..48e65c8 100644
--- a/frontend/metrics_study.html
+++ b/frontend/metrics_study.html
@@ -1,13 +1,10 @@
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
diff --git a/frontend/metrics_study_withoutEnergyMetrics.html b/frontend/metrics_study_withoutEnergyMetrics.html
index 4a2137e..26018f1 100644
--- a/frontend/metrics_study_withoutEnergyMetrics.html
+++ b/frontend/metrics_study_withoutEnergyMetrics.html
@@ -1,14 +1,11 @@
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 607a867..9aa02db 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -29,10 +29,10 @@
"month = 11\n",
"program = \"default\"\n",
"study_type = \"study\"\n",
- "mode_of_interest = None\n",
"include_test_users = False\n",
"dynamic_labels = {}\n",
- "use_imperial = False"
+ "use_imperial = True\n",
+ "sensed_algo_prefix = \"cleaned\""
]
},
{
@@ -46,6 +46,7 @@
"\n",
"import numpy as np\n",
"import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
"\n",
"from plots import *\n",
"import scaffolding\n",
@@ -79,7 +80,7 @@
"id": "796f59c7",
"metadata": {},
"source": [
- "### Color Mapping for labels"
+ "### Color Dictionary"
]
},
{
@@ -89,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
- "colors_mode, colors_purpose = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)"
+ "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)"
]
},
{
@@ -97,7 +98,7 @@
"id": "intellectual-columbus",
"metadata": {},
"source": [
- "## Collect Data From Database"
+ "## Collect Data From Database for Generic Metrics"
]
},
{
@@ -119,56 +120,127 @@
},
{
"cell_type": "markdown",
- "id": "modified-skiing",
+ "id": "6d44c87e",
"metadata": {},
"source": [
- "## Generic Metrics"
+ "## Collect Data from Database for Sensed Metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "55ec383b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = scaffolding.load_viz_notebook_sensor_inference_data(year,\n",
+ " month,\n",
+ " program,\n",
+ " include_test_users,\n",
+ " sensed_algo_prefix)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a1bd14bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_debug_df = debug_df.combine_first(debug_df_sensed)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a0ec842",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "quality_text, quality_text_sensed"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cacc71f3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "labeled_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text)\n",
+ "# labeled_match\n",
+ "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n",
+ "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
+ "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
+ "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1af1f02b",
+ "metadata": {},
+ "source": [
+ "## 100% Stacked Bar Charts"
]
},
{
"cell_type": "markdown",
- "id": "distributed-peace",
+ "id": "e50959c1",
"metadata": {},
"source": [
- "### Distribution of Mode_confirm attribute"
+ "### Distribution of modes"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "tracked-serbia",
- "metadata": {
- "scrolled": false
- },
+ "id": "aef741fe",
+ "metadata": {},
"outputs": [],
"source": [
- "file_name='ntrips_mode_confirm%s' % file_suffix\n",
+ "file_name = f'ntrips_total{file_suffix}'\n",
"plot_title_no_quality= \"Number of trips for each mode (selected by users)\"\n",
+ "\n",
"try:\n",
- " labels_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).keys().tolist()\n",
- " values_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).tolist() \n",
- " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_mode(plot_title,labels_mc,values_mc,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n",
- " print(expanded_ct['Mode_confirm'].value_counts(dropna=True))\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+ " # We will have text results corresponding to the axes for simplicity and consistency\n",
+ " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+ " \n",
+ " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7fa4f3da",
+ "metadata": {},
+ "source": [
+ "### Distribution of modes in commute trips"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "about-seafood",
- "metadata": {
- "scrolled": false
- },
+ "id": "5c6cd4cf",
+ "metadata": {},
"outputs": [],
"source": [
"plot_title_no_quality= \"Number of commute trips for each mode (selected by users)\"\n",
- "file_name= 'ntrips_commute_mode_confirm%s' % file_suffix\n",
+ "file_name = f\"ntrips_commute_mode_confirm{file_suffix}\"\n",
"\n",
"try:\n",
+ " # Preprocess to find commute trips\n",
" if (len(dynamic_labels)):\n",
" purpose_map_label = scaffolding.mapping_labels(dynamic_labels, \"PURPOSE\")\n",
" translation_work = purpose_map_label['work']\n",
@@ -176,22 +248,28 @@
" else:\n",
" trip_purpose_query = \"Trip_purpose == 'Work'\"\n",
"\n",
- " labels_mc = expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).keys().tolist()\n",
- " values_mc = expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).tolist()\n",
- " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct.query(trip_purpose_query), \"commute\", include_test_users)\n",
- " plot_title= plot_title_no_quality+\"\\n\"+commute_quality_text\n",
+ " expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n",
+ " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
+ " plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n",
" \n",
- " pie_chart_mode(plot_title,labels_mc,values_mc,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n",
- "except:\n",
- " debug_df.loc[\"Commute_trips\"] = len(expanded_ct.query(trip_purpose_query)) if \"Trip_purpose\" in expanded_ct.columns else 0\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " # Plot entries\n",
+ " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n",
+ " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct_commute.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n",
+ " set_title_and_save(fig, text_results, plot_title, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
]
},
{
"cell_type": "markdown",
- "id": "careful-spencer",
+ "id": "ffb2df0b",
"metadata": {},
"source": [
"### Distribution of Trip_purpose attribute"
@@ -200,30 +278,30 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "conservative-september",
- "metadata": {
- "scrolled": false
- },
+ "id": "46474ada",
+ "metadata": {},
"outputs": [],
"source": [
"plot_title_no_quality=\"Number of trips for each purpose (selected by users)\"\n",
- "file_name= 'ntrips_purpose%s' % file_suffix\n",
- "\n",
+ "file_name= f\"ntrips_purpose{file_suffix}\"\n",
"try:\n",
- " labels_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n",
- " values_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).tolist()\n",
- " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_purpose(plot_title,labels_tp,values_tp,colors_purpose,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n",
- " print(expanded_ct['Trip_purpose'].value_counts(dropna=True))\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+ " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Trip_purpose\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n",
+ " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)\n",
+ " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
]
},
{
"cell_type": "markdown",
- "id": "crucial-keyboard",
+ "id": "8fc63a45",
"metadata": {},
"source": [
"### Mode choice for trips under 80% mark"
@@ -232,80 +310,130 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "identified-replica",
- "metadata": {
- "scrolled": false
- },
+ "id": "77ece8ee",
+ "metadata": {},
"outputs": [],
"source": [
- "file_name ='ntrips_under10miles_mode_confirm%s' % file_suffix\n",
+ "file_name = f'ntrips_under80{file_suffix}'\n",
"\n",
"try:\n",
- " #determine 80th percentile\n",
- " cutoff = expanded_ct.distance.quantile(0.8)\n",
+ " # Preprocess to find cutoff and filter below cutoff\n",
+ " # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips\n",
+ " cutoff = expanded_ct_sensed.distance.quantile(0.8)\n",
" if pd.isna(cutoff):\n",
" cutoff = 0\n",
- " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n",
+ " dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n",
" dist_threshold = str(dist_threshold) \n",
"\n",
- " plot_title_no_quality=\"Mode confirmations for trips under \" + dist_threshold + \" \" + label_units_lower\n",
+ " plot_title_no_quality=\"Number of trips per travel model under \" + dist_threshold + \" \" + label_units_lower\n",
" plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n",
"\n",
- " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).keys().tolist()\n",
- " values_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).tolist()\n",
- " d10_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n",
- " plot_title = plot_title_no_quality+\"\\n\"+d10_quality_text\n",
- " pie_chart_mode(plot_title,labels_d10,values_d10,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n",
- " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True))\n",
- "\n",
- "except:\n",
- " d10_df = expanded_ct.query(\"distance <= \" + str(cutoff)) if \"distance\" in expanded_ct.columns else expanded_ct\n",
- " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)]\n",
+ " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n",
+ " sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n",
+ " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\"\n",
+ " \n",
+ " # Plot entries\n",
+ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+ " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct_u80.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+ " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ "    # we can hit a missing-attribute error during pre-processing, in which case we should show the missing plot\n",
+ "    # here, our pre-processing only relies on sensed data, but the missing plot uses merged_debug_df (debug_df combined with debug_df_sensed)\n",
+ " plt.clf()\n",
+ " plot_title_default = \"Number of trips below 80th percentile in each mode\"\n",
+ " generate_missing_plot(plot_title_default, merged_debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_default)\n",
+ " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
]
},
{
"cell_type": "markdown",
- "id": "dominant-company",
+ "id": "b560cb32",
"metadata": {},
"source": [
- "### Miles per chosen transport mode"
+ "### Total Trip Length covered by each mode"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "satisfied-sharing",
- "metadata": {
- "scrolled": false
- },
+ "id": "ffccb96f",
+ "metadata": {},
"outputs": [],
"source": [
- "plot_title_no_quality= label_units + \" for each mode (selected by users)\"\n",
- "plot_title=plot_title_no_quality + '\\n' + quality_text\n",
- "file_name ='miles_mode_confirm%s' % file_suffix\n",
+ "plot_title_no_quality= label_units + \" for each mode\"\n",
+ "file_name =f\"total_trip_length{file_suffix}\"\n",
"\n",
"try:\n",
- " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n",
- " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n",
- " dist = dist.reset_index()\n",
- " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
- " dist_dict = dict(zip(dist['Mode_confirm'], dist['Total ('+label_units_lower+')']))\n",
+ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" \n",
- " labels_m = []\n",
- " values_m = []\n",
+ " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
+ " plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+ " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc9be240",
+ "metadata": {},
+ "source": [
+ "### Total Trip Length covered by each land transport mode"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b1338268",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plot_title_no_quality= \"Total trip length (\" + label_units_lower + \") covered by each mode by land\"\n",
+ "file_name =f\"total_trip_length_land{file_suffix}\"\n",
+ "\n",
+ "try:\n",
+ " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
+ " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
+ " labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n",
+ " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
" \n",
- " for x, y in dist_dict.items():\n",
- " labels_m.append(x)\n",
- " values_m.append(y)\n",
+ " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
+ " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\"\n",
"\n",
- " pie_chart_mode(plot_title,labels_m,values_m,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) "
+ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+ " plot_and_text_stacked_bar_chart(labeled_land_trips_df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
+ " plot_and_text_stacked_bar_chart(sensed_land_trips_df.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+ " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bcef1f6a",
+ "metadata": {},
+ "source": [
+ "## Generic Metrics (Bar Charts)"
]
},
{
@@ -327,6 +455,11 @@
"file_name ='average_miles_mode_confirm%s' % file_suffix\n",
"\n",
"try:\n",
+ " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n",
+ " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n",
+ " dist = dist.reset_index()\n",
+ " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
+ "\n",
" x='Mode_confirm'\n",
" y='Average ('+label_units_lower+')'\n",
" plot_title= plot_title_no_quality+\"\\n\"+quality_text\n",
@@ -430,7 +563,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.18"
}
},
"nbformat": 4,
diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb
index 762cbf1..fe43bb1 100644
--- a/viz_scripts/generic_metrics_sensed.ipynb
+++ b/viz_scripts/generic_metrics_sensed.ipynb
@@ -88,166 +88,6 @@
"## Generic Metrics"
]
},
- {
- "cell_type": "markdown",
- "id": "distributed-peace",
- "metadata": {},
- "source": [
- "### Distribution of Mode_confirm attribute"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "tracked-serbia",
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "file_name='ntrips_sensed_mode%s' % file_suffix\n",
- "plot_title_no_quality= \"Number of trips for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n",
- "try:\n",
- " labels_mc = expanded_ct['primary_mode'].value_counts(dropna=True).keys().tolist()\n",
- " values_mc = expanded_ct['primary_mode'].value_counts(dropna=True).tolist() \n",
- " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_sensed_mode(plot_title,labels_mc,values_mc,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n",
- " print(expanded_ct['primary_mode'].value_counts(dropna=True))\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "crucial-keyboard",
- "metadata": {},
- "source": [
- "### Mode choice for trips under 80% mark"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "identified-replica",
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "file_name ='ntrips_under10miles_sensed_mode%s' % file_suffix\n",
- "\n",
- "try:\n",
- " #determine 80th percentile\n",
- " cutoff = expanded_ct.distance.quantile(0.8)\n",
- " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n",
- " dist_threshold = str(dist_threshold)\n",
- "\n",
- " plot_title_no_quality=\"Number of trips under \" + dist_threshold + \" \" + label_units_lower + \" for each primary mode\"\n",
- " plot_title_no_quality=plot_title_no_quality + \"\\n(inferred by OpenPATH from phone sensors)\" \n",
- " plot_title_no_quality=plot_title_no_quality + \"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n",
- "\n",
- " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).keys().tolist()\n",
- " values_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).tolist()\n",
- " d10_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n",
- " plot_title= plot_title_no_quality+\"\\n\"+d10_quality_text\n",
- " pie_chart_sensed_mode(plot_title,labels_d10,values_d10,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n",
- " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True))\n",
- "except:\n",
- " d10_df = expanded_ct.query(\"distance <= \" + cutoff) if \"distance\" in expanded_ct.columns else expanded_ct\n",
- " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dominant-company",
- "metadata": {},
- "source": [
- "### Miles per chosen transport mode"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "satisfied-sharing",
- "metadata": {
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "plot_title_no_quality = label_units + \" for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n",
- "file_name ='miles_sensed_mode%s' % file_suffix\n",
- "\n",
- "try:\n",
- " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n",
- " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n",
- " dist = dist.reset_index()\n",
- " dist = dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
- " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n",
- "\n",
- " labels_m = []\n",
- " values_m = []\n",
- "\n",
- " for x, y in dist_dict.items():\n",
- " labels_m.append(x)\n",
- " values_m.append(y)\n",
- " \n",
- " plot_title = plot_title_no_quality + \"\\n\" + quality_text\n",
- " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n",
- "\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1d0c7548",
- "metadata": {},
- "source": [
- "### Miles per chosen land transport mode"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "004b7b3c",
- "metadata": {},
- "outputs": [],
- "source": [
- "plot_title_no_quality= label_units + \" for each land-based primary mode\\n(inferred by OpenPATH from phone sensors)\"\n",
- "file_name ='miles_sensed_mode_land%s' % file_suffix\n",
- "\n",
- "try:\n",
- " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n",
- " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n",
- " dist = dist.reset_index()\n",
- " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
- "\n",
- " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n",
- "\n",
- " labels_m = []\n",
- " values_m = []\n",
- "\n",
- " for x, y in dist_dict.items():\n",
- " if x != \"AIR_OR_HSR\":\n",
- " labels_m.append(x)\n",
- " values_m.append(y)\n",
- "\n",
- " plot_title = plot_title_no_quality + \"\\n\" + quality_text\n",
- " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n",
- "\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) "
- ]
- },
{
"cell_type": "markdown",
"id": "43ecc5d7",
@@ -267,6 +107,11 @@
"file_name ='average_miles_sensed_mode%s' % file_suffix\n",
"\n",
"try:\n",
+ " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n",
+ " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n",
+ " dist = dist.reset_index()\n",
+ " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
+ " \n",
" data = dist.drop((dist.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)\n",
" x='primary_mode'\n",
" y='Average ('+label_units_lower+')'\n",
@@ -370,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.18"
}
},
"nbformat": 4,
diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index 0ba13f8..77f9d4b 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -93,7 +93,7 @@
"id": "397709c8",
"metadata": {},
"source": [
- "### Color Mapping for labels"
+ "### Color Dictionary"
]
},
{
@@ -103,7 +103,7 @@
"metadata": {},
"outputs": [],
"source": [
- "colors_mode, colors_purpose = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)"
+ "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)"
]
},
{
@@ -183,87 +183,95 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "respiratory-breach",
- "metadata": {
- "scrolled": false
- },
+ "id": "006ba3de",
+ "metadata": {},
"outputs": [],
"source": [
- "plot_title_no_quality=f\"Number of trips for each purpose for {mode_of_interest} only\"\n",
- "file_name= f'ntrips_{mode_of_interest}_purpose%s' % file_suffix\n",
+ "plot_title_no_quality = f\"Number of trips for each purpose for {mode_of_interest}\"\n",
+ "file_name= f\"ntrips_{mode_of_interest}_purpose{file_suffix}\"\n",
"\n",
"try:\n",
- " labels_tp = data_eb['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n",
- " values_tp = data_eb['Trip_purpose'].value_counts(dropna=True).tolist()\n",
- " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_purpose(plot_title,labels_tp,values_tp,colors_purpose,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+ " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+ " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Trip_purpose\").agg({distance_col: 'count'}),f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n",
+ " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+ " set_title_and_save(fig, text_results, plot_title, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dependent-reservoir",
+ "metadata": {},
+ "source": [
+ "### Total Trip Length for each mode replaced by the specified mode"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "thermal-midnight",
+ "id": "2b415510",
"metadata": {},
"outputs": [],
"source": [
- "plot_title_no_quality=f\"Number of trips for each replaced transport mode for {mode_of_interest} only\"\n",
- "file_name =f'ntrips_{mode_of_interest}_replaced_mode%s' % file_suffix\n",
+ "plot_title_no_quality = \"Total trip length (\" + label_units_lower + \") covered by replaced mode\"\n",
+ "file_name = f\"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}\"\n",
"\n",
"try:\n",
- " labels_eb = data_eb.Replaced_mode.value_counts(dropna=True).keys().tolist()\n",
- " values_eb = data_eb.Replaced_mode.value_counts(dropna=True).tolist()\n",
- " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_mode(plot_title,labels_eb,values_eb,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_eb, labels_eb), file_name, plot_title)\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+ " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+ " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}), \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_mode, debug_df)\n",
+ " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+ " set_title_and_save(fig, text_results, plot_title, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
]
},
{
"cell_type": "markdown",
- "id": "dependent-reservoir",
+ "id": "be6bb4be",
"metadata": {},
"source": [
- "### Miles for each mode replaced by the specified mode"
+ "### Number of Trips for each mode replaced by specified mode"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "pointed-velvet",
+ "id": "3947a5f2",
"metadata": {},
"outputs": [],
"source": [
- "plot_title_no_quality=f\"Distribution of \"+label_units+f\" Replaced by {mode_of_interest}\"\n",
- "file_name =f'miles_{mode_of_interest}_replaced_mode%s' % file_suffix\n",
+ "plot_title_no_quality= \"Number of trips for replaced mode\"\n",
+ "file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'\n",
"\n",
"try:\n",
- " dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n",
- " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n",
- " dg = dg.reset_index()\n",
- " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
- "\n",
- " dg_dict = dict(zip(dg['Replaced_mode'], dg['Total ('+label_units_lower+')']))\n",
- " \n",
- " labels_m = []\n",
- " values_m = []\n",
- "\n",
- " for x, y in dg_dict.items():\n",
- " labels_m.append(x)\n",
- " values_m.append(y)\n",
- "\n",
- " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n",
- " pie_chart_mode(plot_title,labels_m,values_m,colors_mode,file_name)\n",
- " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n",
- " print(dg)\n",
- "except:\n",
- " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
- " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"
+ " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+ " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+ " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Replaced_mode\").agg({distance_col: 'count'}), f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n",
+ " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+ " set_title_and_save(fig, text_results, plot_title, file_name)\n",
+ "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+ " plt.clf()\n",
+ " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
+ " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n",
+ " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality) \n",
+ "except Exception as e:\n",
+ " fig, ax = plt.subplots()\n",
+ " plot_and_text_error(e, ax, file_name)"
]
},
{
@@ -285,6 +293,10 @@
"file_name ='average_miles_replaced_mode%s' % file_suffix\n",
"\n",
"try:\n",
+ " dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n",
+ " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n",
+ " dg = dg.reset_index()\n",
+ " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n",
" data = dg.drop((dg.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False) \n",
" \n",
" x='Replaced_mode'\n",
@@ -392,7 +404,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.18"
}
},
"nbformat": 4,
diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py
index 3364a1a..83ed1e0 100644
--- a/viz_scripts/plots.py
+++ b/viz_scripts/plots.py
@@ -4,6 +4,7 @@
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
+import traceback as tb
from matplotlib.patches import Patch
sns.set_style("whitegrid")
@@ -19,13 +20,21 @@
SAVE_DIR="/plots/"
+def calculate_pct(labels, values):
+ v2l_df = pd.DataFrame({"vals": values}, index=labels)
+
+ # Calculate % for all the values
+ vs = v2l_df.vals.sum()
+ v2l_df["pct"] = v2l_df.vals.apply(lambda x: round((x/vs) * 100, 1))
+
+ return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list())
def merge_small_entries(labels, values):
v2l_df = pd.DataFrame({"vals": values}, index=labels)
# Calculate % for all the values
vs = v2l_df.vals.sum()
- v2l_df["pct"] = v2l_df.vals.apply(lambda x: (x/vs) * 100)
+ v2l_df["pct"] = v2l_df.vals.apply(lambda x: round((x/vs) * 100, 1))
disp.display(v2l_df)
# Find small chunks to combine
@@ -48,82 +57,136 @@ def merge_small_entries(labels, values):
v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count
disp.display(v2l_df)
- return (v2l_df.index.to_list(),v2l_df.vals.to_list())
-
-
-def format_pct(pct, values):
- total = sum(values)
- absolute = int(round(pct*total/100.0))
- return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 4 else''
-
+ return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list())
-def pie_chart_mode(plot_title,labels,values,colors_map,file_name):
- fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal"))
- m_labels, m_values = merge_small_entries(labels, values)
-
- wedges, texts, autotexts = ax.pie(m_values,
- labels = m_labels,
- colors=[colors_map[key] for key in labels],
- pctdistance=0.75,
- autopct= lambda pct: format_pct(pct, values),
- textprops={'size': 23})
-
- ax.set_title(plot_title, size=25)
- plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10)
- plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20})
+# Create dataframes with cols: 'Label' 'Value' and 'Proportion'
+def process_trip_data(labels, values):
+ """ Inputs:
+ labels = Displayed labels (e.g. "Gas car, drove alone")
+ values = Corresponding values of these labels
+ trip_type = Bar labels (e.g. Labeled by user (Confirmed trips))
+ Returns:
+ df_total_trip_expanded = Data frame without consolidation of Others, used to create the alt_html table
+ df_total_trip = Data frame with consolidation of Others, used to represent the Bar Charts.
+ """
+ if len(labels) == 0 and len(values) == 0:
+ return pd.DataFrame(), pd.DataFrame()
+ m_labels_expanded, m_values_expanded, m_pct_expanded = calculate_pct(labels, values)
+ data_trip_expanded = {'Label': m_labels_expanded, 'Value': m_values_expanded, 'Proportion': m_pct_expanded}
+ df_total_trip_expanded = pd.DataFrame(data_trip_expanded)
+
+ m_labels, m_values, m_pct = merge_small_entries(labels, values)
+ data_trip = {'Label': m_labels, 'Value': m_values, 'Proportion': m_pct}
+ df_total_trip = pd.DataFrame(data_trip)
+ return df_total_trip_expanded, df_total_trip
+
+def plot_and_text_error(e, ax, file_name):
+ stringified_exception = "".join(tb.format_exception(type(e), e, e.__traceback__))
+ ax.text(0,0,s=stringified_exception)
plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight')
- plt.show()
-
-def pie_chart_sensed_mode(plot_title,labels,values,file_name):
- all_labels= ['IN_VEHICLE',
- 'UNKNOWN',
- 'WALKING',
- 'AIR_OR_HSR',
- 'BICYCLING',
- 'OTHER']
-
- val2labeldf = pd.DataFrame({"labels": labels, "values": values})
-
- colours = dict(zip(all_labels, plt.cm.tab10.colors[:len(all_labels)]))
- fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal"))
-
- m_labels, m_values = merge_small_entries(labels, values)
-
- wedges, texts, autotexts = ax.pie(m_values,
- labels = m_labels,
- colors=[colours[key] for key in labels],
- pctdistance=0.75,
- autopct= lambda pct: format_pct(pct, values),
- textprops={'size': 23})
+ alt_text = f"Error while generating chart:"
+ alt_text += stringified_exception
+ alt_text = access_alt_text(alt_text, file_name)
+ # TODO: Format the error as HTML instead of plain text
+ alt_html = access_alt_html(alt_text, file_name)
+ return alt_text, alt_html
+
+# Creates/ Appends single bar to the 100% Stacked Bar Chart
+def plot_and_text_stacked_bar_chart(df, bar_label, ax, text_result, colors, debug_df):
+ """ Inputs:
+ df = Data frame corresponding to the bar in a stacked bar chart. It is
+ expected to have the labels as its index and exactly one column holding the values
+ bar_label = Text to represent the Bar (e.g. Labeled by user\n (Confirmed trips))
+ ax = axis information
+ text_result = will be filled in with the alt_text and alt_html for the plot
+ """
+ if len(df.columns) > 1:
+ raise ValueError("dataframe should have two columns (labels and values), found %s" % (df.columns))
- ax.set_title(plot_title, size=25)
- plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10)
- plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20})
- plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight')
- plt.show()
+ sns.set(font_scale=1.5)
+ bar_height = 0.2
+ bar_width = [0]
+ try:
+ grouped_df = df.reset_index().set_axis(['label', 'value'], axis='columns').sort_values(by='value', ascending=False)
+
+ # TODO: Do we need this as a separate function?
+ df_all_entries, df_only_small = process_trip_data(grouped_df.label.tolist(), grouped_df.value.tolist())
+
+ # TODO: Fix this to be more pandas-like and change the "long" variable name
+ for label in pd.unique(df_only_small['Label']):
+ long = df_only_small[df_only_small['Label'] == label]
+ # TODO: Remove if/else; if we only consider unique values, then long can never be empty
+ if not long.empty:
+ mode_prop = long['Proportion']
+ mode_count = long['Value']
+ vals_str = [f'{y:.1f} %\n({x:.0f})' if y > 4 else '' for x, y in zip(mode_count, mode_prop)]
+ bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors[label])
+ ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16)
+ bar_width = [total + val for total, val in zip(bar_width, mode_prop)]
+ else:
+ print(f"{long} is empty")
+ ax.tick_params(axis='y', labelsize=18)
+ ax.tick_params(axis='x', labelsize=18, rotation=90)
+ ncols = len(df_only_small)//5 if len(df_only_small) % 5 == 0 else len(df_only_small)//5 + 1
+ ax.legend(bbox_to_anchor=(1, 0), loc='lower left', fancybox=True, shadow=True, fontsize=15)
+ # ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize=15, ncols=ncols)
+ # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/
+ ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True)
+ text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label)
+ print("After populating, %s" % text_result)
+ except Exception as e:
+ # tb.print_exception(type(e), e, e.__traceback__)
+ # ax.set_title("Insufficient data", loc="center")
+ ax.text(x = 0.5, y = 0.9, s = "Insufficient data", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
+ # TODO: consider switching to a two column table
+ ax.text(x = 0.5, y = 0.8, s = debug_df.to_string(), horizontalalignment='center', verticalalignment='top', transform=ax.transAxes, fontsize=10)
+ text_result[0] = store_alt_text_missing(debug_df, None, bar_label)
+ text_result[1] = store_alt_html_missing(debug_df, None, bar_label)
+ # ax.yaxis.set_visible(False)
+
+# Adds chart title, x and y axis label to the 100% Stacked Bar Chart
+def set_title_and_save(fig, text_results, plot_title, file_name):
+ # Setup label and title for the figure since these would be common for all sub-plots
+ # We only need the axis to tweak the position (WHY!) so we do so by getting the first ax object
+ ax = fig.get_axes()[0]
+ fig.supxlabel('Proportion (Count)', fontsize=20, x=0.5, y= ax.xaxis.get_label().get_position()[0] - 0.62, va='top')
+ # fig.supylabel('Trip Types', fontsize=20, x=-0.12, y=0.5, rotation='vertical')
+ fig.suptitle(plot_title, fontsize=25,va = 'bottom')
+ plt.text(x=0, y=ax.xaxis.get_label().get_position()[0] - 0.62, s=f"Last updated {arrow.get()}", fontsize=12)
+ plt.subplots_adjust(hspace=0.1, top= 0.95)
+
+ # if nRows == 1, then plt.subplots returns a single axis object instead of an array
+ # similarly we have text_result be a single list if nRows == 1 and a list of lists if nRows > 1
+ # but then we want to wrap it so that it is a list of lists with a single top level element
+ # so that the iteration logic below works
+ if len(fig.get_axes()) == 1:
+ text_results = [text_results]
+
+
+ # The number of plots is not fixed. Let's iterate over the array that is passed in to handle the text results.
+ # The number of axes in the figure is the number of plots
+ concat_alt_text = plot_title
+ concat_alt_html = f"""
+
+
+
+
{plot_title}
+ """
+ for i in range(0, len(fig.get_axes())):
+ concat_alt_text += text_results[i][0]
+ concat_alt_html += f"
{text_results[i][1]}
"
-def pie_chart_purpose(plot_title,labels,values,colors_map,file_name):
-
- fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal"))
+ concat_alt_html += f"""
+
+
+ """
- m_labels, m_values = merge_small_entries(labels, values)
-
- def func(pct, values):
- total = sum(values)
- absolute = int(round(pct*total/100.0))
- return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 3 else''
-
- wedges, texts, autotexts = ax.pie(m_values,
- labels = m_labels,
- colors=[colors_map[key] for key in labels],
- pctdistance=0.85,
- autopct=lambda pct: func(pct, values),
- textprops={'size': 23})
-
- ax.set_title(plot_title, size=25)
- plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10)
- plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20})
- plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight')
+ # Set up title and concatenate the text results
+ # TODO: Consider using a dictionary or a data object instead of an array of arrays
+ # for greater clarity
+ alt_text = access_alt_text(concat_alt_text, file_name)
+ alt_html = access_alt_html(concat_alt_html, file_name)
+ fig.savefig(SAVE_DIR + file_name + ".png", bbox_inches='tight')
plt.show()
def energy_impact(x,y,color,plot_title,file_name):
@@ -338,19 +401,6 @@ def store_alt_text_bar(df, chart_name, var_name):
alt_text = access_alt_text(alt_text, chart_name)
return alt_text
-def store_alt_text_pie(df, chart_name, var_name):
- """ Inputs:
- df = dataframe with index of item names, first column is counts
- chart_name = what to label chart by in the dictionary
- var_name = the variable being analyzed across pie slices
- """
- # Fill out the alt text based on components of the chart and passed data
- alt_text = f"Pie chart of {var_name}."
- for i in range(0,len(df)):
- alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0] / np.sum(df.iloc[:,0]) * 100, 1)}%."
- alt_text = access_alt_text(alt_text, chart_name)
- return alt_text
-
def store_alt_text_timeseries(df, chart_name, var_name):
""" Inputs:
df = dataframe with first col of dates, second column is values
@@ -365,6 +415,46 @@ def store_alt_text_timeseries(df, chart_name, var_name):
alt_text = access_alt_text(alt_text, chart_name)
return alt_text
+# Writes the alt html for a chart to SAVE_DIR + chart_name + ".html" and returns it
+def access_alt_html(html_content, chart_name):
+ """ Inputs:
+ html_content = the html describing the chart
+ chart_name = the alt html file to save or update
+ Returns html_content unchanged after writing it to the file
+ """
+ with open(SAVE_DIR + chart_name + ".html", "w") as f:
+ f.write(html_content)
+
+ return html_content
+
+# Appends bar information into the alt_text and alt_html
+def store_alt_text_and_html_stacked_bar_chart(df, var_name):
+ """ Inputs:
+ df = dataframe with columns Label, Value, Proportion
+ var_name = name of the bar (trip type) that the rows of df describe
+ """
+ # Generate alt text file
+ alt_text = f"\nStacked Bar of: {var_name}\n"
+ for i in range(len(df)):
+ alt_text += f"{df['Label'].iloc[i]} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n"
+
+ # Generate html table
+ alt_html = "\n"
+ for i in range(len(df)):
+ alt_html += f"
{df['Label'].iloc[i]}
{df['Value'].iloc[i]}
{df['Proportion'].iloc[i]}%
"
+ html_content = f"""
+
Trip Type: {var_name}
+
+
+
Label
+
Value
+
Proportion
+
+ {alt_html}
+
+ """
+ return alt_text, html_content
+
def generate_missing_plot(plot_title,debug_df,file_name):
f, ax = plt.subplots(figsize=(10,10))
@@ -404,5 +494,33 @@ def store_alt_text_missing(df, chart_name, var_name):
alt_text = f"Unable to generate\nBar chart of {var_name}.\nReason:"
for i in range(0,len(df)):
alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0], 1)}."
- alt_text = access_alt_text(alt_text, chart_name)
+
+ # For the bar charts, there is no longer a 1:1 mapping between missing alt
+ # text and a file. So we want to collect all the alt_text as strings and
+ # then save it. We cannot just remove the call to `access_alt_text`, since
+ # it will break other uses. So let's pass in None for the chart_name if we
+ # don't want to save it.
+ if chart_name is not None:
+ alt_text = access_alt_text(alt_text, chart_name)
return alt_text
+
+def store_alt_html_missing(df, chart_name, var_name):
+ """ Inputs:
+ df = dataframe with index of debug information, first column is counts
+ chart_name = name of the html file to save; pass None to skip saving
+ var_name = the variable being analyzed in the chart that could not be generated
+ """
+ # Fill out the alt text based on components of the chart and passed data
+ alt_html = f"""
+
+
+
Unable to generate\nBar chart of {var_name}. Reason:
\n
+ """
+ alt_html += df.to_html()
+ alt_html += f"""
+
+
+ """
+ if chart_name is not None:
+ alt_html = access_alt_html(alt_html, chart_name)
+ return alt_html
diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 6d71199..fa32948 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -196,22 +196,23 @@ def translate_labels(labels):
# Function: Maps "MODE", "PURPOSE", and "REPLACED_MODE" to colors.
# Input: dynamic_labels, dic_re, and dic_pur
-# Output: Map for color with mode and purpose
+# Output: Dictionaries mapping mode, purpose, and sensed-mode labels to colors
def mapping_color_labels(dynamic_labels, dic_re, dic_pur):
+ sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "Other"]
if len(dynamic_labels) > 0:
mode_values = list(mapping_labels(dynamic_labels, "MODE").values()) if "MODE" in dynamic_labels else []
replaced_mode_values = list(mapping_labels(dynamic_labels, "REPLACED_MODE").values()) if "REPLACED_MODE" in dynamic_labels else []
purpose_values = list(mapping_labels(dynamic_labels, "PURPOSE").values()) if "PURPOSE" in dynamic_labels else []
combined_mode_values = mode_values + replaced_mode_values
else:
- # Addition of 'Other' is required to the list since it's missing from auxillary_files/mode_labels.csv and auxillary_files/purpose_labels.csv
combined_mode_values = (list(OrderedDict.fromkeys(dic_re.values())) + ['Other'])
- purpose_values = (list(OrderedDict.fromkeys(dic_pur.values())) + ['Other'])
+ purpose_values = list(OrderedDict.fromkeys(dic_pur.values()))
colors_mode = dict(zip(combined_mode_values, plt.cm.tab20.colors[:len(combined_mode_values)]))
colors_purpose = dict(zip(purpose_values, plt.cm.tab20.colors[:len(purpose_values)]))
+ colors_sensed = dict(zip(sensed_values, plt.cm.tab20.colors[:len(sensed_values)]))
- return colors_mode, colors_purpose
+ return colors_mode, colors_purpose, colors_sensed
def load_viz_notebook_sensor_inference_data(year, month, program, include_test_users=False, sensed_algo_prefix="cleaned"):
""" Inputs:
@@ -236,7 +237,7 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u
# Document data quality
file_suffix = get_file_suffix(year, month, program)
- quality_text = get_quality_text_sensed(expanded_ct, include_test_users)
+ quality_text = get_quality_text_sensed(expanded_ct, "", include_test_users)
debug_df = pd.DataFrame.from_dict({
"year": year,
@@ -296,10 +297,10 @@ def get_quality_text(before_df, after_df, mode_of_interest=None, include_test_us
print(quality_text)
return quality_text
-def get_quality_text_sensed(df, include_test_users=False):
+def get_quality_text_sensed(df, cutoff_text="", include_test_users=False):
cq = (len(df), unique_users(df))
user_str = 'testers and participants' if include_test_users else 'users'
- quality_text = f"Based on %s trips from %d {user_str}" % cq
+ quality_text = f"Based on %s trips ({cutoff_text}) from %d {user_str}" % cq if cutoff_text else f"Based on %s trips from %d {user_str}" % cq
print(quality_text)
return quality_text