diff --git a/frontend/index.html b/frontend/index.html index a7e3ac1..ee2ef31 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -395,6 +395,7 @@ return study_name; } + var mode_studied $(function () { $(document).ready(function () { // Simple solution to program/study plots dropdown; load the config and corresponding metrics.html @@ -403,6 +404,7 @@ // get units const dist_units = (data.display_config.use_imperial) ? "miles" : "kilometers"; console.log("Units for display are", data.display_config.use_imperial, dist_units); + mode_studied = data.intro.mode_studied // Load list of plots corresponding to study/program dynamic_labels = data.label_options if (data.intro.program_or_study == 'program') { @@ -417,18 +419,17 @@ console.log("Dynamic Labels are unavailable for: " + STUDY_CONFIG) } $.get(load_file, function (response) { - const configuredResponse = response.replaceAll("${data.intro.mode_studied}", data.intro.mode_studied); + const configuredResponse = response.replaceAll("${data.intro.mode_studied}", mode_studied); console.log("configuring units"); const unitConfigured = configuredResponse.replaceAll("${data.display_config.use_imperial}", dist_units); $('#metric').append(unitConfigured); addPreconfiguredMetrics([ - "ntrips_mode_confirm", - "miles_mode_confirm", - "ntrips_sensed_mode", - "miles_sensed_mode", - `ntrips_${data.intro.mode_studied}_purpose`, - `ntrips_${data.intro.mode_studied}_per_weekday`, - `sketch_CO2impact_${data.intro.mode_studied}` + "ntrips_total", + "total_trip_length", + "ntrips_purpose", + `ntrips_${mode_studied}_purpose`, + `ntrips_${mode_studied}_per_weekday`, + `sketch_CO2impact_${mode_studied}` ]); }); } @@ -450,10 +451,8 @@ const unitConfigured = response.replaceAll("${data.display_config.use_imperial}", dist_units); $('#metric').append(unitConfigured); addPreconfiguredMetrics([ - "ntrips_mode_confirm", - "miles_mode_confirm", - "ntrips_sensed_mode", - "miles_sensed_mode", + "ntrips_total", + "total_trip_length", 
"ntrips_purpose", "ntrips_sensed_per_weekday", "ts_emissions_user" @@ -531,15 +530,59 @@ // and without this change, even if we set the dateVal to '' // we would try to load ntrips_purpose__default.png const imgFile = "plots/" + metric + "_" + dateVal + program + ".png"; + const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html"; const altTextFile = "plots/" + metric + "_" + dateVal + program + ".txt"; const altText = loadFile(altTextFile); - const nw = ["
  • " + const isStackedMetric = ['ntrips_total', 'ntrips_purpose', 'ntrips_under80', 'ntrips_commute_mode_confirm', + 'total_trip_length', 'total_trip_length_land',`ntrips_${mode_studied}_total`, + `ntrips_${mode_studied}_purpose`,`total_trip_length_${mode_studied}_replaced_mode`] + .includes(metric); + const jsonData = { metric, dateVal, program, metricLabel, dateLabel, programLabel, sizex, sizey }; + + if (isStackedMetric){ + const nw = ["
  • " + + labelText + + " " + + "" + + " " + + "" + + "" + altText + "" + + "
  • ", + sizex, sizey]; + gridster.add_widget.apply(gridster, nw); + } + else{ + const nw = ["
  • " + + labelText + + "" + + "" + altText + "" + + "
  • ", + sizex, sizey]; + gridster.add_widget.apply(gridster, nw); + } + }); + + $('body').on("click", ".gridster ul > li .addInfo", function () { + const additionalInfoButton = $(this); + const additionalInfo = JSON.parse(decodeURIComponent(additionalInfoButton.data('info'))); + const metric = additionalInfo.metric; + const dateVal = additionalInfo.dateVal; + const program = additionalInfo.program; + const metricLabel = additionalInfo.metricLabel; + const dateLabel = additionalInfo.dateLabel; + const programLabel = additionalInfo.programLabel; + const sizex = additionalInfo.sizex; + const sizey = additionalInfo.sizey; + const labelText = metricLabel + " " + dateLabel + " " + programLabel; + const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html"; + + const nw_additionalInformation = ["
  • " + labelText + "" - + "" + altText + "" + + "" + "
  • ", sizex, sizey]; - gridster.add_widget.apply(gridster, nw); + gridster.add_widget.apply(gridster, nw_additionalInformation); }); $('body').on("click", ".gridster ul > li .remove", function () { diff --git a/frontend/metrics_program.html b/frontend/metrics_program.html index d54d04a..9cdfc18 100644 --- a/frontend/metrics_program.html +++ b/frontend/metrics_program.html @@ -1,13 +1,10 @@ - - - - - - - - - + + + + + + @@ -20,9 +17,10 @@ - - - + + + + diff --git a/frontend/metrics_program_withoutEnergyMetrics.html b/frontend/metrics_program_withoutEnergyMetrics.html index e820cc9..7916b4b 100644 --- a/frontend/metrics_program_withoutEnergyMetrics.html +++ b/frontend/metrics_program_withoutEnergyMetrics.html @@ -1,14 +1,11 @@ - - - - - - - - - + + + + + + @@ -20,9 +17,10 @@ - - - + + + + diff --git a/frontend/metrics_study.html b/frontend/metrics_study.html index 79bf1f0..48e65c8 100644 --- a/frontend/metrics_study.html +++ b/frontend/metrics_study.html @@ -1,13 +1,10 @@ - - - - - - - - - + + + + + + diff --git a/frontend/metrics_study_withoutEnergyMetrics.html b/frontend/metrics_study_withoutEnergyMetrics.html index 4a2137e..26018f1 100644 --- a/frontend/metrics_study_withoutEnergyMetrics.html +++ b/frontend/metrics_study_withoutEnergyMetrics.html @@ -1,14 +1,11 @@ - - - - - - - - - + + + + + + diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 607a867..9aa02db 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -29,10 +29,10 @@ "month = 11\n", "program = \"default\"\n", "study_type = \"study\"\n", - "mode_of_interest = None\n", "include_test_users = False\n", "dynamic_labels = {}\n", - "use_imperial = False" + "use_imperial = True\n", + "sensed_algo_prefix = \"cleaned\"" ] }, { @@ -46,6 +46,7 @@ "\n", "import numpy as np\n", "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", "\n", "from plots import *\n", "import scaffolding\n", @@ -79,7 +80,7 @@ "id": "796f59c7", "metadata": 
{}, "source": [ - "### Color Mapping for labels" + "### Color Dictionary" ] }, { @@ -89,7 +90,7 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" ] }, { @@ -97,7 +98,7 @@ "id": "intellectual-columbus", "metadata": {}, "source": [ - "## Collect Data From Database" + "## Collect Data From Database for Generic Metrics" ] }, { @@ -119,56 +120,127 @@ }, { "cell_type": "markdown", - "id": "modified-skiing", + "id": "6d44c87e", "metadata": {}, "source": [ - "## Generic Metrics" + "## Collect Data from Database for Sensed Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55ec383b", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = scaffolding.load_viz_notebook_sensor_inference_data(year,\n", + " month,\n", + " program,\n", + " include_test_users,\n", + " sensed_algo_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1bd14bc", + "metadata": {}, + "outputs": [], + "source": [ + "merged_debug_df = debug_df.combine_first(debug_df_sensed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a0ec842", + "metadata": {}, + "outputs": [], + "source": [ + "quality_text, quality_text_sensed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cacc71f3", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "labeled_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text)\n", + "# labeled_match\n", + "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} 
{labeled_match.group(3)}\"\n", + "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n", + "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n", + "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed" + ] + }, + { + "cell_type": "markdown", + "id": "1af1f02b", + "metadata": {}, + "source": [ + "## 100% Stacked Bar Charts" ] }, { "cell_type": "markdown", - "id": "distributed-peace", + "id": "e50959c1", "metadata": {}, "source": [ - "### Distribution of Mode_confirm attribute" + "### Distribution of modes" ] }, { "cell_type": "code", "execution_count": null, - "id": "tracked-serbia", - "metadata": { - "scrolled": false - }, + "id": "aef741fe", + "metadata": {}, "outputs": [], "source": [ - "file_name='ntrips_mode_confirm%s' % file_suffix\n", + "file_name = f'ntrips_total{file_suffix}'\n", "plot_title_no_quality= \"Number of trips for each mode (selected by users)\"\n", + "\n", "try:\n", - " labels_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).tolist() \n", - " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_mc,values_mc,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n", - " print(expanded_ct['Mode_confirm'].value_counts(dropna=True))\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " # We will have text results corresponding to the axes for simplicity and consistency\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified 
HTML\"]]\n", + " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " \n", + " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" + ] + }, + { + "cell_type": "markdown", + "id": "7fa4f3da", + "metadata": {}, + "source": [ + "### Distribution of modes in commute trips" ] }, { "cell_type": "code", "execution_count": null, - "id": "about-seafood", - "metadata": { - "scrolled": false - }, + "id": "5c6cd4cf", + "metadata": {}, "outputs": [], "source": [ "plot_title_no_quality= \"Number of commute trips for each mode (selected by users)\"\n", - "file_name= 'ntrips_commute_mode_confirm%s' % file_suffix\n", + "file_name = f\"ntrips_commute_mode_confirm{file_suffix}\"\n", "\n", "try:\n", + " # Preprocess to find commute trips\n", " if (len(dynamic_labels)):\n", " purpose_map_label = scaffolding.mapping_labels(dynamic_labels, \"PURPOSE\")\n", " translation_work = purpose_map_label['work']\n", @@ -176,22 +248,28 @@ " else:\n", " trip_purpose_query = \"Trip_purpose == 'Work'\"\n", "\n", - " labels_mc = 
expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).tolist()\n", - " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct.query(trip_purpose_query), \"commute\", include_test_users)\n", - " plot_title= plot_title_no_quality+\"\\n\"+commute_quality_text\n", + " expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n", + " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n", + " plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n", " \n", - " pie_chart_mode(plot_title,labels_mc,values_mc,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n", - "except:\n", - " debug_df.loc[\"Commute_trips\"] = len(expanded_ct.query(trip_purpose_query)) if \"Trip_purpose\" in expanded_ct.columns else 0\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " # Plot entries\n", + " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n", + " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " plot_and_text_stacked_bar_chart(expanded_ct_commute.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n", + " set_title_and_save(fig, text_results, plot_title, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n", 
+ "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" ] }, { "cell_type": "markdown", - "id": "careful-spencer", + "id": "ffb2df0b", "metadata": {}, "source": [ "### Distribution of Trip_purpose attribute" @@ -200,30 +278,30 @@ { "cell_type": "code", "execution_count": null, - "id": "conservative-september", - "metadata": { - "scrolled": false - }, + "id": "46474ada", + "metadata": {}, "outputs": [], "source": [ "plot_title_no_quality=\"Number of trips for each purpose (selected by users)\"\n", - "file_name= 'ntrips_purpose%s' % file_suffix\n", - "\n", + "file_name= f\"ntrips_purpose{file_suffix}\"\n", "try:\n", - " labels_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n", - " values_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_purpose(plot_title,labels_tp,values_tp,colors_purpose,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n", - " print(expanded_ct['Trip_purpose'].value_counts(dropna=True))\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", + " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Trip_purpose\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n", + " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, 
plot_title_no_quality)\n", + " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" ] }, { "cell_type": "markdown", - "id": "crucial-keyboard", + "id": "8fc63a45", "metadata": {}, "source": [ "### Mode choice for trips under 80% mark" @@ -232,80 +310,130 @@ { "cell_type": "code", "execution_count": null, - "id": "identified-replica", - "metadata": { - "scrolled": false - }, + "id": "77ece8ee", + "metadata": {}, "outputs": [], "source": [ - "file_name ='ntrips_under10miles_mode_confirm%s' % file_suffix\n", + "file_name = f'ntrips_under80{file_suffix}'\n", "\n", "try:\n", - " #determine 80th percentile\n", - " cutoff = expanded_ct.distance.quantile(0.8)\n", + " # Preprocess to find cutoff and filter below cutoff\n", + " # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips\n", + " cutoff = expanded_ct_sensed.distance.quantile(0.8)\n", " if pd.isna(cutoff):\n", " cutoff = 0\n", - " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n", + " dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n", " dist_threshold = str(dist_threshold) \n", "\n", - " plot_title_no_quality=\"Mode confirmations for trips under \" + dist_threshold + \" \" + label_units_lower\n", + " plot_title_no_quality=\"Number of trips per travel model under \" + dist_threshold + \" \" + label_units_lower\n", " plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n", "\n", - " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).keys().tolist()\n", - " values_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).tolist()\n", - " d10_quality_text = scaffolding.get_quality_text(expanded_ct, 
expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n", - " plot_title = plot_title_no_quality+\"\\n\"+d10_quality_text\n", - " pie_chart_mode(plot_title,labels_d10,values_d10,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n", - " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True))\n", - "\n", - "except:\n", - " d10_df = expanded_ct.query(\"distance <= \" + str(cutoff)) if \"distance\" in expanded_ct.columns else expanded_ct\n", - " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)]\n", + " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n", + " sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n", + " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\"\n", + " \n", + " # Plot entries\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", + " plot_and_text_stacked_bar_chart(expanded_ct_u80.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], 
text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n", + " # here, our pre-processing only relies on sensed data; plot_title_no_quality may not be set yet, so all outputs use plot_title_default\n", + " plt.clf()\n", + " plot_title_default = \"Number of trips below 80th percentile in each mode\"\n", + " generate_missing_plot(plot_title_default, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_default)\n", + " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_default)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" ] }, { "cell_type": "markdown", - "id": "dominant-company", + "id": "b560cb32", "metadata": {}, "source": [ - "### Miles per chosen transport mode" + "### Total Trip Length covered by each mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "satisfied-sharing", - "metadata": { - "scrolled": false - }, + "id": "ffccb96f", + "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality= label_units + \" for each mode (selected by users)\"\n", - "plot_title=plot_title_no_quality + '\\n' + quality_text\n", - "file_name ='miles_mode_confirm%s' % file_suffix\n", + "plot_title_no_quality= label_units + \" for each mode\"\n", + "file_name =f\"total_trip_length{file_suffix}\"\n", "\n", "try:\n", - " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average
('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - " dist_dict = dict(zip(dist['Mode_confirm'], dist['Total ('+label_units_lower+')']))\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " \n", - " labels_m = []\n", - " values_m = []\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", + " plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" + ] + }, + { + "cell_type": "markdown", + "id": "bc9be240", + "metadata": {}, + "source": [ + "### Total Trip Length covered by each land transport mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1338268", + "metadata": {}, + "outputs": [], + "source": [ + "plot_title_no_quality= \"Total trip length (\" + label_units_lower + \") covered by each mode by land\"\n", + "file_name =f\"total_trip_length_land{file_suffix}\"\n", + "\n", + "try:\n", + " ## We do an existence check for the 
labeled df because we want to display the sensed value even if we don't have the labeled value\n", + " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", - " for x, y in dist_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", + " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", + " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\"\n", "\n", - " pie_chart_mode(plot_title,labels_m,values_m,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " plot_and_text_stacked_bar_chart(labeled_land_trips_df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(sensed_land_trips_df.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + "
set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" + ] + }, + { + "cell_type": "markdown", + "id": "bcef1f6a", + "metadata": {}, + "source": [ + "## Generic Metrics (Bar Charts)" ] }, { @@ -327,6 +455,11 @@ "file_name ='average_miles_mode_confirm%s' % file_suffix\n", "\n", "try:\n", + " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n", + " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", + " dist = dist.reset_index()\n", + " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", + "\n", " x='Mode_confirm'\n", " y='Average ('+label_units_lower+')'\n", " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", @@ -430,7 +563,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb index 762cbf1..fe43bb1 100644 --- a/viz_scripts/generic_metrics_sensed.ipynb +++ b/viz_scripts/generic_metrics_sensed.ipynb @@ -88,166 +88,6 @@ "## Generic Metrics" ] }, - { - "cell_type": "markdown", - "id": "distributed-peace", - "metadata": {}, - "source": [ - "### Distribution of Mode_confirm attribute" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tracked-serbia", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - 
"file_name='ntrips_sensed_mode%s' % file_suffix\n", - "plot_title_no_quality= \"Number of trips for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - "try:\n", - " labels_mc = expanded_ct['primary_mode'].value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct['primary_mode'].value_counts(dropna=True).tolist() \n", - " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_mc,values_mc,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n", - " print(expanded_ct['primary_mode'].value_counts(dropna=True))\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "crucial-keyboard", - "metadata": {}, - "source": [ - "### Mode choice for trips under 80% mark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "identified-replica", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "file_name ='ntrips_under10miles_sensed_mode%s' % file_suffix\n", - "\n", - "try:\n", - " #determine 80th percentile\n", - " cutoff = expanded_ct.distance.quantile(0.8)\n", - " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n", - " dist_threshold = str(dist_threshold)\n", - "\n", - " plot_title_no_quality=\"Number of trips under \" + dist_threshold + \" \" + label_units_lower + \" for each primary mode\"\n", - " plot_title_no_quality=plot_title_no_quality + \"\\n(inferred by OpenPATH from phone sensors)\" \n", - " plot_title_no_quality=plot_title_no_quality + \"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n", - "\n", - " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).keys().tolist()\n", - " values_d10 = 
expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).tolist()\n", - " d10_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n", - " plot_title= plot_title_no_quality+\"\\n\"+d10_quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_d10,values_d10,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n", - " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True))\n", - "except:\n", - " d10_df = expanded_ct.query(\"distance <= \" + cutoff) if \"distance\" in expanded_ct.columns else expanded_ct\n", - " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "dominant-company", - "metadata": {}, - "source": [ - "### Miles per chosen transport mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "satisfied-sharing", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "plot_title_no_quality = label_units + \" for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='miles_sensed_mode%s' % file_suffix\n", - "\n", - "try:\n", - " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist = dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n", - "\n", - " labels_m = []\n", - " values_m = []\n", - 
"\n", - " for x, y in dist_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - " \n", - " plot_title = plot_title_no_quality + \"\\n\" + quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - "\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - }, - { - "cell_type": "markdown", - "id": "1d0c7548", - "metadata": {}, - "source": [ - "### Miles per chosen land transport mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "004b7b3c", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality= label_units + \" for each land-based primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='miles_sensed_mode_land%s' % file_suffix\n", - "\n", - "try:\n", - " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - "\n", - " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n", - "\n", - " labels_m = []\n", - " values_m = []\n", - "\n", - " for x, y in dist_dict.items():\n", - " if x != \"AIR_OR_HSR\":\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - "\n", - " plot_title = plot_title_no_quality + \"\\n\" + quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - "\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, 
plot_title_no_quality) " - ] - }, { "cell_type": "markdown", "id": "43ecc5d7", @@ -267,6 +107,11 @@ "file_name ='average_miles_sensed_mode%s' % file_suffix\n", "\n", "try:\n", + " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", + " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", + " dist = dist.reset_index()\n", + " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", + " \n", " data = dist.drop((dist.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)\n", " x='primary_mode'\n", " y='Average ('+label_units_lower+')'\n", @@ -370,7 +215,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 0ba13f8..77f9d4b 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -93,7 +93,7 @@ "id": "397709c8", "metadata": {}, "source": [ - "### Color Mapping for labels" + "### Color Dictionary" ] }, { @@ -103,7 +103,7 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" ] }, { @@ -183,87 +183,95 @@ { "cell_type": "code", "execution_count": null, - "id": "respiratory-breach", - "metadata": { - "scrolled": false - }, + "id": "006ba3de", + "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality=f\"Number of trips for each purpose for {mode_of_interest} only\"\n", - "file_name= f'ntrips_{mode_of_interest}_purpose%s' % file_suffix\n", + "plot_title_no_quality = f\"Number of trips for each purpose for {mode_of_interest}\"\n", + "file_name= 
f\"ntrips_{mode_of_interest}_purpose{file_suffix}\"\n", "\n", "try:\n", - " labels_tp = data_eb['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n", - " values_tp = data_eb['Trip_purpose'].value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_purpose(plot_title,labels_tp,values_tp,colors_purpose,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", + " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Trip_purpose\").agg({distance_col: 'count'}),f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n", + " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " set_title_and_save(fig, text_results, plot_title, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" + ] + }, + { + "cell_type": "markdown", + "id": "dependent-reservoir", + "metadata": {}, + "source": [ + "### Total Trip Length for each mode replaced by the specified mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "thermal-midnight", + "id": "2b415510", "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality=f\"Number of trips for each replaced 
transport mode for {mode_of_interest} only\"\n", - "file_name =f'ntrips_{mode_of_interest}_replaced_mode%s' % file_suffix\n", + "plot_title_no_quality = \"Total trip length (\" + label_units_lower + \") covered by replaced mode\"\n", + "file_name = f\"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}\"\n", "\n", "try:\n", - " labels_eb = data_eb.Replaced_mode.value_counts(dropna=True).keys().tolist()\n", - " values_eb = data_eb.Replaced_mode.value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_eb,values_eb,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_eb, labels_eb), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", + " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}), \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_mode, debug_df)\n", + " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " set_title_and_save(fig, text_results, plot_title, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality)\n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" ] }, { "cell_type": "markdown", - "id": "dependent-reservoir", + "id": "be6bb4be", "metadata": {}, "source": [ - "### Miles for each mode 
replaced by the specified mode" + "### Number of Trips for each mode replaced by specified mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "pointed-velvet", + "id": "3947a5f2", "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality=f\"Distribution of \"+label_units+f\" Replaced by {mode_of_interest}\"\n", - "file_name =f'miles_{mode_of_interest}_replaced_mode%s' % file_suffix\n", + "plot_title_no_quality= \"Number of trips for replaced mode\"\n", + "file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'\n", "\n", "try:\n", - " dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n", - " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n", - " dg = dg.reset_index()\n", - " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - "\n", - " dg_dict = dict(zip(dg['Replaced_mode'], dg['Total ('+label_units_lower+')']))\n", - " \n", - " labels_m = []\n", - " values_m = []\n", - "\n", - " for x, y in dg_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_m,values_m,colors_mode,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - " print(dg)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", + " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " plot_and_text_stacked_bar_chart(data_eb.groupby(\"Replaced_mode\").agg({distance_col: 'count'}), f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n", + " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " 
set_title_and_save(fig, text_results, plot_title, file_name)\n", + "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", + " plt.clf()\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) \n", + " alt_html = store_alt_html_missing(debug_df, file_name, plot_title_no_quality) \n", + "except Exception as e:\n", + " fig, ax = plt.subplots()\n", + " plot_and_text_error(e, ax, file_name)" ] }, { @@ -285,6 +293,10 @@ "file_name ='average_miles_replaced_mode%s' % file_suffix\n", "\n", "try:\n", + " dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n", + " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n", + " dg = dg.reset_index()\n", + " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", " data = dg.drop((dg.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False) \n", " \n", " x='Replaced_mode'\n", @@ -392,7 +404,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index 3364a1a..83ed1e0 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -4,6 +4,7 @@ import itertools import matplotlib.pyplot as plt import seaborn as sns +import traceback as tb from matplotlib.patches import Patch sns.set_style("whitegrid") @@ -19,13 +20,21 @@ SAVE_DIR="/plots/" +def calculate_pct(labels, values): + v2l_df = pd.DataFrame({"vals": values}, index=labels) + + # Calculate % for all the values + vs = v2l_df.vals.sum() + v2l_df["pct"] = v2l_df.vals.apply(lambda x: round((x/vs) * 100, 1)) + + return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list()) def merge_small_entries(labels, values): v2l_df = pd.DataFrame({"vals": values}, 
index=labels) # Calculate % for all the values vs = v2l_df.vals.sum() - v2l_df["pct"] = v2l_df.vals.apply(lambda x: (x/vs) * 100) + v2l_df["pct"] = v2l_df.vals.apply(lambda x: round((x/vs) * 100, 1)) disp.display(v2l_df) # Find small chunks to combine @@ -48,82 +57,136 @@ def merge_small_entries(labels, values): v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count disp.display(v2l_df) - return (v2l_df.index.to_list(),v2l_df.vals.to_list()) - - -def format_pct(pct, values): - total = sum(values) - absolute = int(round(pct*total/100.0)) - return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 4 else'' - + return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list()) -def pie_chart_mode(plot_title,labels,values,colors_map,file_name): - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) - m_labels, m_values = merge_small_entries(labels, values) - - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colors_map[key] for key in labels], - pctdistance=0.75, - autopct= lambda pct: format_pct(pct, values), - textprops={'size': 23}) - - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) +# Create dataframes with cols: 'Label' 'Value' and 'Proportion' +def process_trip_data(labels, values): + """ Inputs: + labels = Displayed labels (e.g. "Gas car, drove alone") + values = Corresponding vlaues of these labels + trip_type = Bar labels (e.g. Labeled by user (Confirmed trips)) + Returns: + df_total_trip_expanded = Data frame without consolidation of Others, used to create the alt_html table + df_total_trip = Data frame with consolidation of Others, used to represent the Bar Charts. 
+ """ + if len(labels) == 0 and len(values) == 0: + return pd.DataFrame(), pd.DataFrame() + m_labels_expanded, m_values_expanded, m_pct_expanded = calculate_pct(labels, values) + data_trip_expanded = {'Label': m_labels_expanded, 'Value': m_values_expanded, 'Proportion': m_pct_expanded} + df_total_trip_expanded = pd.DataFrame(data_trip_expanded) + + m_labels, m_values, m_pct = merge_small_entries(labels, values) + data_trip = {'Label': m_labels, 'Value': m_values, 'Proportion': m_pct} + df_total_trip = pd.DataFrame(data_trip) + return df_total_trip_expanded, df_total_trip + +def plot_and_text_error(e, ax, file_name): + stringified_exception = "".join(tb.format_exception(type(e), e, e.__traceback__)) + ax.text(0,0,s=stringified_exception) plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') - plt.show() - -def pie_chart_sensed_mode(plot_title,labels,values,file_name): - all_labels= ['IN_VEHICLE', - 'UNKNOWN', - 'WALKING', - 'AIR_OR_HSR', - 'BICYCLING', - 'OTHER'] - - val2labeldf = pd.DataFrame({"labels": labels, "values": values}) - - colours = dict(zip(all_labels, plt.cm.tab10.colors[:len(all_labels)])) - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) - - m_labels, m_values = merge_small_entries(labels, values) - - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colours[key] for key in labels], - pctdistance=0.75, - autopct= lambda pct: format_pct(pct, values), - textprops={'size': 23}) + alt_text = f"Error while generating chart:" + alt_text += stringified_exception + alt_text = access_alt_text(alt_text, file_name) + # TODO: Format the error as HTML instead of plain text + alt_html = access_alt_html(alt_text, file_name) + return alt_text, alt_html + +# Creates/ Appends single bar to the 100% Stacked Bar Chart +def plot_and_text_stacked_bar_chart(df, bar_label, ax, text_result, colors, debug_df): + """ Inputs: + df = Data frame corresponding to the bar in a stacked bar chart. 
It is + expected to have three columns, which represent the 'label', 'value' + bar_label = Text to represent the Bar (e.g. Labeled by user\n (Confirmed trips)) + ax = axis information + text_result = will be filled in with the alt_text and alt_html for the plot + """ + if len(df.columns) > 1: + raise ValueError("dataframe should have two columns (labels and values), found %s" % (df.columns)) - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) - plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') - plt.show() + sns.set(font_scale=1.5) + bar_height = 0.2 + bar_width = [0] + try: + grouped_df = df.reset_index().set_axis(['label', 'value'], axis='columns').sort_values(by='value', ascending=False) + + # TODO: Do we need this as a separate function? + df_all_entries, df_only_small = process_trip_data(grouped_df.label.tolist(), grouped_df.value.tolist()) + + # TODO: Fix this to be more pandas-like and change the "long" variable name + for label in pd.unique(df_only_small['Label']): + long = df_only_small[df_only_small['Label'] == label] + # TODO: Remove if/else; if we only consider unique values, then long can never be empty + if not long.empty: + mode_prop = long['Proportion'] + mode_count = long['Value'] + vals_str = [f'{y:.1f} %\n({x:.0f})' if y > 4 else '' for x, y in zip(mode_count, mode_prop)] + bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors[label]) + ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16) + bar_width = [total + val for total, val in zip(bar_width, mode_prop)] + else: + print(f"{long} is empty") + ax.tick_params(axis='y', labelsize=18) + ax.tick_params(axis='x', labelsize=18, rotation=90) + ncols = len(df_only_small)//5 if len(df_only_small) % 5 == 0 else len(df_only_small)//5 + 1 + ax.legend(bbox_to_anchor=(1, 0), loc='lower left', 
fancybox=True, shadow=True, fontsize=15) + # ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize=15, ncols=ncols) + # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/ + ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True) + text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label) + print("After populating, %s" % text_result) + except Exception as e: + # tb.print_exception(type(e), e, e.__traceback__) + # ax.set_title("Insufficient data", loc="center") + ax.text(x = 0.5, y = 0.9, s = "Insufficient data", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20) + # TODO: consider switching to a two column table + ax.text(x = 0.5, y = 0.8, s = debug_df.to_string(), horizontalalignment='center', verticalalignment='top', transform=ax.transAxes, fontsize=10) + text_result[0] = store_alt_text_missing(debug_df, None, bar_label) + text_result[1] = store_alt_html_missing(debug_df, None, bar_label) + # ax.yaxis.set_visible(False) + +# Adds chart title, x and y axis label to the 100% Stacked Bar Chart +def set_title_and_save(fig, text_results, plot_title, file_name): + # Setup label and title for the figure since these would be common for all sub-plots + # We only need the axis to tweak the position (WHY!) 
so we do so by getting the first ax object + ax = fig.get_axes()[0] + fig.supxlabel('Proportion (Count)', fontsize=20, x=0.5, y= ax.xaxis.get_label().get_position()[0] - 0.62, va='top') + # fig.supylabel('Trip Types', fontsize=20, x=-0.12, y=0.5, rotation='vertical') + fig.suptitle(plot_title, fontsize=25,va = 'bottom') + plt.text(x=0, y=ax.xaxis.get_label().get_position()[0] - 0.62, s=f"Last updated {arrow.get()}", fontsize=12) + plt.subplots_adjust(hspace=0.1, top= 0.95) + + # if nRows == 1, then plt.subplots returns a single axis object instead of an array + # similarly we have text_result be a single list if nRows == 1 and a list of lists if nRows > 1 + # but then we want to wrap it so that it is a list of lists with a single top level element + # so that the iteration logic below works + if len(fig.get_axes()) == 1: + text_results = [text_results] + + + # The number of plots is not fixed. Let's iterate over the array that is passed in to handle the text results. + # The number of axes in the figure is the number of plots + concat_alt_text = plot_title + concat_alt_html = f""" + + + +

    {plot_title}

    + """ + for i in range(0, len(fig.get_axes())): + concat_alt_text += text_results[i][0] + concat_alt_html += f"

    {text_results[i][1]}

    " -def pie_chart_purpose(plot_title,labels,values,colors_map,file_name): - - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) + concat_alt_html += f""" + + + """ - m_labels, m_values = merge_small_entries(labels, values) - - def func(pct, values): - total = sum(values) - absolute = int(round(pct*total/100.0)) - return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 3 else'' - - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colors_map[key] for key in labels], - pctdistance=0.85, - autopct=lambda pct: func(pct, values), - textprops={'size': 23}) - - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) - plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') + # Set up title and concatenate the text results + # TODO: Consider using a dictionary or a data object instead of an array of arrays + # for greater clarity + alt_text = access_alt_text(concat_alt_text, file_name) + alt_html = access_alt_html(concat_alt_html, file_name) + fig.savefig(SAVE_DIR + file_name + ".png", bbox_inches='tight') plt.show() def energy_impact(x,y,color,plot_title,file_name): @@ -338,19 +401,6 @@ def store_alt_text_bar(df, chart_name, var_name): alt_text = access_alt_text(alt_text, chart_name) return alt_text -def store_alt_text_pie(df, chart_name, var_name): - """ Inputs: - df = dataframe with index of item names, first column is counts - chart_name = what to label chart by in the dictionary - var_name = the variable being analyzed across pie slices - """ - # Fill out the alt text based on components of the chart and passed data - alt_text = f"Pie chart of {var_name}." - for i in range(0,len(df)): - alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0] / np.sum(df.iloc[:,0]) * 100, 1)}%." 
- alt_text = access_alt_text(alt_text, chart_name) - return alt_text - def store_alt_text_timeseries(df, chart_name, var_name): """ Inputs: df = dataframe with first col of dates, second column is values @@ -365,6 +415,46 @@ def store_alt_text_timeseries(df, chart_name, var_name): alt_text = access_alt_text(alt_text, chart_name) return alt_text +# Creating html table with col as Trip Type, Label, Value, and Proportion +def access_alt_html(html_content, chart_name): + """ Inputs: + html_body = the text describing the chart + chart_name = the alt text file to save or update + var_name = the variable being analyzed across bars + """ + with open(SAVE_DIR + chart_name + ".html", "w") as f: + f.write(html_content) + + return html_content + +# Appends bar information into into the alt_html +def store_alt_text_and_html_stacked_bar_chart(df, var_name): + """ Inputs: + df = dataframe combining columns as Trip Type, Label, Value, Proportion + chart_name = name of the chart + """ + # Generate alt text file + alt_text = f"\nStacked Bar of: {var_name}\n" + for i in range(len(df)): + alt_text += f"{df['Label'].iloc[i]} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n" + + # Generate html table + alt_html = "\n" + for i in range(len(df)): + alt_html += f"{df['Label'].iloc[i]}{df['Value'].iloc[i]}{df['Proportion'].iloc[i]}%" + html_content = f""" +

    Trip Type: {var_name}

    + + + + + + + {alt_html} +
    LabelValueProportion
    + """ + return alt_text, html_content + def generate_missing_plot(plot_title,debug_df,file_name): f, ax = plt.subplots(figsize=(10,10)) @@ -404,5 +494,33 @@ def store_alt_text_missing(df, chart_name, var_name): alt_text = f"Unable to generate\nBar chart of {var_name}.\nReason:" for i in range(0,len(df)): alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0], 1)}." - alt_text = access_alt_text(alt_text, chart_name) + + # For the bar charts, there is no longer a 1:1 mapping between missing alt + # text and a file. So we want to collect all the alt_text as strings and + # then save it. We cannot just remove the call to `access_alt_text`, since + # it will break other uses. So let's pass in None for the chart_name if we + # don't want to save it. + if chart_name is not None: + alt_text = access_alt_text(alt_text, chart_name) return alt_text + +def store_alt_html_missing(df, chart_name, var_name): + """ Inputs: + df = dataframe with index of debug information, first column is counts + chart_name = what to label chart by in the dictionary + var_name = the variable being analyzed across pie slices + """ + # Fill out the alt text based on components of the chart and passed data + alt_html = f""" + + +

    Unable to generate\nBar chart of {var_name}. Reason:

    \n + """ + alt_html += df.to_html() + alt_html += f""" + + + """ + if chart_name is not None: + alt_html = access_alt_html(alt_html, chart_name) + return alt_html diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 6d71199..fa32948 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -196,22 +196,23 @@ def translate_labels(labels): # Function: Maps "MODE", "PURPOSE", and "REPLACED_MODE" to colors. # Input: dynamic_labels, dic_re, and dic_pur -# Output: Map for color with mode and purpose +# Output: Dictionary mapping between color with mode/purpose/sensed def mapping_color_labels(dynamic_labels, dic_re, dic_pur): + sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "Other"] if len(dynamic_labels) > 0: mode_values = list(mapping_labels(dynamic_labels, "MODE").values()) if "MODE" in dynamic_labels else [] replaced_mode_values = list(mapping_labels(dynamic_labels, "REPLACED_MODE").values()) if "REPLACED_MODE" in dynamic_labels else [] purpose_values = list(mapping_labels(dynamic_labels, "PURPOSE").values()) if "PURPOSE" in dynamic_labels else [] combined_mode_values = mode_values + replaced_mode_values else: - # Addition of 'Other' is required to the list since it's missing from auxillary_files/mode_labels.csv and auxillary_files/purpose_labels.csv combined_mode_values = (list(OrderedDict.fromkeys(dic_re.values())) + ['Other']) - purpose_values = (list(OrderedDict.fromkeys(dic_pur.values())) + ['Other']) + purpose_values = list(OrderedDict.fromkeys(dic_pur.values())) colors_mode = dict(zip(combined_mode_values, plt.cm.tab20.colors[:len(combined_mode_values)])) colors_purpose = dict(zip(purpose_values, plt.cm.tab20.colors[:len(purpose_values)])) + colors_sensed = dict(zip(sensed_values, plt.cm.tab20.colors[:len(sensed_values)])) - return colors_mode, colors_purpose + return colors_mode, colors_purpose, colors_sensed def load_viz_notebook_sensor_inference_data(year, month, program, 
include_test_users=False, sensed_algo_prefix="cleaned"): """ Inputs: @@ -236,7 +237,7 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u # Document data quality file_suffix = get_file_suffix(year, month, program) - quality_text = get_quality_text_sensed(expanded_ct, include_test_users) + quality_text = get_quality_text_sensed(expanded_ct, "", include_test_users) debug_df = pd.DataFrame.from_dict({ "year": year, @@ -296,10 +297,10 @@ def get_quality_text(before_df, after_df, mode_of_interest=None, include_test_us print(quality_text) return quality_text -def get_quality_text_sensed(df, include_test_users=False): +def get_quality_text_sensed(df, cutoff_text="", include_test_users=False): cq = (len(df), unique_users(df)) user_str = 'testers and participants' if include_test_users else 'users' - quality_text = f"Based on %s trips from %d {user_str}" % cq + quality_text = f"Based on %s trips ({cutoff_text}) from %d {user_str}" % cq if cutoff_text else f"Based on %s trips from %d {user_str}" % cq print(quality_text) return quality_text