diff --git a/frontend/index.html b/frontend/index.html index a7e3ac1..2b5627f 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -394,7 +394,7 @@ const study_name = first_domain.substr(0, openpath_index); return study_name; } - + var mode_studied $(function () { $(document).ready(function () { // Simple solution to program/study plots dropdown; load the config and corresponding metrics.html @@ -403,6 +403,7 @@ // get units const dist_units = (data.display_config.use_imperial) ? "miles" : "kilometers"; console.log("Units for display are", data.display_config.use_imperial, dist_units); + mode_studied = data.intro.mode_studied // Load list of plots corresponding to study/program dynamic_labels = data.label_options if (data.intro.program_or_study == 'program') { @@ -422,11 +423,9 @@ const unitConfigured = configuredResponse.replaceAll("${data.display_config.use_imperial}", dist_units); $('#metric').append(unitConfigured); addPreconfiguredMetrics([ - "ntrips_mode_confirm", - "miles_mode_confirm", - "ntrips_sensed_mode", - "miles_sensed_mode", - `ntrips_${data.intro.mode_studied}_purpose`, + "ntrips_total", + "total_trip_length", + "ntrips_purpose", `ntrips_${data.intro.mode_studied}_per_weekday`, `sketch_CO2impact_${data.intro.mode_studied}` ]); @@ -450,10 +449,8 @@ const unitConfigured = response.replaceAll("${data.display_config.use_imperial}", dist_units); $('#metric').append(unitConfigured); addPreconfiguredMetrics([ - "ntrips_mode_confirm", - "miles_mode_confirm", - "ntrips_sensed_mode", - "miles_sensed_mode", + "ntrips_total", + "total_trip_length", "ntrips_purpose", "ntrips_sensed_per_weekday", "ts_emissions_user" @@ -531,15 +528,59 @@ // and without this change, even if we set the dateVal to '' // we would try to load ntrips_purpose__default.png const imgFile = "plots/" + metric + "_" + dateVal + program + ".png"; + const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html"; const altTextFile = "plots/" + metric + "_" + dateVal + program + 
".txt"; const altText = loadFile(altTextFile); - const nw = ["
  • " + const isStackedMetric = ['ntrips_total', 'ntrips_purpose', 'ntrips_under80', 'ntrips_commute_mode_confirm', + 'total_trip_length', 'total_trip_length_land',`ntrips_${mode_studied}_total`, + `ntrips_${mode_studied}_purpose`,`total_trip_length_${mode_studied}_replaced_mode`] + .includes(metric); + const jsonData = { metric, dateVal, program, metricLabel, dateLabel, programLabel, sizex, sizey }; + + if (isStackedMetric){ + const nw = ["
  • " + + labelText + + " " + + "" + + " " + + "" + + "" + altText + "" + + "
  • ", + sizex, sizey]; + gridster.add_widget.apply(gridster, nw); + } + else{ + const nw = ["
  • " + + labelText + + "" + + "" + altText + "" + + "
  • ", + sizex, sizey]; + gridster.add_widget.apply(gridster, nw); + } + }); + + $('body').on("click", ".gridster ul > li .addInfo", function () { + const addButton = $(this); + const additionalInfo = JSON.parse(decodeURIComponent(addButton.data('info'))); + const metric = additionalInfo.metric; + const dateVal = additionalInfo.dateVal; + const program = additionalInfo.program; + const metricLabel = additionalInfo.metricLabel; + const dateLabel = additionalInfo.dateLabel; + const programLabel = additionalInfo.programLabel; + const sizex = additionalInfo.sizex; + const sizey = additionalInfo.sizey; + const labelText = metricLabel + " " + dateLabel + " " + programLabel; + const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html"; + + const nw_additionalInformation = ["
  • " + labelText + "" - + "" + altText + "" + + "" + "
  • ", sizex, sizey]; - gridster.add_widget.apply(gridster, nw); + gridster.add_widget.apply(gridster, nw_additionalInformation); }); $('body').on("click", ".gridster ul > li .remove", function () { diff --git a/frontend/metrics_program.html b/frontend/metrics_program.html index d54d04a..2c1ba23 100644 --- a/frontend/metrics_program.html +++ b/frontend/metrics_program.html @@ -1,13 +1,10 @@ - - - - - - - - - + + + + + + @@ -20,9 +17,10 @@ - - - + + + + diff --git a/frontend/metrics_program_withoutEnergyMetrics.html b/frontend/metrics_program_withoutEnergyMetrics.html index e820cc9..f912048 100644 --- a/frontend/metrics_program_withoutEnergyMetrics.html +++ b/frontend/metrics_program_withoutEnergyMetrics.html @@ -1,14 +1,11 @@ - - - - - - - - - + + + + + + @@ -20,9 +17,10 @@ - - - + + + + diff --git a/frontend/metrics_study.html b/frontend/metrics_study.html index 79bf1f0..48e65c8 100644 --- a/frontend/metrics_study.html +++ b/frontend/metrics_study.html @@ -1,13 +1,10 @@ - - - - - - - - - + + + + + + diff --git a/frontend/metrics_study_withoutEnergyMetrics.html b/frontend/metrics_study_withoutEnergyMetrics.html index 4a2137e..26018f1 100644 --- a/frontend/metrics_study_withoutEnergyMetrics.html +++ b/frontend/metrics_study_withoutEnergyMetrics.html @@ -1,14 +1,11 @@ - - - - - - - - - + + + + + + diff --git a/viz_scripts/docker/crontab b/viz_scripts/docker/crontab index 6e7cc55..3aec6bb 100644 --- a/viz_scripts/docker/crontab +++ b/viz_scripts/docker/crontab @@ -1,8 +1,6 @@ 0 7 * * * python bin/update_mappings.py mapping_dictionaries.ipynb >> /var/log/intake.stdinout 2>&1 0 8 * * * python bin/generate_plots.py generic_metrics.ipynb default >> /var/log/intake.stdinout 2>&1 -0 8 * * * python bin/generate_plots.py generic_metrics_sensed.ipynb default >> /var/log/intake.stdinout 2>&1 0 8 * * * python bin/generate_plots.py generic_timeseries.ipynb default >> /var/log/intake.stdinout 2>&1 -0 8 * * * python bin/generate_plots.py mode_specific_metrics.ipynb default >> 
/var/log/intake.stdinout 2>&1 0 8 * * * python bin/generate_plots.py mode_specific_timeseries.ipynb default >> /var/log/intake.stdinout 2>&1 0 8 * * * python bin/generate_plots.py energy_calculations.ipynb default >> /var/log/intake.stdinout 2>&1 # For testing only diff --git a/viz_scripts/energy_calculations.ipynb b/viz_scripts/energy_calculations.ipynb index 4095929..0d28c09 100644 --- a/viz_scripts/energy_calculations.ipynb +++ b/viz_scripts/energy_calculations.ipynb @@ -106,7 +106,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, _, file_suffix, quality_text, debug_df, _ = scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 55c6768..c3a84ee 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -28,11 +28,12 @@ "year = 2020\n", "month = 11\n", "program = \"default\"\n", - "study_type = \"study\"\n", - "mode_of_interest = None\n", + "study_type = \"program\"\n", + "mode_of_interest = \"e-bike\"\n", "include_test_users = False\n", "dynamic_labels = {}\n", - "use_imperial = False" + "use_imperial = False\n", + "sensed_algo_prefix = \"cleaned\"" ] }, { @@ -79,7 +80,7 @@ "id": "intellectual-columbus", "metadata": {}, "source": [ - "## Collect Data From Database" + "## Collect Data From Database for Generic Metrics" ] }, { @@ -89,68 +90,302 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, expanded_ct_inferred, file_suffix, quality_text, debug_df, values_dict = scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", " dynamic_labels,\n", " dic_re,\n", " dic_pur=dic_pur,\n", - " include_test_users=include_test_users)" + " include_test_users=include_test_users)\n", + 
"\n", + "if (include_test_users == True):\n", + " users = \"testers and participants\"\n", + "else:\n", + " users = \"participants\"" ] }, { "cell_type": "markdown", - "id": "modified-skiing", + "id": "6d44c87e", "metadata": {}, "source": [ - "## Generic Metrics" + "## Collect Data from Database for Sensed Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55ec383b", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = scaffolding.load_viz_notebook_sensor_inference_data(year,\n", + " month,\n", + " program,\n", + " include_test_users,\n", + " sensed_algo_prefix)\n", + "\n", + "# Create a combined debug df from Generic and Sensed Metrics\n", + "merged_debug_df = debug_df.combine_first(debug_df_sensed)" ] }, { "cell_type": "markdown", - "id": "distributed-peace", + "id": "7f42f272", "metadata": {}, "source": [ - "### Distribution of Mode_confirm attribute" + "## Metrics for Specific Mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "tracked-serbia", - "metadata": { - "scrolled": false - }, + "id": "657d7e18", + "metadata": {}, + "outputs": [], + "source": [ + "data_eb = expanded_ct.query(f\"mode_confirm == '{mode_of_interest}'\") if \"mode_confirm\" in expanded_ct.columns else expanded_ct\n", + "data_eb_inferred = expanded_ct_inferred.query(f\"mode_confirm == '{mode_of_interest}'\") if \"mode_confirm\" in expanded_ct_inferred.columns else expanded_ct_inferred\n", + "mode_dict = scaffolding.get_quality_data_inferred(expanded_ct_inferred, expanded_ct, data_eb_inferred, data_eb)\n", + "\n", + "confirmed_trip_str = f\"Labeled by user\\n ({values_dict['confirmed_trip']} confirmed trips, {values_dict['unique_users_confirmed']} users\\n {values_dict['pct_confirmed']:.2f}% of total trips)\"\n", + "inferred_trip_str = f\"Inferred from history\\n ({values_dict['inferred_trip']} inferred trips, {values_dict['unique_users_inferred']} users \\n 
{values_dict['pct_inferred']:.2f}% of total trips)\"\n", + "sensed_trip_str = f\"Sensed by OpenPATH\\n ({values_dict['total_trip']} total trips, {values_dict['unique_users_total']} users)\"\n", + "\n", + "replaced_confirmed_trip_str = f\"Labeled `{mode_of_interest}` by user \\n ({mode_dict['mode_confirmed_trip']} confirmed trips, {mode_dict['unique_users_confirmed_mode']} users \\n {mode_dict['after_pct_confirmed']:.2f}% of confirmed trips)\"\n", + "replaced_inferred_trip_str = f\"Inferred `{mode_of_interest}` from history \\n ({mode_dict['mode_inferred_trip']} inferred trips, {mode_dict['unique_users_inferred_mode']} users \\n {mode_dict['after_pct_inferred']:.2f}% of inferred trips)\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "1af1f02b", + "metadata": {}, + "source": [ + "## 100% Stacked Bar Charts" + ] + }, + { + "cell_type": "markdown", + "id": "e50959c1", + "metadata": {}, + "source": [ + "### 1. Represents Number of Trips" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aef741fe", + "metadata": {}, "outputs": [], "source": [ - "file_name='ntrips_mode_confirm%s' % file_suffix\n", - "plot_title_no_quality= \"Number of trips for each mode (selected by users)\"\n", + "def process_data_frame(df, df_col, trip_type):\n", + " labels = df[df_col].value_counts(dropna=True).keys().tolist()\n", + " values = df[df_col].value_counts(dropna=True).tolist()\n", + " return process_trip_data(labels, values, trip_type)\n", + "\n", + "plot_title_no_quality= f\"Number of trips for each mode from {users} :\\n \"\n", + "file_name = f'ntrips_total{file_suffix}'\n", + "\n", "try:\n", - " labels_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct['Mode_confirm'].value_counts(dropna=True).tolist() \n", - " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_mc,values_mc,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), 
file_name, plot_title)\n", - " print(expanded_ct['Mode_confirm'].value_counts(dropna=True))\n", + " bar_count = 0\n", + " all_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + " if not expanded_ct.empty:\n", + " df_confirmed_tc = process_data_frame(expanded_ct, 'Mode_confirm', confirmed_trip_str)\n", + " if not df_confirmed_tc.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_confirmed_tc)\n", + " else:\n", + " print(\"df_confirmed_tc is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", + " if not expanded_ct_inferred.empty:\n", + " df_inferred_tc = process_data_frame(expanded_ct_inferred, 'Mode_confirm', inferred_trip_str)\n", + " if not df_inferred_tc.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_inferred_tc)\n", + " else:\n", + " print(\"df_inferred_tc is empty.\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", + "\n", + " if not expanded_ct_sensed.empty:\n", + " df_sensed_tc = process_data_frame(expanded_ct_sensed, 'primary_mode', sensed_trip_str)\n", + " if not df_sensed_tc.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_sensed_tc)\n", + " else:\n", + " print(\"df_sensed_tc is empty.\")\n", + " else:\n", + " print(\"expanded_ct_sensed is empty.\")\n", + "\n", + " result_df = merge_dataframes(all_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df[result_df['Count'] > 0], file_name, plot_title) \n", "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " generate_missing_plot(plot_title_no_quality,merged_debug_df,file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "52711e01", + "metadata": {}, + "source": [ + "### 2. 
Represents Number of Trips for Replaced mode " ] }, { "cell_type": "code", "execution_count": null, - "id": "about-seafood", - "metadata": { - "scrolled": false - }, + "id": "e6226112", + "metadata": {}, + "outputs": [], + "source": [ + "# Only applicable for program\n", + "if study_type == \"program\":\n", + " plot_title_no_quality= f\"Number of trips replaced by `{mode_of_interest}` from {users}: \\n\"\n", + " file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'\n", + "\n", + " try:\n", + " bar_count = 0\n", + " all_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not data_eb.empty:\n", + " df_replaced_tc = process_data_frame(data_eb, 'Replaced_mode', replaced_confirmed_trip_str)\n", + " if not df_replaced_tc.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_replaced_tc)\n", + " else:\n", + " print(\"df_replaced_tc is empty.\")\n", + " else:\n", + " print(\"data_eb is empty.\")\n", + " \n", + " if not data_eb_inferred.empty:\n", + " df_replaced_inferred_tc = process_data_frame(data_eb_inferred, 'Replaced_mode', replaced_inferred_trip_str)\n", + " if not df_replaced_inferred_tc.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_replaced_inferred_tc)\n", + " else:\n", + " print(\"df_replaced_inferred_tc is empty.\")\n", + " else:\n", + " print(\"data_eb_inferred is empty.\")\n", + "\n", + " result_df = merge_dataframes(all_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df[result_df['Count'] > 0], file_name, plot_title) \n", + " except:\n", + " generate_missing_plot(plot_title_no_quality,merged_debug_df,file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "8fc63a45", + "metadata": {}, + "source": [ + "### 3. 
Represents 80th percentile of the Number of Trips" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ece8ee", + "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality= \"Number of commute trips for each mode (selected by users)\"\n", - "file_name= 'ntrips_commute_mode_confirm%s' % file_suffix\n", + "def process_data_for_cutoff(df, cutoff, df_col, trip_type):\n", + " labels = df.loc[(df['distance'] <= cutoff)][df_col].value_counts(dropna=True).keys().tolist()\n", + " values = df.loc[(df['distance'] <= cutoff)][df_col].value_counts(dropna=True).tolist()\n", + " processed_data = process_trip_data(labels, values, trip_type)\n", + " return processed_data\n", "\n", "try:\n", + " cutoff = expanded_ct_sensed.distance.quantile(0.8)\n", + " if pd.isna(cutoff):\n", + " cutoff = 0\n", + "\n", + " dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n", + " dist_threshold = str(dist_threshold)\n", + "\n", + " plot_title_no_quality= f\"Number of trips under 80th percentile (<={dist_threshold} {label_units_lower}) for each mode from {users}: \\n\"\n", + " file_name = f'ntrips_under80{file_suffix}'\n", + "\n", + " bar_count = 0\n", + " all_data_frames_u80 = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not expanded_ct.empty:\n", + " u80_dict = scaffolding.get_quality_data_u80(expanded_ct_sensed[expanded_ct_sensed['distance'] <= cutoff], expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff])\n", + " df_confirmed_tc_u80 = process_data_for_cutoff(expanded_ct, cutoff, 'Mode_confirm', f\"Labeled by user\\n ({u80_dict['after_df']} confirmed trips, {u80_dict['unique_users_after']} users \\n {u80_dict['after_pct']:.2f}% of total trips)\")\n", + " if not df_confirmed_tc_u80.empty:\n", + " bar_count += 1\n", + " all_data_frames_u80.append(df_confirmed_tc_u80)\n", + " else:\n", + " print(\"df_confirmed_tc_u80 is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", + "\n", + " if not 
expanded_ct_inferred.empty:\n", + " u80_dict_inferred = scaffolding.get_quality_data_u80(expanded_ct_sensed[expanded_ct_sensed['distance'] <= cutoff], expanded_ct_inferred, expanded_ct_inferred[expanded_ct_inferred['distance'] <= cutoff])\n", + " df_inferred_tc_u80 = process_data_for_cutoff(expanded_ct_inferred, cutoff, 'Mode_confirm', f\"Inferred from history\\n ({u80_dict_inferred['after_df']} inferred trips, {u80_dict_inferred['unique_users_after']} users \\n {u80_dict_inferred['after_pct']:.2f}% of total trips)\")\n", + " if not df_inferred_tc_u80.empty:\n", + " bar_count += 1\n", + " all_data_frames_u80.append(df_inferred_tc_u80)\n", + " else:\n", + " print(\"df_inferred_tc_u80 is not empty\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", + "\n", + " if not expanded_ct_sensed.empty:\n", + " u80_dict_sensed = scaffolding.get_quality_data_u80(expanded_ct_sensed, expanded_ct_sensed, expanded_ct_sensed[expanded_ct_sensed['distance'] <= cutoff])\n", + " df_sensed_tc_u80 = process_data_for_cutoff(expanded_ct_sensed, cutoff, 'primary_mode', f\"Sensed by OpenPATH\\n ({u80_dict_sensed['after_df']} total trips, {u80_dict_sensed['unique_users_after']} users)\")\n", + " if not df_sensed_tc_u80.empty:\n", + " bar_count += 1\n", + " all_data_frames_u80.append(df_sensed_tc_u80)\n", + " else:\n", + " print(\"df_sensed_tc_u80 is empty.\")\n", + " else:\n", + " print(\"expanded_ct_sensed is empty.\")\n", + "\n", + " result_df_u80 = merge_dataframes(all_data_frames_u80)\n", + " stacked_bar_chart_generic(plot_title, result_df_u80, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df_u80[result_df_u80['Count'] > 0], file_name, plot_title)\n", + "except:\n", + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "7fa4f3da", + "metadata": {}, + "source": [ + 
"### 4. Represents Commute Trips" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c6cd4cf", + "metadata": {}, + "outputs": [], + "source": [ + "def process_commute_data_frame(df, df_col, trip_type):\n", + " labels = df.query(df_col).Mode_confirm.value_counts(dropna=True).keys().tolist()\n", + " values = df.query(df_col).Mode_confirm.value_counts(dropna=True).tolist()\n", + " return process_trip_data(labels, values, trip_type)\n", + "\n", + "plot_title_no_quality= f\"Number of `commute` trips for each mode from {users}: \\n\"\n", + "file_name = f\"ntrips_commute_mode_confirm{file_suffix}\"\n", + "\n", + "try:\n", + " bar_count = 0\n", + " all_data_frames_commute = []\n", + " plot_title = plot_title_no_quality\n", " if (len(dynamic_labels)):\n", " purpose_map_label = scaffolding.mapping_labels(dynamic_labels, \"PURPOSE\")\n", " translation_work = purpose_map_label['work']\n", @@ -158,135 +393,337 @@ " else:\n", " trip_purpose_query = \"Trip_purpose == 'Work'\"\n", "\n", - " labels_mc = expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct.query(trip_purpose_query).Mode_confirm.value_counts(dropna=True).tolist()\n", - " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct.query(trip_purpose_query), \"commute\", include_test_users)\n", - " plot_title= plot_title_no_quality+\"\\n\"+commute_quality_text\n", - " pie_chart_mode(plot_title,labels_mc,values_mc,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n", + " commute_dict = scaffolding.get_quality_data_inferred(expanded_ct_inferred, expanded_ct, expanded_ct_inferred.query(trip_purpose_query), expanded_ct.query(trip_purpose_query))\n", + "\n", + " if not expanded_ct.empty:\n", + " df_total_trip_commute = process_commute_data_frame(expanded_ct, trip_purpose_query, f\"Labeled `commute` by users \\n ({commute_dict['mode_confirmed_trip']} 
confirmed trips, {commute_dict['unique_users_confirmed_mode']} users \\n {commute_dict['after_pct_confirmed']:.2f}% of {commute_dict['confirmed_trip']} confirmed trips)\")\n", + " if not df_total_trip_commute.empty:\n", + " all_data_frames_commute.append(df_total_trip_commute)\n", + " bar_count += 1\n", + " else:\n", + " print(\"df_total_trip_commute is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", + "\n", + " if not expanded_ct_inferred.empty:\n", + " df_total_trip_commute_inferred = process_commute_data_frame(expanded_ct_inferred, trip_purpose_query,f\" Inferred `commute` from history \\n ({commute_dict['mode_inferred_trip']} inferred trips, {commute_dict['unique_users_inferred_mode']} users \\n {commute_dict['after_pct_inferred']:.2f}% of {commute_dict['inferred_trip']} inferred trips)\")\n", + " if not df_total_trip_commute_inferred.empty:\n", + " all_data_frames_commute.append(df_total_trip_commute_inferred)\n", + " bar_count += 1\n", + " else:\n", + " print(\"df_total_trip_commute_inferred is empty.\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", + "\n", + " result_df_commute = merge_dataframes(all_data_frames_commute)\n", + " stacked_bar_chart_generic(plot_title, result_df_commute, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(df_total_trip_commute, file_name, plot_title)\n", "except:\n", - " debug_df.loc[\"Commute_trips\"] = len(expanded_ct.query(trip_purpose_query)) if \"Trip_purpose\" in expanded_ct.columns else 0\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", + " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" ] }, { "cell_type": "markdown", - "id": "careful-spencer", + "id": "b560cb32", "metadata": {}, "source": [ - "### Distribution of Trip_purpose attribute" + "### 5. 
Represents Total Trip Length covered by each mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "conservative-september", - "metadata": { - "scrolled": false - }, + "id": "ffccb96f", + "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality=\"Number of trips for each purpose (selected by users)\"\n", - "file_name= 'ntrips_purpose%s' % file_suffix\n", + "def process_distance_data(df, df_col, trip_type):\n", + " dist = df.groupby(df_col).agg({distance_col: ['sum', 'count', 'mean']})\n", + " dist.columns = ['Total (' + label_units_lower + ')', 'Count', 'Average (' + label_units_lower + ')']\n", + " dist = dist.reset_index()\n", + " dist = dist.sort_values(by=['Total (' + label_units_lower + ')'], ascending=False)\n", + "\n", + " dist_dict = dict(zip(dist[df_col], dist['Total (' + label_units_lower + ')']))\n", + " labels_dist = []\n", + " values_dist = []\n", + "\n", + " for x, y in dist_dict.items():\n", + " labels_dist.append(x)\n", + " values_dist.append(y)\n", + "\n", + " return process_trip_data(labels_dist, values_dist, trip_type)\n", + "\n", + "plot_title_no_quality = f\"Total trip length ({label_units_lower}) covered by each mode from {users}: \\n\"\n", + "file_name = f\"total_trip_length{file_suffix}\"\n", "\n", "try:\n", - " labels_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n", - " values_tp = expanded_ct['Trip_purpose'].value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_purpose(plot_title,labels_tp,values_tp,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n", - " print(expanded_ct['Trip_purpose'].value_counts(dropna=True))\n", + " bar_count = 0\n", + " all_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not expanded_ct.empty:\n", + " df_confirm_dist = process_distance_data(expanded_ct, 'Mode_confirm', confirmed_trip_str)\n", + " if not 
df_confirm_dist.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_confirm_dist)\n", + " else:\n", + " print(\"df_confirm_dist is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", + "\n", + " if not expanded_ct_inferred.empty:\n", + " df_inferred_dist = process_distance_data(expanded_ct_inferred, 'Mode_confirm', inferred_trip_str)\n", + " if not df_inferred_dist.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_inferred_dist)\n", + " else:\n", + " print(\"df_inferred_dist is empty.\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", + "\n", + " if not expanded_ct_sensed.empty:\n", + " df_sensed_dist = process_distance_data(expanded_ct_sensed, 'primary_mode', sensed_trip_str)\n", + " if not df_sensed_dist.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_sensed_dist)\n", + " else:\n", + " print(\"df_sensed_dist is empty.\")\n", + " else:\n", + " print(\"expanded_ct_sensed is empty.\")\n", + "\n", + " result_df = merge_dataframes(all_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df[result_df['Count'] > 0], file_name, plot_title)\n", "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" ] }, { "cell_type": "markdown", - "id": "crucial-keyboard", + "id": "bc9be240", "metadata": {}, "source": [ - "### Mode choice for trips under 80% mark" + "### 6. 
Represents Total Trip Length covered by each mode in Land" ] }, { "cell_type": "code", "execution_count": null, - "id": "identified-replica", - "metadata": { - "scrolled": false - }, + "id": "b1338268", + "metadata": {}, "outputs": [], "source": [ - "file_name ='ntrips_under10miles_mode_confirm%s' % file_suffix\n", + "plot_title_no_quality = f\"Total trip length ({label_units_lower}) covered by each mode via. land from {users}: \\n\"\n", + "file_name = f\"total_trip_length_land{file_suffix}\"\n", "\n", "try:\n", - " #determine 80th percentile\n", - " cutoff = expanded_ct.distance.quantile(0.8)\n", - " if pd.isna(cutoff):\n", - " cutoff = 0\n", - " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n", - " dist_threshold = str(dist_threshold) \n", + " bar_count = 0\n", + " all_data_frames = []\n", + " plot_title = plot_title_no_quality\n", "\n", - " plot_title_no_quality=\"Mode confirmations for trips under \" + dist_threshold + \" \" + label_units_lower\n", - " plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n", + " if not expanded_ct_sensed.empty:\n", + " df_confirm_dist_land = process_distance_data(expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"], 'Mode_confirm', confirmed_trip_str)\n", + " if not df_confirm_dist_land.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_confirm_dist_land)\n", + " else:\n", + " print(\"df_confirm_dist_land is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", "\n", - " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).keys().tolist()\n", - " values_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True).tolist()\n", - " d10_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, 
include_test_users)\n", - " plot_title = plot_title_no_quality+\"\\n\"+d10_quality_text\n", - " pie_chart_mode(plot_title,labels_d10,values_d10,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n", - " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].Mode_confirm.value_counts(dropna=True))\n", + " if not expanded_ct_inferred.empty:\n", + " df_inferred_dist_land = process_distance_data(expanded_ct_inferred[expanded_ct_inferred['Mode_confirm'] != \"Airplane\"], 'Mode_confirm', inferred_trip_str)\n", + " if not df_inferred_dist_land.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_inferred_dist_land)\n", + " else:\n", + " print(\"df_inferred_dist_land is empty.\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", + "\n", + " if not expanded_ct_sensed.empty:\n", + " df_sensed_dist_land = process_distance_data(expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"], 'primary_mode', sensed_trip_str)\n", + " if not df_sensed_dist_land.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_sensed_dist_land)\n", + " else:\n", + " print(\"df_sensed_dist_land is empty.\")\n", + " else:\n", + " print(\"expanded_ct_sensed is empty.\")\n", "\n", + " result_df = merge_dataframes(all_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df[result_df['Count'] > 0], file_name, plot_title)\n", "except:\n", - " d10_df = expanded_ct.query(\"distance <= \" + str(cutoff)) if \"distance\" in expanded_ct.columns else expanded_ct\n", - " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + " generate_missing_plot(plot_title_no_quality, merged_debug_df, 
file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" ] }, { "cell_type": "markdown", - "id": "dominant-company", + "id": "7aa03722", "metadata": {}, "source": [ - "### Miles per chosen transport mode" + "### 7. Represents Total Trip Length covered by Replaced Mode" ] }, { "cell_type": "code", "execution_count": null, - "id": "satisfied-sharing", - "metadata": { - "scrolled": false - }, + "id": "a872545c", + "metadata": {}, + "outputs": [], + "source": [ + "# Applicable only for program\n", + "if study_type == \"program\":\n", + " plot_title_no_quality = f\"Total trip length ({label_units_lower}) replaced by `{mode_of_interest}` from {users}: \\n\"\n", + " file_name = f\"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}\"\n", + " \n", + " try:\n", + " bar_count = 0\n", + " all_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not data_eb.empty:\n", + " df_replaced_dist = process_distance_data(data_eb, 'Replaced_mode', replaced_confirmed_trip_str)\n", + " if not df_replaced_dist.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_replaced_dist)\n", + " else:\n", + " print(\"df_replaced_dist is empty.\")\n", + " else:\n", + " print(\"data_eb is empty.\")\n", + " \n", + " if not data_eb_inferred.empty:\n", + " df_replaced_inferred_dist = process_distance_data(data_eb_inferred, 'Replaced_mode',replaced_inferred_trip_str)\n", + " if not df_replaced_inferred_dist.empty:\n", + " bar_count += 1\n", + " all_data_frames.append(df_replaced_inferred_dist)\n", + " else:\n", + " print(\"df_replaced_inferred_dist is empty.\")\n", + " else:\n", + " print(\"data_eb_inferred is empty.\")\n", + "\n", + " result_df = merge_dataframes(all_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_df[result_df['Count'] > 0], file_name, plot_title)\n", + " except:\n", + " 
generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "ffb2df0b", + "metadata": {}, + "source": [ + "### 8. Represents number of trips for each purpose" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46474ada", + "metadata": {}, "outputs": [], "source": [ - "plot_title_no_quality= label_units + \" for each mode (selected by users)\"\n", - "plot_title=plot_title_no_quality + '\\n' + quality_text\n", - "file_name ='miles_mode_confirm%s' % file_suffix\n", + "plot_title_no_quality = f\"Number of trips for each purpose from {users}: \\n\"\n", + "file_name = f\"ntrips_purpose{file_suffix}\"\n", "\n", "try:\n", - " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - " dist_dict = dict(zip(dist['Mode_confirm'], dist['Total ('+label_units_lower+')']))\n", - " \n", - " labels_m = []\n", - " values_m = []\n", + " bar_count = 0\n", + " all_tp_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not expanded_ct.empty:\n", + " df_purpose_trip = process_data_frame(expanded_ct, 'Trip_purpose', confirmed_trip_str)\n", + " if not df_purpose_trip.empty:\n", + " bar_count += 1\n", + " all_tp_data_frames.append(df_purpose_trip)\n", + " else:\n", + " print(\"df_purpose_trip is empty.\")\n", + " else:\n", + " print(\"expanded_ct is empty.\")\n", " \n", - " for x, y in dist_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", + " if not expanded_ct_inferred.empty:\n", + " df_purpose_trip_inferred = process_data_frame(expanded_ct_inferred, 'Trip_purpose', inferred_trip_str)\n", + " if not 
df_purpose_trip_inferred.empty:\n", + " bar_count += 1\n", + " all_tp_data_frames.append(df_purpose_trip_inferred)\n", + " else:\n", + " print(\"df_purpose_trip_inferred is empty.\")\n", + " else:\n", + " print(\"expanded_ct_inferred is empty.\")\n", "\n", - " pie_chart_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", + " result_tp_df = merge_dataframes(all_tp_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_tp_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_tp_df[result_tp_df['Count'] > 0], file_name, plot_title)\n", "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "db3e20c3", + "metadata": {}, + "source": [ + "### 9. 
Represents number of trips for each purpose by replaced mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d45e74", + "metadata": {}, + "outputs": [], + "source": [ + "# Applicable only for program\n", + "if study_type == \"program\":\n", + " plot_title_no_quality = f\"Number of `{mode_of_interest}` trips for each purpose from {users}: \\n\"\n", + " file_name = f\"ntrips_{mode_of_interest}_purpose{file_suffix}\"\n", + "\n", + " try:\n", + " bar_count = 0\n", + " all_tp_data_frames = []\n", + " plot_title = plot_title_no_quality\n", + "\n", + " if not data_eb.empty:\n", + " df_purpose_trip_eb = process_data_frame(data_eb, 'Trip_purpose', replaced_confirmed_trip_str)\n", + " if not df_purpose_trip_eb.empty:\n", + " bar_count += 1\n", + " all_tp_data_frames.append(df_purpose_trip_eb)\n", + " else:\n", + " print(\"df_purpose_trip_eb is empty.\")\n", + " else:\n", + " print(\"data_eb is empty.\")\n", + " \n", + " if not data_eb_inferred.empty:\n", + " df_purpose_trip_inferred_eb = process_data_frame(data_eb_inferred, 'Trip_purpose', replaced_inferred_trip_str)\n", + " if not df_purpose_trip_inferred_eb.empty:\n", + " bar_count += 1\n", + " all_tp_data_frames.append(df_purpose_trip_inferred_eb)\n", + " else:\n", + " print(\"df_purpose_trip_inferred_eb is empty.\")\n", + " else:\n", + " print(\"data_eb_inferred is empty.\")\n", + "\n", + " result_tp_df = merge_dataframes(all_tp_data_frames)\n", + " stacked_bar_chart_generic(plot_title, result_tp_df, file_name, bar_count)\n", + " alt_text, alt_html = store_alt_text_stacked_bar_chart(result_tp_df[result_tp_df['Count'] > 0], file_name, plot_title)\n", + " except:\n", + " generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n", + " alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "bcef1f6a", + "metadata": {}, + "source": [ + "## Generic Metrics (Bar Charts)" + ] }, { @@ -304,14 +741,19 @@ "metadata": {}, 
"outputs": [], "source": [ - "plot_title_no_quality=\"Average \"+ label_units+\" for each mode with > 3 entries\"\n", "file_name ='average_miles_mode_confirm%s' % file_suffix\n", + "plot_title_no_quality=\"Average \"+ label_units+\" for each mode with > 3 entries\"\n", "\n", "try:\n", + " dist = expanded_ct.groupby('Mode_confirm').agg({distance_col: ['sum', 'count' , 'mean']})\n", + " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", + " dist = dist.reset_index()\n", + " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", + "\n", " x='Mode_confirm'\n", " y='Average ('+label_units_lower+')'\n", " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " \n", + "\n", " data = dist.drop((dist.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)\n", "\n", " barplot_mode(data,x,y,plot_title, expanded_ct['Mode_confirm'].dropna().unique().tolist(), file_name)\n", @@ -347,7 +789,7 @@ " data = fq_days\n", " x = 'Day of the Month'\n", " y = 'Number of Trips'\n", - " \n", + "\n", " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", "\n", " barplot_day(data,x,y,plot_title,file_name)\n", @@ -386,13 +828,252 @@ " y = 'Number of Trips'\n", "\n", " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " \n", + "\n", " barplot_day(data,x,y,plot_title,file_name)\n", " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday']), file_name, plot_title)\n", "except:\n", " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " ] + }, + { + "cell_type": "markdown", + "id": "94454e8f", + "metadata": {}, + "source": [ + "## Sensed Metrics (Bar Charts)" + ] + }, + { + "cell_type": "markdown", + "id": "1ae3bf80", + "metadata": {}, + "source": [ + "### Average miles per transport mode selected (primary_mode)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "882c0891", + "metadata": {}, + "outputs": [], + "source": [ + "plot_title_no_quality=\" Average \" + label_units + \" for each mode with > 3 entries\\n(inferred by OpenPATH from phone sensors)\"\n", + "file_name ='average_miles_sensed_mode%s' % file_suffix\n", + "\n", + "try:\n", + " dist = expanded_ct_sensed.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", + " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", + " dist = dist.reset_index()\n", + " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", + "\n", + " data = dist.drop((dist.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)\n", + " x='primary_mode'\n", + " y='Average ('+label_units_lower+')'\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + "\n", + " barplot_mode(data,x,y,plot_title, expanded_ct_sensed['primary_mode'].dropna().unique().tolist(), file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Average ('+label_units_lower+')'].values, data['primary_mode']), file_name, plot_title)\n", + "except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df_sensed,file_name)\n", + " alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality) " + ] + }, + { + "cell_type": "markdown", + "id": "782926ff", + "metadata": {}, + "source": [ + "### Number of trips by day" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6b51c4e", + "metadata": {}, + "outputs": [], + "source": [ + "plot_title_no_quality=\"Number of trips by day\\n(inferred by OpenPATH from phone sensors)\"\n", + "file_name ='ntrips_sensed_per_day%s' % file_suffix\n", + "\n", + "try:\n", + " fq_days = expanded_ct_sensed.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})\n", + " fq_days = fq_days.reset_index()\n", + " fq_days.columns = ['Day of 
the Month', 'Total', 'Number of Trips']\n", + "\n", + " data = fq_days\n", + " x = 'Day of the Month'\n", + " y = 'Number of Trips'\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + "\n", + " barplot_day(data,x,y,plot_title,file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Day of the Month']), file_name, plot_title)\n", + "except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df_sensed,file_name)\n", + " alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "5d403785", + "metadata": {}, + "source": [ + "### Number of trips by day of week" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b9789a8", + "metadata": {}, + "outputs": [], + "source": [ + "plot_title_no_quality=\"Number of trips by weekday\\n(inferred by OpenPATH from phone sensors)\"\n", + "file_name ='ntrips_sensed_per_weekday%s' % file_suffix\n", + "try:\n", + " fq_weekdays = expanded_ct_sensed.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})\n", + " fq_weekdays = fq_weekdays.reset_index()\n", + " fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']\n", + " weekday_labels = [\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"]\n", + " fq_weekdays[\"Weekday\"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[x])\n", + "\n", + " data = fq_weekdays\n", + " x = 'Weekday'\n", + " y = 'Number of Trips'\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + "\n", + " barplot_day(data,x,y,plot_title,file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday']), file_name, plot_title)\n", + "except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df_sensed,file_name)\n", + " alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)" + ] + }, + { + 
"cell_type": "markdown", + "id": "881f2170", + "metadata": {}, + "source": [ + "## Mode Specific (Bar Charts)" + ] + }, + { + "cell_type": "markdown", + "id": "601057fe", + "metadata": {}, + "source": [ + "### Average miles per trip for specified mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24dccf66", + "metadata": {}, + "outputs": [], + "source": [ + "if study_type == \"program\":\n", + " plot_title_no_quality=\"Average \" + label_units + \" for each replaced mode with > 3 entries\\n'Other' represents trips with a non-standard or missing replacement\"\n", + " file_name ='average_miles_replaced_mode%s' % file_suffix\n", + "\n", + " try:\n", + " dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n", + " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n", + " dg = dg.reset_index()\n", + " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", + " data = dg.drop((dg.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False) \n", + "\n", + " x='Replaced_mode'\n", + " y='Average ('+label_units_lower+')'\n", + " y2 = \"Count\"\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + " barplot_mode(data,x,y,plot_title, expanded_ct['Replaced_mode'].dropna().unique().tolist(), file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Average ('+label_units_lower+')'].values, data.Replaced_mode), file_name, plot_title)\n", + "\n", + " except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "14650196", + "metadata": {}, + "source": [ + "### Number of trips by day for specified mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ef2fe77", + "metadata": {}, + "outputs": [], + "source": [ + "if 
study_type == \"program\":\n", + " plot_title_no_quality=f\"Number of {mode_of_interest} trips by day\"\n", + " file_name =f'ntrips_{mode_of_interest}_per_day%s' % file_suffix\n", + "\n", + " try:\n", + " fq_days = data_eb.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})\n", + " fq_days = fq_days.reset_index()\n", + " fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']\n", + "\n", + " data = fq_days\n", + " x = 'Day of the Month'\n", + " y = 'Number of Trips'\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + " barplot_day(data,x,y,plot_title,file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Day of the Month'].values), file_name, plot_title)\n", + " except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + ] + }, + { + "cell_type": "markdown", + "id": "e807c0f1", + "metadata": {}, + "source": [ + "### Number of trips by day of week for specified mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8085e9a1", + "metadata": {}, + "outputs": [], + "source": [ + "if study_type == \"program\":\n", + " plot_title_no_quality=f\"Number of {mode_of_interest} trips by weekday\"\n", + " file_name =f'ntrips_{mode_of_interest}_per_weekday%s' % file_suffix\n", + "\n", + " try:\n", + " fq_weekdays = data_eb.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})\n", + " fq_weekdays = fq_weekdays.reset_index()\n", + " fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']\n", + " weekday_labels = [\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"]\n", + " fq_weekdays[\"Weekday\"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[x])\n", + "\n", + " data = fq_weekdays\n", + " x = 'Weekday'\n", + " y = 'Number of Trips'\n", + "\n", + " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", + " 
barplot_day(data,x,y,plot_title,file_name)\n", + " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday'].values), file_name, plot_title)\n", + " except:\n", + " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", + " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" + ] } ], "metadata": { diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb deleted file mode 100644 index 762cbf1..0000000 --- a/viz_scripts/generic_metrics_sensed.ipynb +++ /dev/null @@ -1,378 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "academic-context", - "metadata": {}, - "source": [ - "## Generate Static Graphs" - ] - }, - { - "cell_type": "markdown", - "id": "medium-siemens", - "metadata": {}, - "source": [ - "These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.\n", - "\n", - "Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "narrative-hunter", - "metadata": {}, - "outputs": [], - "source": [ - "year = 2020\n", - "month = 11\n", - "program = \"default\"\n", - "study_type = \"study\"\n", - "mode_of_interest = None\n", - "include_test_users = False\n", - "use_imperial = False\n", - "sensed_algo_prefix = \"cleaned\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "activated-portugal", - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from plots import *\n", - "import scaffolding\n", - "\n", - "sns.set_style(\"whitegrid\")\n", - "sns.set()\n", - "%matplotlib inline\n", - "\n", - "# get metric vs imperial vars\n", - "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)" - ] - }, - { - "cell_type": "markdown", - "id": "intellectual-columbus", - "metadata": {}, - "source": [ - "## Collect Data From Database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "organic-pitch", - "metadata": {}, - "outputs": [], - "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_sensor_inference_data(year,\n", - " month,\n", - " program,\n", - " include_test_users,\n", - " sensed_algo_prefix)" - ] - }, - { - "cell_type": "markdown", - "id": "modified-skiing", - "metadata": {}, - "source": [ - "## Generic Metrics" - ] - }, - { - "cell_type": "markdown", - "id": "distributed-peace", - "metadata": {}, - "source": [ - "### Distribution of Mode_confirm attribute" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tracked-serbia", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "file_name='ntrips_sensed_mode%s' % file_suffix\n", - "plot_title_no_quality= \"Number of trips for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - 
"try:\n", - " labels_mc = expanded_ct['primary_mode'].value_counts(dropna=True).keys().tolist()\n", - " values_mc = expanded_ct['primary_mode'].value_counts(dropna=True).tolist() \n", - " plot_title = plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_mc,values_mc,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_mc, labels_mc), file_name, plot_title)\n", - " print(expanded_ct['primary_mode'].value_counts(dropna=True))\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "crucial-keyboard", - "metadata": {}, - "source": [ - "### Mode choice for trips under 80% mark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "identified-replica", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "file_name ='ntrips_under10miles_sensed_mode%s' % file_suffix\n", - "\n", - "try:\n", - " #determine 80th percentile\n", - " cutoff = expanded_ct.distance.quantile(0.8)\n", - " dist_threshold = expanded_ct[distance_col].quantile(0.8).round(1)\n", - " dist_threshold = str(dist_threshold)\n", - "\n", - " plot_title_no_quality=\"Number of trips under \" + dist_threshold + \" \" + label_units_lower + \" for each primary mode\"\n", - " plot_title_no_quality=plot_title_no_quality + \"\\n(inferred by OpenPATH from phone sensors)\" \n", - " plot_title_no_quality=plot_title_no_quality + \"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n", - "\n", - " labels_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).keys().tolist()\n", - " values_d10 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True).tolist()\n", - " d10_quality_text = scaffolding.get_quality_text(expanded_ct, 
expanded_ct[expanded_ct['distance'] <= cutoff], \"< \" + dist_threshold + \" \" + label_units_lower, include_test_users)\n", - " plot_title= plot_title_no_quality+\"\\n\"+d10_quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_d10,values_d10,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n", - " print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True))\n", - "except:\n", - " d10_df = expanded_ct.query(\"distance <= \" + cutoff) if \"distance\" in expanded_ct.columns else expanded_ct\n", - " debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "dominant-company", - "metadata": {}, - "source": [ - "### Miles per chosen transport mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "satisfied-sharing", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "plot_title_no_quality = label_units + \" for each primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='miles_sensed_mode%s' % file_suffix\n", - "\n", - "try:\n", - " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist = dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n", - "\n", - " labels_m = []\n", - " values_m = []\n", - "\n", - " for x, y in dist_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - " \n", - " plot_title = plot_title_no_quality + \"\\n\" + 
quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - "\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - }, - { - "cell_type": "markdown", - "id": "1d0c7548", - "metadata": {}, - "source": [ - "### Miles per chosen land transport mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "004b7b3c", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality= label_units + \" for each land-based primary mode\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='miles_sensed_mode_land%s' % file_suffix\n", - "\n", - "try:\n", - " dist = expanded_ct.groupby('primary_mode').agg({distance_col: ['sum', 'count' , 'mean']})\n", - " dist.columns = ['Total ('+label_units_lower+')', 'Count', 'Average ('+label_units_lower+')']\n", - " dist = dist.reset_index()\n", - " dist =dist.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - "\n", - " dist_dict = dict(zip(dist['primary_mode'], dist['Total ('+label_units_lower+')']))\n", - "\n", - " labels_m = []\n", - " values_m = []\n", - "\n", - " for x, y in dist_dict.items():\n", - " if x != \"AIR_OR_HSR\":\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - "\n", - " plot_title = plot_title_no_quality + \"\\n\" + quality_text\n", - " pie_chart_sensed_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - "\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - }, - { - "cell_type": "markdown", - "id": "43ecc5d7", - "metadata": {}, - "source": [ - "### Average miles per transport mode selected 
(primary_mode)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5189eaee", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=\" Average Miles for each mode with > 3 entries\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='average_miles_sensed_mode%s' % file_suffix\n", - "\n", - "try:\n", - " data = dist.drop((dist.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False)\n", - " x='primary_mode'\n", - " y='Average ('+label_units_lower+')'\n", - " \n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " \n", - " barplot_mode(data,x,y,plot_title, expanded_ct['primary_mode'].dropna().unique().tolist(), file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Average ('+label_units_lower+')'].values, data['primary_mode']), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - }, - { - "cell_type": "markdown", - "id": "130100ee", - "metadata": {}, - "source": [ - "### Number of trips by day¶" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9933d138", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=\"Number of trips by day\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='ntrips_sensed_per_day%s' % file_suffix\n", - "\n", - "try:\n", - " fq_days = expanded_ct.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})\n", - " fq_days = fq_days.reset_index()\n", - " fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']\n", - "\n", - " data = fq_days\n", - " x = 'Day of the Month'\n", - " y = 'Number of Trips'\n", - " \n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - "\n", - " barplot_day(data,x,y,plot_title,file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of 
Trips'].values, data['Day of the Month']), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - }, - { - "cell_type": "markdown", - "id": "be9479ad", - "metadata": {}, - "source": [ - "### Number of trips by day of week¶" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9878ceaf", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=\"Number of trips by weekday\\n(inferred by OpenPATH from phone sensors)\"\n", - "file_name ='ntrips_sensed_per_weekday%s' % file_suffix\n", - "try:\n", - " fq_weekdays = expanded_ct.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})\n", - " fq_weekdays = fq_weekdays.reset_index()\n", - " fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']\n", - " weekday_labels = [\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"]\n", - " fq_weekdays[\"Weekday\"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[x])\n", - "\n", - " data = fq_weekdays\n", - " x = 'Weekday'\n", - " y = 'Number of Trips'\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " \n", - " barplot_day(data,x,y,plot_title,file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday']), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality) " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 
"3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/viz_scripts/generic_timeseries.ipynb b/viz_scripts/generic_timeseries.ipynb index e1e473a..9b0feec 100644 --- a/viz_scripts/generic_timeseries.ipynb +++ b/viz_scripts/generic_timeseries.ipynb @@ -86,7 +86,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, _, file_suffix, quality_text, debug_df, _ = scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb deleted file mode 100644 index 6f03dd3..0000000 --- a/viz_scripts/mode_specific_metrics.ipynb +++ /dev/null @@ -1,382 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "worldwide-portuguese", - "metadata": {}, - "source": [ - "## Generate Static Graphs" - ] - }, - { - "cell_type": "markdown", - "id": "alive-integration", - "metadata": {}, - "source": [ - "These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.\n", - "\n", - "Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "former-luther", - "metadata": {}, - "outputs": [], - "source": [ - "year = 2020\n", - "month = 11\n", - "program = \"default\"\n", - "study_type = \"program\"\n", - "mode_of_interest = \"e-bike\"\n", - "include_test_users = False\n", - "dynamic_labels = { }\n", - "use_imperial = True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dated-compromise", - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from plots import *\n", - "import scaffolding\n", - "\n", - "sns.set_style(\"whitegrid\")\n", - "sns.set()\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "assisted-mathematics", - "metadata": {}, - "outputs": [], - "source": [ - "# Do not run this notebook at all unless it is for a program; nbclient will run up through this cell\n", - "if study_type != \"program\":\n", - " ipython = get_ipython()\n", - " ipython._showtraceback = scaffolding.no_traceback_handler\n", - " raise Exception(\"The plots in this notebook are only relevant to programs\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cathedral-scanning", - "metadata": {}, - "outputs": [], - "source": [ - "# Loading mapping dictionaries from mapping_dictionaries notebook\n", - "%store -r dic_re\n", - "%store -r dic_pur\n", - "\n", - "# convert a dictionary to a defaultdict\n", - "dic_re = defaultdict(lambda: 'Other',dic_re)\n", - "dic_pur = defaultdict(lambda: 'Other',dic_pur)\n", - "\n", - "# get metric vs imperial vars\n", - "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)" - ] - }, - { - "cell_type": "markdown", - "id": "built-occupation", - "metadata": {}, - "source": [ - "## Collect Data From Database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "empty-intensity", - "metadata": {}, - "outputs": [], - "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", - " month,\n", - " program,\n", - " study_type,\n", - " dynamic_labels,\n", - " dic_re,\n", - " dic_pur=dic_pur,\n", - " include_test_users=include_test_users)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77eedae6", - "metadata": {}, - "outputs": [], - "source": [ - "if 'mode_confirm' in expanded_ct.columns:\n", - " mode_of_interest_df = expanded_ct.query(f\"mode_confirm == '{mode_of_interest}'\")\n", - " debug_df.loc[f\"{mode_of_interest}_trips\"] = len(mode_of_interest_df)\n", - " debug_df.loc[f\"{mode_of_interest}_trips_with_replaced_mode\"] = scaffolding.trip_label_count(\"Replaced_mode\", mode_of_interest_df)" - ] - }, - { - "cell_type": "markdown", - "id": "surgical-continuity", - "metadata": {}, - "source": [ - "## Metrics for Specific Mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "romance-green", - "metadata": {}, - "outputs": [], - "source": [ - "data_eb = expanded_ct.query(f\"mode_confirm == '{mode_of_interest}'\") if \"mode_confirm\" in expanded_ct.columns else expanded_ct" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "seeing-court", - "metadata": {}, - "outputs": [], - "source": [ - "quality_text = scaffolding.get_quality_text(expanded_ct, data_eb, mode_of_interest, include_test_users)" - ] - }, - { - "cell_type": "markdown", - "id": "loaded-expert", - "metadata": {}, - "source": [ - "### Trips by purpose for specified mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "respiratory-breach", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "plot_title_no_quality=f\"Number of trips for each purpose for {mode_of_interest} only\"\n", - "file_name= f'ntrips_{mode_of_interest}_purpose%s' % file_suffix\n", - "\n", - "try:\n", - " labels_tp = 
data_eb['Trip_purpose'].value_counts(dropna=True).keys().tolist()\n", - " values_tp = data_eb['Trip_purpose'].value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_purpose(plot_title,labels_tp,values_tp,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_tp, labels_tp), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "thermal-midnight", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=f\"Number of trips for each replaced transport mode for {mode_of_interest} only\"\n", - "file_name =f'ntrips_{mode_of_interest}_replaced_mode%s' % file_suffix\n", - "\n", - "try:\n", - " labels_eb = data_eb.Replaced_mode.value_counts(dropna=True).keys().tolist()\n", - " values_eb = data_eb.Replaced_mode.value_counts(dropna=True).tolist()\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_eb,values_eb,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_eb, labels_eb), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "dependent-reservoir", - "metadata": {}, - "source": [ - "### Miles for each mode replaced by the specified mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pointed-velvet", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=f\"Distribution of \"+label_units+f\" Replaced by {mode_of_interest}\"\n", - "file_name =f'miles_{mode_of_interest}_replaced_mode%s' % file_suffix\n", - "\n", - "try:\n", - " 
dg=data_eb.groupby('Replaced_mode').agg({distance_col: ['sum', 'count' , 'mean']},)\n", - " dg.columns = ['Total ('+label_units_lower+')', 'Count' ,'Average ('+label_units_lower+')']\n", - " dg = dg.reset_index()\n", - " dg = dg.sort_values(by=['Total ('+label_units_lower+')'], ascending=False)\n", - "\n", - " dg_dict = dict(zip(dg['Replaced_mode'], dg['Total ('+label_units_lower+')']))\n", - " \n", - " labels_m = []\n", - " values_m = []\n", - "\n", - " for x, y in dg_dict.items():\n", - " labels_m.append(x)\n", - " values_m.append(y)\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " pie_chart_mode(plot_title,labels_m,values_m,file_name)\n", - " alt_text = store_alt_text_pie(pd.DataFrame(values_m, labels_m), file_name, plot_title)\n", - " print(dg)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "honest-dylan", - "metadata": {}, - "source": [ - "### Average miles per trip for specified mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "binary-program", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=\"Average \" + label_units + \" for each replaced mode with > 3 entries\\n'Other' represents trips with a non-standard or missing replacement\"\n", - "file_name ='average_miles_replaced_mode%s' % file_suffix\n", - "\n", - "try:\n", - " data = dg.drop((dg.query(\"Count < 3\").index)).sort_values(by=['Average ('+label_units_lower+')'], ascending=False) \n", - " \n", - " x='Replaced_mode'\n", - " y='Average ('+label_units_lower+')'\n", - " y2 = \"Count\"\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " barplot_mode(data,x,y,plot_title, expanded_ct['Replaced_mode'].dropna().unique().tolist(), file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Average 
('+label_units_lower+')'].values, data.Replaced_mode), file_name, plot_title)\n", - "\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "committed-favorite", - "metadata": {}, - "source": [ - "### Number of trips by day for specified mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "limiting-handling", - "metadata": {}, - "outputs": [], - "source": [ - "plot_title_no_quality=f\"Number of {mode_of_interest} trips by day\"\n", - "file_name =f'ntrips_{mode_of_interest}_per_day%s' % file_suffix\n", - "\n", - "\n", - "try:\n", - " fq_days = data_eb.groupby(['start_local_dt_day']).agg({'start_local_dt_day': ['sum', 'count']})\n", - " fq_days = fq_days.reset_index()\n", - " fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']\n", - "\n", - " data = fq_days\n", - " x = 'Day of the Month'\n", - " y = 'Number of Trips'\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " barplot_day(data,x,y,plot_title,file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Day of the Month'].values), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - }, - { - "cell_type": "markdown", - "id": "pediatric-cowboy", - "metadata": {}, - "source": [ - "### Number of trips by day of week¶" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "metropolitan-musical", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "plot_title_no_quality=f\"Number of {mode_of_interest} trips by weekday\"\n", - "file_name =f'ntrips_{mode_of_interest}_per_weekday%s' % file_suffix\n", - "\n", - "try:\n", - " fq_weekdays = 
data_eb.groupby(['start_local_dt_weekday']).agg({'start_local_dt_weekday': ['sum', 'count']})\n", - " fq_weekdays = fq_weekdays.reset_index()\n", - " fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']\n", - " weekday_labels = [\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"]\n", - " fq_weekdays[\"Weekday\"] = fq_weekdays.Weekday.apply(lambda x: weekday_labels[x])\n", - "\n", - " data = fq_weekdays\n", - " x = 'Weekday'\n", - " y = 'Number of Trips'\n", - "\n", - " plot_title= plot_title_no_quality+\"\\n\"+quality_text\n", - " barplot_day(data,x,y,plot_title,file_name)\n", - " alt_text = store_alt_text_bar(pd.DataFrame(data['Number of Trips'].values, data['Weekday'].values), file_name, plot_title)\n", - "except:\n", - " generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n", - " alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/viz_scripts/mode_specific_timeseries.ipynb b/viz_scripts/mode_specific_timeseries.ipynb index d696794..abd8a3a 100644 --- a/viz_scripts/mode_specific_timeseries.ipynb +++ b/viz_scripts/mode_specific_timeseries.ipynb @@ -96,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, _, file_suffix, quality_text, debug_df, _ = scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index c27e94f..c9f1ab8 100644 --- a/viz_scripts/plots.py 
+++ b/viz_scripts/plots.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import seaborn as sns from matplotlib.patches import Patch +from itertools import product sns.set_style("whitegrid") sns.set() @@ -19,115 +20,87 @@ SAVE_DIR="/plots/" - -def merge_small_entries(labels, values): +def calculate_pct(labels, values): v2l_df = pd.DataFrame({"vals": values}, index=labels) # Calculate % for all the values vs = v2l_df.vals.sum() - v2l_df["pct"] = v2l_df.vals.apply(lambda x: (x/vs) * 100) - disp.display(v2l_df) - - # Find small chunks to combine - small_chunk = v2l_df.where(lambda x: x.pct <= 2).dropna() - misc_count = small_chunk.sum() - - v2l_df = v2l_df.drop(small_chunk.index) - disp.display(v2l_df) - - # This part if a bit tricky - # We could have already had a non-zero other, and it could be small or large - if "Other" not in v2l_df.index: - # zero other will end up with misc_count - v2l_df.loc["Other"] = misc_count - elif "Other" in small_chunk.index: - # non-zero small other will already be in misc_count - v2l_df.loc["Other"] = misc_count - else: - # non-zero large other, will not already be in misc_count - v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count - disp.display(v2l_df) - - return (v2l_df.index.to_list(),v2l_df.vals.to_list()) - - -def format_pct(pct, values): - total = sum(values) - absolute = int(round(pct*total/100.0)) - return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 4 else'' - - -def pie_chart_mode(plot_title,labels,values,file_name): - - colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)])) - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) - - m_labels, m_values = merge_small_entries(labels, values) - - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colours[key] for key in labels], - pctdistance=0.75, - autopct= lambda pct: format_pct(pct, values), - textprops={'size': 23}) - - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", 
fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) - plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') - plt.show() + v2l_df["pct"] = v2l_df.vals.apply(lambda x: round((x/vs) * 100, 1)) -def pie_chart_sensed_mode(plot_title,labels,values,file_name): - all_labels= ['IN_VEHICLE', - 'UNKNOWN', - 'WALKING', - 'AIR_OR_HSR', - 'BICYCLING', - 'OTHER'] + return (v2l_df.index.to_list(),v2l_df.vals.to_list(), v2l_df.pct.to_list()) - val2labeldf = pd.DataFrame({"labels": labels, "values": values}) +# Create dataframe with cols: 'Mode' 'Count' and 'Proportion' +def process_trip_data(labels, values, trip_type): + m_labels, m_values, m_pct = calculate_pct(labels, values) + data_trip = {'Mode': m_labels, 'Count': m_values, 'Proportion': m_pct} + df_total_trip = pd.DataFrame(data_trip) + df_total_trip['Trip Type'] = trip_type + return df_total_trip - colours = dict(zip(all_labels, plt.cm.tab10.colors[:len(all_labels)])) - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) +# Input: List of all dataframes +# Ouput: A single dataframe such that Trip Type has all Mode +def merge_dataframes(all_data_frames): + # Concatenate DataFrames + df = pd.concat(all_data_frames, ignore_index=True) - m_labels, m_values = merge_small_entries(labels, values) + # Create DataFrame with unique combinations of 'Trip Type' and 'Mode' + unique_combinations = pd.DataFrame(list(product(df['Trip Type'].unique(), df['Mode'].unique())), columns=['Trip Type', 'Mode']) - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colours[key] for key in labels], - pctdistance=0.75, - autopct= lambda pct: format_pct(pct, values), - textprops={'size': 23}) - - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) - plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') - plt.show() + # Merge the original 
DataFrame with the unique combinations DataFrame + merged_df = pd.merge(unique_combinations, df, on=['Trip Type', 'Mode'], how='left').fillna(0) + return merged_df -def pie_chart_purpose(plot_title,labels,values,file_name): - - colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)])) - fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(aspect="equal")) +def stacked_bar_chart_generic(plot_title, df, file_name, num_bars): + sns.set(font_scale=1.5) + fig, ax = plt.subplots(1, 1, figsize=(15, 6)) - m_labels, m_values = merge_small_entries(labels, values) - - def func(pct, values): - total = sum(values) - absolute = int(round(pct*total/100.0)) - return "{:.1f}%\n({:d})".format(pct, absolute) if pct > 3 else'' - - wedges, texts, autotexts = ax.pie(m_values, - labels = m_labels, - colors=[colours[key] for key in labels], - pctdistance=0.85, - autopct=lambda pct: func(pct, values), - textprops={'size': 23}) - - ax.set_title(plot_title, size=25) - plt.text(-1.3,-1.3,f"Last updated {arrow.get()}", fontsize=10) - plt.setp(autotexts, **{'color':'white', 'weight':'bold', 'fontsize':20}) - plt.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') + if num_bars == 1: + width = 2 + ax.set_ylim(-0.4, 3) + else: + width = 0.8 + + running_total_long = [0] * num_bars + + mode_mapping = { + "IN_VEHICLE": "IN_VEHICLE (Sensed)", + "UNKNOWN": "UNKNOWN (Sensed)", + "OTHER": "OTHER (Sensed)", + "BICYCLING": "BICYCLING (Sensed)", + "WALKING": "WALKING (Sensed)", + "AIR_OR_HSR": "AIR_OR_HSR (Sensed)" + } + + colors = plt.cm.tab20.colors[:len(pd.unique(df['Mode']))] + + for idx, mode in enumerate(pd.unique(df.Mode)): + long = df[df['Mode'] == mode] + + if not long.empty: + labels = long['Trip Type'] + vals = long['Proportion'] + bar_labels = long['Count'] + + mode = mode_mapping.get(mode, mode) + vals_str = [f'{y:.1f} %\n({x:.0f})' if y>4 else '' for x, y in zip(bar_labels, vals)] + bar = ax.barh(labels, vals, width, left=running_total_long, label=mode, color = colors[idx]) + 
ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16) + running_total_long = [total + val for total, val in zip(running_total_long, vals)] + else: + print(f"{mode} is unavailable.") + + ax.set_title(plot_title, fontsize=25) + ax.set_xlabel('Proportion (Count)', fontsize=20) + ax.set_ylabel('Trip Types', fontsize=20) + ax.tick_params(axis='y', labelsize=18) + ax.tick_params(axis='x', labelsize=18, rotation=90) + # The Last updated text is placed just right below the X-axis + plt.text(0,ax.xaxis.get_label().get_position()[0] - 1,f"Last updated {arrow.get()}", fontsize=12) + # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/ + ax.set_xlim(right=ax.get_xlim()[1]+1.0, auto=True) + ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize = 15) + plt.subplots_adjust(bottom=0.25) + fig.savefig(SAVE_DIR+file_name+".png", bbox_inches='tight') plt.show() def energy_impact(x,y,color,plot_title,file_name): @@ -342,18 +315,25 @@ def store_alt_text_bar(df, chart_name, var_name): alt_text = access_alt_text(alt_text, chart_name) return alt_text -def store_alt_text_pie(df, chart_name, var_name): +def store_alt_text_stacked_bar_chart(df, chart_name, var_name): """ Inputs: - df = dataframe with index of item names, first column is counts - chart_name = what to label chart by in the dictionary - var_name = the variable being analyzed across pie slices + df = dataframe combining columns as Trip Type, Mode, Count, Proportion + chart_name = name of the chart + var_name = the variable being analyzed across bars """ - # Fill out the alt text based on components of the chart and passed data - alt_text = f"Pie chart of {var_name}." - for i in range(0,len(df)): - alt_text += f" {df.index[i]} is {np.round(df.iloc[i,0] / np.sum(df.iloc[:,0]) * 100, 1)}%." 
+ # Generate alt text file + alt_text = f"Stacked Bar chart of {var_name}." + for i in range(len(df)): + alt_text += f"Trip Type: {df['Trip Type'].iloc[i]} - Mode: {df['Mode'].iloc[i]} - Count: {df['Count'].iloc[i]} - Proportion: {df['Proportion'].iloc[i]}%\n" alt_text = access_alt_text(alt_text, chart_name) - return alt_text + + # Generate html table + alt_html = "" + for i in range(len(df)): + alt_html += f"{df['Trip Type'].iloc[i]}{df['Mode'].iloc[i]}{df['Count'].iloc[i]}{df['Proportion'].iloc[i]}%" + alt_html = access_alt_html(alt_html, chart_name, var_name) + + return alt_text, alt_html def store_alt_text_timeseries(df, chart_name, var_name): """ Inputs: @@ -369,6 +349,38 @@ def store_alt_text_timeseries(df, chart_name, var_name): alt_text = access_alt_text(alt_text, chart_name) return alt_text +# Creating html table with col as Trip Type, Mode, Count, and Proportion +def access_alt_html(alt_text, chart_name, var_name): + """ Inputs: + alt_text = the text describing the chart + chart_name = the alt text file to save or update + var_name = the variable being analyzed across bars + """ + html_content = f""" + + + + {var_name} + + +

    {var_name}

    + + + + + + + + {alt_text} +
    Trip TypeModeCountProportion
    + + + """ + with open(SAVE_DIR + chart_name + ".html", 'w') as f: + f.write(html_content) + + return alt_text + def generate_missing_plot(plot_title,debug_df,file_name): f, ax = plt.subplots(figsize=(10,10)) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 6f21989..15f15bb 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -76,6 +76,15 @@ def filter_labeled_trips(mixed_trip_df): disp.display(labeled_ct.head()) return labeled_ct +def filter_inferred_trips(mixed_trip_df): + # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if len(mixed_trip_df) == 0: + return mixed_trip_df + inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))] + print("After filtering, found %s inferred trips" % len(inferred_ct)) + disp.display(inferred_ct.head()) + return inferred_ct + def expand_userinputs(labeled_ct): ''' param: labeled_ct: a dataframe of confirmed trips, some of which have labels @@ -103,45 +112,67 @@ def expand_userinputs(labeled_ct): disp.display(expanded_ct.head()) return expanded_ct +def expand_inferredlabels(inferred_ct): + if len(inferred_ct) == 0: + return inferred_ct + + # Function to find the labels with the highest 'p' + def find_max_p_labels(item): + max_p = 0 + max_labels = {} + for value in item: + if value['p'] > max_p: + max_p = value['p'] + max_labels = value['labels'] + return max_labels, max_p + + # Create two empty lists to store labels and p values + max_labels_list = [] + max_p_list = [] + + # Iterate over the Series and apply the function + for item in inferred_ct.inferred_labels: + labels, p = find_max_p_labels(item) + max_labels_list.append(labels) + max_p_list.append(p) + + print(f"\n Length of the list is {len(max_labels_list)} \n") + + inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index) + disp.display(inferred_only_labels) + inferred_only_p = pd.DataFrame(max_p_list, index=inferred_ct.index, 
columns=['p']) + disp.display(inferred_only_p) + expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels, inferred_only_p], axis=1) + expanded_inferred_ct.reset_index(drop=True, inplace=True) + disp.display(expanded_inferred_ct.head()) + return expanded_inferred_ct + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0 trip_label_count = lambda s, df: len(df[s].dropna()) if s in df.columns else 0 -def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): - """ Inputs: - year/month/program/study_type = parameters from the visualization notebook - dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose - - Pipeline to load and process the data before use in visualization notebooks. - """ - # Access database - tq = get_time_query(year, month) - participant_ct_df = load_all_participant_trips(program, tq, include_test_users) - labeled_ct = filter_labeled_trips(participant_ct_df) - expanded_ct = expand_userinputs(labeled_ct) - expanded_ct = data_quality_check(expanded_ct) - +def process_notebook_data(df, study_type, dynamic_labels, dic_re, dic_pur): # Change meters to miles # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "distance" in expanded_ct.columns: - unit_conversions(expanded_ct) - + if "distance" in df.columns: + unit_conversions(df) + # Map new mode labels with translations dictionary from dynamic_labels # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "mode_confirm" in expanded_ct.columns: + if "mode_confirm" in df.columns: if (len(dynamic_labels)): dic_mode_mapping = mapping_labels(dynamic_labels, "MODE") - expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_mode_mapping) + df['Mode_confirm'] = 
df['mode_confirm'].map(dic_mode_mapping) else: - expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_re) + df['Mode_confirm'] = df['mode_confirm'].map(dic_re) if study_type == 'program': # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if 'replaced_mode' in expanded_ct.columns: + if 'replaced_mode' in df.columns: if (len(dynamic_labels)): dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE") - expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_replaced_mapping) + df['Replaced_mode'] = df['replaced_mode'].map(dic_replaced_mapping) else: - expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_re) + df['Replaced_mode'] = df['replaced_mode'].map(dic_re) else: print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping") else: @@ -149,15 +180,34 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic # Trip purpose mapping # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if dic_pur is not None and "purpose_confirm" in expanded_ct.columns: + if dic_pur is not None and "purpose_confirm" in df.columns: if (len(dynamic_labels)): dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") - expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_purpose_mapping) + df['Trip_purpose'] = df['purpose_confirm'].map(dic_purpose_mapping) else: - expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_pur) + df['Trip_purpose'] = df['purpose_confirm'].map(dic_pur) + return df - # Document data quality +def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): + """ Inputs: + year/month/program/study_type = parameters from the visualization notebook + dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose + + Pipeline to load 
and process the data before use in visualization notebooks. + """ + # Access database + tq = get_time_query(year, month) file_suffix = get_file_suffix(year, month, program) + participant_ct_df = load_all_participant_trips(program, tq, include_test_users) + + labeled_ct = filter_labeled_trips(participant_ct_df) + expanded_ct = expand_userinputs(labeled_ct) + expanded_ct = data_quality_check(expanded_ct) + expanded_ct = process_notebook_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur) + inferred_ct = filter_inferred_trips(participant_ct_df) + expanded_it = expand_inferredlabels(inferred_ct) + expanded_it = process_notebook_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur) + values_dict = get_quality_data(participant_ct_df, expanded_it, expanded_ct) quality_text = get_quality_text(participant_ct_df, expanded_ct, None, include_test_users) debug_df = pd.DataFrame.from_dict({ @@ -166,13 +216,17 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic "Registered_participants": len(get_participant_uuids(program, include_test_users)), "Participants_with_at_least_one_trip": unique_users(participant_ct_df), "Participant_with_at_least_one_labeled_trip": unique_users(labeled_ct), + "Participant_with_at_least_one_inferred_trip": unique_users(inferred_ct), "Trips_with_at_least_one_label": len(labeled_ct), + "Trips_with_at_least_one_inference": len(inferred_ct), "Trips_with_mode_confirm_label": trip_label_count("Mode_confirm", expanded_ct), - "Trips_with_trip_purpose_label": trip_label_count("Trip_purpose", expanded_ct) + "Trips_with_trip_purpose_label": trip_label_count("Trip_purpose", expanded_ct), + "Trips_with_mode_confirm_label_inferred": trip_label_count("Mode_confirm", expanded_it), + "Trips_with_trip_purpose_label_inferred": trip_label_count("Trip_purpose", expanded_it) }, orient='index', columns=["value"]) - return expanded_ct, file_suffix, quality_text, debug_df + return expanded_ct, expanded_it, file_suffix, 
quality_text, debug_df, values_dict # Function to map the "MODE", "REPLACED_MODE", "PURPOSE" to respective en-translations # Input: dynamic_labels, label_type: MODE, REPLACED_MODE, PURPOSE @@ -275,10 +329,50 @@ def get_quality_text(before_df, after_df, mode_of_interest=None, include_test_us print(quality_text) return quality_text -def get_quality_text_sensed(df, include_test_users=False): +def get_quality_data_u80(total_df, before_df, after_df): + after_pct = (len(after_df) * 100) / len(total_df) if len(total_df) != 0 else np.nan + main_dict = { + 'before_df':len(before_df), + 'unique_users_before': unique_users(before_df), + 'after_df':len(after_df), + 'unique_users_after': unique_users(after_df), + 'after_pct':after_pct } + return main_dict + +def get_quality_data_inferred(total_inferred_df, total_confirmed_df, inferred_df, confirmed_df): + after_pct_confirmed = (len(confirmed_df) * 100) / len(total_confirmed_df) if len(total_confirmed_df) != 0 else np.nan + after_pct_inferred = (len(inferred_df) * 100) / len(total_inferred_df) if len(total_inferred_df) != 0 else np.nan + mode_values_dict = \ + {'inferred_trip': len(total_inferred_df), + 'unique_users_inferred': unique_users(total_inferred_df), + 'mode_inferred_trip': len(inferred_df), + 'after_pct_inferred': after_pct_inferred, + 'unique_users_inferred_mode': unique_users(inferred_df), + 'confirmed_trip': len(total_confirmed_df), + 'unique_users_confirmed':unique_users(confirmed_df), + 'mode_confirmed_trip': len(confirmed_df), + 'after_pct_confirmed': after_pct_confirmed, + 'unique_users_confirmed_mode': unique_users(confirmed_df)} + return mode_values_dict + +def get_quality_data(total_df, inferred_df, confirmed_df): + after_pct_confirmed = (len(confirmed_df) * 100) / len(total_df) if len(total_df) != 0 else np.nan + after_pct_inferred = (len(inferred_df) * 100) / len(total_df) if len(total_df) != 0 else np.nan + values_dict = \ + {'total_trip' : len(total_df), + 'unique_users_total': unique_users(total_df), + 
'inferred_trip': len(inferred_df), + 'pct_inferred': after_pct_inferred, + 'unique_users_inferred': unique_users(inferred_df), + 'confirmed_trip': len(confirmed_df), + 'pct_confirmed': after_pct_confirmed, + 'unique_users_confirmed': unique_users(confirmed_df)} + return values_dict + +def get_quality_text_sensed(df, cutoff_text="", include_test_users=False): cq = (len(df), unique_users(df)) user_str = 'testers and participants' if include_test_users else 'users' - quality_text = f"Based on %s trips from %d {user_str}" % cq + quality_text = f"Based on %s trips ({cutoff_text}) from %d {user_str}" % cq if cutoff_text else f"Based on %s trips from %d {user_str}" % cq print(quality_text) return quality_text