Skip to content

Commit

Permalink
Add performance stats to the admin dashboard (e-mission#141)
Browse files Browse the repository at this point in the history
Use the new `store_dashboard_time` function from https://github.com/e-mission/e-mission-server to instrument sections of the admin dashboard codebase, both in summary and in detail. We will use this to document the effect of our scalability improvements.

TODO/Future fix: Revisit how we instrument the code so that it is less repetitive, for example by using a decorator or by enhancing the timer. See e-mission#141 (comment).
  • Loading branch information
TeachMeTW authored Oct 20, 2024
1 parent e8d4d61 commit 7769769
Show file tree
Hide file tree
Showing 3 changed files with 1,176 additions and 405 deletions.
315 changes: 249 additions & 66 deletions pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
from dash import dcc, html, Input, Output, callback, register_page, dash_table, State, callback_context, Patch
# Etc
import logging
import time
import pandas as pd
from dash.exceptions import PreventUpdate
from concurrent.futures import ThreadPoolExecutor, as_completed
from utils import constants
from utils import permissions as perm_utils
from utils import db_utils
from utils.db_utils import df_to_filtered_records, query_trajectories
from utils.datetime_utils import iso_to_date_only
import emission.core.timer as ect
import emission.storage.decorations.stats_queries as esdsq

register_page(__name__, path="/data")

intro = """## Data"""
Expand Down Expand Up @@ -54,33 +54,125 @@
)



def clean_location_data(df):
    """
    Format the start/end location coordinate columns as display strings.

    For each of 'data.start_loc.coordinates' and 'data.end_loc.coordinates'
    that is present in ``df``, every value that is a list or tuple with at
    least two elements is replaced by the string '(<first>, <second>)'.
    Any other value is left unchanged. The column is assigned back onto
    ``df``, so the caller's DataFrame is modified.

    The time spent on each column, and on the whole function, is reported
    through ``esdsq.store_dashboard_time``.

    :param df (pd.DataFrame): The DataFrame containing location data.
    :return: The same DataFrame, with formatted coordinate strings.
    """
    def _format_coordinates(value):
        # Only sequences with at least two entries can be formatted; leave
        # anything else (missing values, already-formatted strings) alone.
        if isinstance(value, (list, tuple)) and len(value) >= 2:
            return f'({value[0]}, {value[1]})'
        return value

    # (column to clean, name under which that stage's timing is stored)
    stages = (
        (
            'data.start_loc.coordinates',
            "admin/data/clean_location_data/clean_start_loc_coordinates",
        ),
        (
            'data.end_loc.coordinates',
            "admin/data/clean_location_data/clean_end_loc_coordinates",
        ),
    )

    with ect.Timer(verbose=False) as total_timer:
        for column, stat_name in stages:
            with ect.Timer(verbose=False) as stage_timer:
                if column in df.columns:
                    df[column] = df[column].apply(_format_coordinates)
            # NOTE(review): the timer is stored after its `with` block ends,
            # presumably because it finalizes its measurement on exit --
            # confirm against emission.core.timer.
            esdsq.store_dashboard_time(stat_name, stage_timer)

    esdsq.store_dashboard_time(
        "admin/data/clean_location_data/total_time",
        total_timer
    )

    return df

def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids, key_list):
    """
    Query trajectories for a date range and build the store dictionary.

    Calls ``query_trajectories`` with the given range, timezone and key list,
    passes the result through ``df_to_filtered_records`` using the 'user_id'
    column and ``excluded_uuids["data"]``, and wraps the records in a dict.
    The time spent in each stage, and overall, is reported through
    ``esdsq.store_dashboard_time``.

    :param start_date (str): Start date in ISO format.
    :param end_date (str): End date in ISO format.
    :param tz (str): Timezone string.
    :param excluded_uuids (dict): Dictionary whose 'data' entry holds the UUIDs to exclude.
    :param key_list (list[str]): List of keys passed to the trajectory query.
    :return: Dictionary with 'data' as filtered records and 'length' as the count of records.
    """
    # NOTE(review): each timer is stored after its `with` block ends,
    # presumably because it finalizes its measurement on exit -- confirm
    # against emission.core.timer.
    with ect.Timer(verbose=False) as total_timer:
        # Stage 1: Query Trajectories
        with ect.Timer(verbose=False) as stage1_timer:
            df = query_trajectories(
                start_date=start_date, end_date=end_date, tz=tz, key_list=key_list
            )
        esdsq.store_dashboard_time(
            "admin/data/update_store_trajectories/query_trajectories",
            stage1_timer
        )

        # Stage 2: Filter Records
        with ect.Timer(verbose=False) as stage2_timer:
            records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"])
        esdsq.store_dashboard_time(
            "admin/data/update_store_trajectories/filter_records",
            stage2_timer
        )

        # Stage 3: Create Store Dictionary
        with ect.Timer(verbose=False) as stage3_timer:
            store = {
                "data": records,
                "length": len(records),
            }
        esdsq.store_dashboard_time(
            "admin/data/update_store_trajectories/create_store_dict",
            stage3_timer
        )

    esdsq.store_dashboard_time(
        "admin/data/update_store_trajectories/total_time",
        total_timer
    )

    return store



@callback(
    Output('keylist-switch-container', 'style'),
    Input('tabs-datatable', 'value'),
)
def show_keylist_switch(tab):
    """
    Toggle the visibility of the keylist switch container for the selected tab.

    The container is shown only on the trajectories tab and hidden on every
    other tab. The time taken is reported through
    ``esdsq.store_dashboard_time``.

    :param tab (str): The currently selected tab.
    :return: Dictionary with the CSS 'display' style to show or hide the container.
    """
    # NOTE(review): each timer is stored after its `with` block ends,
    # presumably because it finalizes its measurement on exit -- confirm
    # against emission.core.timer.
    with ect.Timer(verbose=False) as total_timer:
        # Stage 1: Determine Display Style
        with ect.Timer(verbose=False) as stage1_timer:
            if tab == 'tab-trajectories-datatable':
                style = {'display': 'block'}
            else:
                # Hide the keylist-switch on all other tabs
                style = {'display': 'none'}
        esdsq.store_dashboard_time(
            "admin/data/show_keylist_switch/determine_display_style",
            stage1_timer
        )

    esdsq.store_dashboard_time(
        "admin/data/show_keylist_switch/total_time",
        total_timer
    )

    return style



@callback(
Expand Down Expand Up @@ -236,67 +328,158 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
return None, loaded_uuids_store, True



# handle subtabs for demographic table when there are multiple surveys
@callback(
    Output('subtabs-demographics-content', 'children'),
    Input('subtabs-demographics', 'value'),
    Input('store-demographics', 'data'),
)
def update_sub_tab(tab, store_demographics):
    """
    Build the content of the selected demographics subtab.

    Looks up the records stored under ``tab`` in ``store_demographics["data"]``,
    turns them into a DataFrame restricted to the keys of the first record,
    and renders it with ``populate_datatable``. When there are no records for
    the tab, ``None`` is returned and no table is built.

    Stage and total timings are reported through
    ``esdsq.store_dashboard_time``. The empty case also reports the
    emptiness-check stage and the total time, because there is a single exit
    point rather than a ``return`` from inside the timed region.

    :param tab (str): The currently selected subtab.
    :param store_demographics (dict): Dictionary containing demographics data.
    :return: Dash component for the subtab content, or None when there is no data.
    """
    table = None

    # NOTE(review): each timer is stored after its `with` block ends,
    # presumably because it finalizes its measurement on exit -- confirm
    # against emission.core.timer.
    with ect.Timer(verbose=False) as total_timer:
        # Stage 1: Extract Data for Selected Tab
        with ect.Timer(verbose=False) as stage1_timer:
            # Tolerate a store that is missing or has no 'data' entry yet.
            data = (store_demographics or {}).get("data") or {}
            rows = data[tab] if tab in data else []
            # Always bind `columns`, so the later drop can never hit an
            # unbound name when the tab has no records.
            # assumes rows is a list of dicts -- TODO confirm against the store producer
            columns = list(rows[0].keys()) if rows else []
        esdsq.store_dashboard_time(
            "admin/data/update_sub_tab/extract_data_for_selected_tab",
            stage1_timer
        )

        # Stage 2: Create DataFrame
        with ect.Timer(verbose=False) as stage2_timer:
            df = pd.DataFrame(rows if rows else {})
        esdsq.store_dashboard_time(
            "admin/data/update_sub_tab/create_dataframe",
            stage2_timer
        )

        # Stage 3: Check if DataFrame is Empty
        with ect.Timer(verbose=False) as stage3_timer:
            has_rows = not df.empty
        esdsq.store_dashboard_time(
            "admin/data/update_sub_tab/check_if_dataframe_empty",
            stage3_timer
        )

        if has_rows:
            # Stage 4: Drop Unnecessary Columns
            with ect.Timer(verbose=False) as stage4_timer:
                # Keep only the keys present in the first record.
                df = df.drop(columns=[col for col in df.columns if col not in columns])
            esdsq.store_dashboard_time(
                "admin/data/update_sub_tab/drop_unnecessary_columns",
                stage4_timer
            )

            # Stage 5: Populate DataTable
            with ect.Timer(verbose=False) as stage5_timer:
                table = populate_datatable(df)
            esdsq.store_dashboard_time(
                "admin/data/update_sub_tab/populate_datatable",
                stage5_timer
            )

    esdsq.store_dashboard_time(
        "admin/data/update_sub_tab/total_time",
        total_timer
    )

    return table


@callback(
    Output('trips-table', 'hidden_columns'),  # Output hidden columns in the trips-table
    Output('button-clicked', 'children'),     # Updates button label
    Input('button-clicked', 'n_clicks'),      # Number of clicks on the button
    State('button-clicked', 'children')       # Current label of button
)
def update_dropdowns_trips(n_clicks, button_label):
    """
    Choose which trips-table columns are hidden and update the button label.

    An even click count (including no clicks yet) hides the raw-unit columns
    and offers to display them; an odd click count hides the humanized-unit
    columns and offers to display those. 'data.distance' is hidden in both
    cases. The time taken is reported through ``esdsq.store_dashboard_time``.

    :param n_clicks (int): Number of times the button has been clicked; None is treated as 0.
    :param button_label (str): Current label of the button (replaced by the returned label).
    :return: Tuple containing the list of hidden columns and the updated button label.
    """
    # NOTE(review): each timer is stored after its `with` block ends,
    # presumably because it finalizes its measurement on exit -- confirm
    # against emission.core.timer.
    with ect.Timer(verbose=False) as total_timer:
        # Stage 1: Determine Hidden Columns and Button Label
        with ect.Timer(verbose=False) as stage1_timer:
            if n_clicks is None:
                # Handle the initial state when the button hasn't been clicked,
                # otherwise `None % 2` raises TypeError.
                n_clicks = 0
            if n_clicks % 2 == 0:
                hidden_col = ["data.duration_seconds", "data.distance_meters", "data.distance"]
                button_label = 'Display columns with raw units'
            else:
                hidden_col = ["data.duration", "data.distance_miles", "data.distance_km", "data.distance"]
                button_label = 'Display columns with humanized units'
        esdsq.store_dashboard_time(
            "admin/data/update_dropdowns_trips/determine_hidden_columns_and_button_label",
            stage1_timer
        )

    esdsq.store_dashboard_time(
        "admin/data/update_dropdowns_trips/total_time",
        total_timer
    )

    return hidden_col, button_label


def populate_datatable(df, table_id=''):
    """
    Build a Dash DataTable from the provided DataFrame.

    The table has one column per DataFrame column, CSV export, case-sensitive
    filter options, native single-column sorting and 50 rows per page. The
    time taken is reported through ``esdsq.store_dashboard_time``.

    :param df (pd.DataFrame): The DataFrame to display in the table.
    :param table_id (str, optional): The ID to assign to the DataTable.
    :return: Dash DataTable component.
    :raises PreventUpdate: If ``df`` is not a pandas DataFrame.
    """
    # NOTE(review): each timer is stored after its `with` block ends,
    # presumably because it finalizes its measurement on exit -- confirm
    # against emission.core.timer.
    with ect.Timer(verbose=False) as total_timer:
        # Stage 1: Validate DataFrame
        with ect.Timer(verbose=False) as stage1_timer:
            if not isinstance(df, pd.DataFrame):
                # Raising here leaves the function before any timing is stored.
                raise PreventUpdate
        esdsq.store_dashboard_time(
            "admin/data/populate_datatable/validate_dataframe",
            stage1_timer
        )

        # Stage 2: Create DataTable
        with ect.Timer(verbose=False) as stage2_timer:
            table = dash_table.DataTable(
                id=table_id,
                data=df.to_dict('records'),
                columns=[{"name": i, "id": i} for i in df.columns],
                export_format="csv",
                filter_options={"case": "sensitive"},
                sort_action="native",  # Give user capability to sort columns
                sort_mode="single",    # Sort across 'multi' or 'single' columns
                page_current=0,        # Page number that user is on
                page_size=50,          # Number of rows visible per page
                style_cell={
                    'textAlign': 'left',
                },
                style_table={'overflowX': 'auto'},
                css=[{"selector": ".show-hide", "rule": "display:none"}]
            )
        esdsq.store_dashboard_time(
            "admin/data/populate_datatable/create_datatable",
            stage2_timer
        )

    esdsq.store_dashboard_time(
        "admin/data/populate_datatable/total_time",
        total_timer
    )

    return table
Loading

0 comments on commit 7769769

Please sign in to comment.