From ad759c6cf814140a59124398ed01cdc2922c016f Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 30 Apr 2024 12:03:20 -0400 Subject: [PATCH 1/5] Initial download button, but still bugs --- app.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index 11dc5a3..7627499 100644 --- a/app.py +++ b/app.py @@ -101,6 +101,13 @@ ] ) +DOWNLOAD_CLUSTER_CSV = dash.html.Div( + [ + dbc.Button("Download Clusters CSV", id="cluster_csv_button"), + dash.dcc.Download(id="download_cluster_csv"), + ] +) + # First section of page, define KMeans paramters, train model button and metrics values and explanation KMEANS_PARAM_SECTION = [ @@ -149,7 +156,10 @@ dash.dcc.Loading( id="loading-metrics", type="default", - children=[dash.html.Article(id="cluster-metrics")], + children=[ + dash.html.Article(id="cluster-metrics"), + DOWNLOAD_CLUSTER_CSV, + ], ), dash.html.Br(), ] @@ -267,7 +277,7 @@ dash.html.Br(), SUBREDDIT_FILTERING_SECTION, dash.html.Br(), - # Stores the dataframe with cluster assingments and the name of the cluster model (for exporting labels) + # Stores the dataframe with cluster assignments and the name of the cluster model (for exporting labels) dash.dcc.Store(id="cluster-assignment"), # Stores the list of subbreddits available in the c2v model, for user to select in drop down dash.dcc.Store(id="subreddits"), @@ -625,6 +635,22 @@ def get_display_table( export_format="csv", ) +@app.callback( + dash.Output("download_cluster_csv", "data"), + dash.Input("cluster_csv_button", "n_clicks"), + dash.Input("cluster-assignment", "data"), + prevent_initial_call=True +) +def download_cluster_csv(n_clicks, cluster_json): + if n_clicks is None: + raise dash.exceptions.PreventUpdate + + model_name = cluster_json["name"] + cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name]) + cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name] + csv_name = f"{model_name}.csv" + 
logger.info("Downloading clustering data to %s", csv_name) + return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False) if __name__ == "__main__": print("Starting IHOP subreddit visualization application") @@ -632,6 +658,6 @@ def get_display_table( try: # TODO Plotly handles logging strangely, so use logger.info or workaround to not silence logging, # see https://community.plotly.com/t/logging-debug-messages-suppressed-in-callbacks/17854 - app.run_server() + app.run_server(debug=True) except Exception as e: logger.error(e) From 97fd27f97a727ba7cf1d92701029aceb9eef137e Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 30 Apr 2024 14:14:22 -0400 Subject: [PATCH 2/5] Button to download clusters is only triggered on click, not loads --- app.py | 24 ++++++++++++++---------- setup.cfg | 10 +++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index 7627499..105f4fe 100644 --- a/app.py +++ b/app.py @@ -103,7 +103,7 @@ DOWNLOAD_CLUSTER_CSV = dash.html.Div( [ - dbc.Button("Download Clusters CSV", id="cluster_csv_button"), + dbc.Button("Download Clusters CSV", id="cluster_csv_button", n_clicks=0), dash.dcc.Download(id="download_cluster_csv"), ] ) @@ -145,7 +145,7 @@ ] ), dash.html.Br(), - dbc.Button("Train clustering model", id="clustering_button"), + dbc.Button("Train clustering model", id="clustering_button",), ] ), dash.html.Br(), @@ -401,6 +401,7 @@ def load_vector_model(selected_month): dash.State("random-seed", "value"), dash.Input("month-dropdown", "value"), dash.Input("tsne-df", "data"), + running=[(dash.Output("clustering_button", "disabled"), True, False)] ) def train_clusters(n_clicks, n_clusters, random_seed, c2v_identifier, tsne_json_data): """Trains kmeans cluster with given number of clusters and random seed. 
@@ -642,15 +643,18 @@ def get_display_table( prevent_initial_call=True ) def download_cluster_csv(n_clicks, cluster_json): - if n_clicks is None: + trigger = dash.ctx.triggered_id + logger.info("Cluster download triggered by '%s'", trigger) + if trigger == "cluster_csv_button": + logger.info("Cluster download button clicked times: %s", n_clicks) + model_name = cluster_json["name"] + cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name]) + cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name] + csv_name = f"{model_name}.csv" + logger.info("Downloading clustering data to %s", csv_name) + return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False) + else: raise dash.exceptions.PreventUpdate - - model_name = cluster_json["name"] - cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name]) - cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name] - csv_name = f"{model_name}.csv" - logger.info("Downloading clustering data to %s", csv_name) - return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False) if __name__ == "__main__": print("Starting IHOP subreddit visualization application") diff --git a/setup.cfg b/setup.cfg index e8b4858..531c595 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,13 +25,9 @@ install_requires = [options.extras_require] app = - dash==2.3.1 - dash-bootstrap-components==1.1.0 - dash-core-components==2.0.0 - dash-daq==0.5.0 - dash-html-components==2.0.0 - dash-renderer==1.9.0 - dash-table==5.0.0 + dash>=2.4.0 + dash_bootstrap_components + dash_daq gunicorn matplotlib==3.5.0 plotly==5.6.0 From 17a1c1f0bac6f795053ca5aedf028e14ed52320c Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 30 Apr 2024 17:05:46 -0400 Subject: [PATCH 3/5] Changelog notes updated --- CHANGELOG.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e489516..e708222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 
+9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [2.1.0] ### Changed - Added support for python 3.10 in build and tests -- Made dependency verions less restrictive, except when necessary to avoid deprecations (sklearn, numpy) +- Made dependency versions less restrictive, except when necessary to avoid deprecations (sklearn, numpy) - Unit tests updated to handle sklearn deprecations - Updated prototype cluster browser to display 2023 data @@ -20,11 +20,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for processing Reddit comments from manually downloaded archives - Data and models for Reddit comments in 2023 tracked in DVC - Instructions and support for running the prototype cluster browser with gunicorn +- Added button to download all subreddit cluster assignments in prototype cluster browser ### Removed - Removed Unity documentation - Removed argparse from app.py so that it can be served with gunicorn +## [2.1.0] +### Changed +- Update visualizations for WebScience 2024 paper + +### Added +- Added citation information in Readme +- Trigger Zenodo DOI assignment for repository + + ## [2.0.0] ### Changed - Removed prefilled anti-immigrant subreddits selected in subreddit clustering app dropdown. Now the dropdown is initially empty. 
From bacd4ffc41f08bed734556ff9e411344412e4917 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 30 Apr 2024 17:09:32 -0400 Subject: [PATCH 4/5] Added note about Dash version update in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e708222..d0257a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Made dependency versions less restrictive, except when necessary to avoid deprecations (sklearn, numpy) - Unit tests updated to handle sklearn deprecations - Updated prototype cluster browser to display 2023 data +- Upgraded Dash dependency version to >=2.4.0 for the cluster prototype browser app ### Fixed - Upgraded DVC version from 2.10.0 to 3.33.1 to avoid https://github.com/iterative/dvc-objects/issues/241 From def85d3bca515c32dbf12fd4d4234efb26f891dd Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 30 Apr 2024 17:13:55 -0400 Subject: [PATCH 5/5] App server not run in debug mode --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 105f4fe..294c6a2 100644 --- a/app.py +++ b/app.py @@ -662,6 +662,6 @@ def download_cluster_csv(n_clicks, cluster_json): try: # TODO Plotly handles logging strangely, so use logger.info or workaround to not silence logging, # see https://community.plotly.com/t/logging-debug-messages-suppressed-in-callbacks/17854 - app.run_server(debug=True) + app.run_server() except Exception as e: logger.error(e)