Skip to content

Commit

Permalink
Add button to export all clusters as CSV download (#25)
Browse files Browse the repository at this point in the history
* Initial download button, but still has bugs

* Button to download clusters is only triggered on click, not loads

* Changelog notes updated

* Added note about Dash version update in changelog

* App server not run in debug mode
  • Loading branch information
ginic authored May 1, 2024
1 parent 4ba699b commit 8f761ed
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 11 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [2.1.0]
### Changed
- Added support for python 3.10 in build and tests
- Made dependency verions less restrictive, except when necessary to avoid deprecations (sklearn, numpy)
- Made dependency versions less restrictive, except when necessary to avoid deprecations (sklearn, numpy)
- Unit tests updated to handle sklearn deprecations
- Updated prototype cluster browser to display 2023 data
- Upgraded Dash dependency version to >=2.4.1 for the cluster prototype browser app

### Fixed
- Upgraded DVC version from 2.10.0 to 3.33.1 to avoid https://github.com/iterative/dvc-objects/issues/241
Expand All @@ -20,11 +21,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Support for processing Reddit comments from manually downloaded archives
- Data and models for Reddit comments in 2023 tracked in DVC
- Instructions and support for running the prototype cluster browser with gunicorn
- Added button to download all subreddit cluster assignments in prototype cluster browser

### Removed
- Removed Unity documentation
- Removed argparse from app.py so that it can be served with gunicorn

## [2.1.0]
### Changed
- Update visualizations for WebScience 2024 paper

### Added
- Added citation information in Readme
- Trigger Zenodo DOI assignment for repository


## [2.0.0]
### Changed
- Removed prefilled anti-immigrant subreddits selected in subreddit clustering app dropdown. Now the dropdown is initially empty.
Expand Down
36 changes: 33 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@
]
)

# Export control for the clustering results: a button (id "cluster_csv_button")
# paired with the dcc.Download component (id "download_cluster_csv") that
# receives the file payload produced when the button is clicked.
DOWNLOAD_CLUSTER_CSV = dash.html.Div(
    children=[
        dbc.Button(
            children="Download Clusters CSV",
            id="cluster_csv_button",
            n_clicks=0,
        ),
        dash.dcc.Download(id="download_cluster_csv"),
    ],
)


# First section of page, define KMeans parameters, train model button and metrics values and explanation
KMEANS_PARAM_SECTION = [
Expand Down Expand Up @@ -138,7 +145,7 @@
]
),
dash.html.Br(),
dbc.Button("Train clustering model", id="clustering_button"),
dbc.Button("Train clustering model", id="clustering_button",),
]
),
dash.html.Br(),
Expand All @@ -149,7 +156,10 @@
dash.dcc.Loading(
id="loading-metrics",
type="default",
children=[dash.html.Article(id="cluster-metrics")],
children=[
dash.html.Article(id="cluster-metrics"),
DOWNLOAD_CLUSTER_CSV,
],
),
dash.html.Br(),
]
Expand Down Expand Up @@ -267,7 +277,7 @@
dash.html.Br(),
SUBREDDIT_FILTERING_SECTION,
dash.html.Br(),
# Stores the dataframe with cluster assingments and the name of the cluster model (for exporting labels)
# Stores the dataframe with cluster assignments and the name of the cluster model (for exporting labels)
dash.dcc.Store(id="cluster-assignment"),
# Stores the list of subreddits available in the c2v model, for user to select in drop down
dash.dcc.Store(id="subreddits"),
Expand Down Expand Up @@ -391,6 +401,7 @@ def load_vector_model(selected_month):
dash.State("random-seed", "value"),
dash.Input("month-dropdown", "value"),
dash.Input("tsne-df", "data"),
running=[(dash.Output("clustering_button", "disabled"), True, False)]
)
def train_clusters(n_clicks, n_clusters, random_seed, c2v_identifier, tsne_json_data):
"""Trains kmeans cluster with given number of clusters and random seed.
Expand Down Expand Up @@ -625,6 +636,25 @@ def get_display_table(
export_format="csv",
)

@app.callback(
    dash.Output("download_cluster_csv", "data"),
    dash.Input("cluster_csv_button", "n_clicks"),
    # State rather than Input: the stored clustering is only *read* when the
    # button is clicked, so retraining the model must not fire this callback.
    dash.State("cluster-assignment", "data"),
    prevent_initial_call=True,
)
def download_cluster_csv(n_clicks, cluster_json):
    """Send the stored cluster assignments to the browser as a CSV download.

    Runs only when the "Download Clusters CSV" button is clicked. The CSV is
    named after the clustering model (``<model name>.csv``) and written
    without the dataframe index.

    :param n_clicks: int, number of times the download button has been clicked
    :param cluster_json: dict read from the "cluster-assignment" store; the
        "name" entry is the cluster model name and the "clusters" entry is the
        serialized dataframe of cluster assignments. May be None if nothing
        has been stored yet.

    :return: the payload built by ``dash.dcc.send_data_frame`` for the
        "download_cluster_csv" component
    :raises dash.exceptions.PreventUpdate: when the button has not been
        clicked or there is no stored clustering to export
    """
    # Guard clause: nothing to export yet. Without it, a missing store value
    # would raise a TypeError on the subscript below.
    if not n_clicks or not cluster_json:
        logger.info("Cluster download skipped: no click or no stored clusters")
        raise dash.exceptions.PreventUpdate

    logger.info("Cluster download button clicked times: %s", n_clicks)
    model_name = cluster_json["name"]
    # NOTE(review): unjsonify_stored_df is defined outside this view; it is
    # presumed to rebuild the dataframe from the stored JSON, with the second
    # argument naming the cluster assignment column(s) -- confirm.
    cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name])
    # Duplicate the assignments under the display column name so the export
    # carries both the model-named column and the display-named one.
    cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name]
    csv_name = f"{model_name}.csv"
    logger.info("Downloading clustering data to %s", csv_name)
    return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False)

if __name__ == "__main__":
print("Starting IHOP subreddit visualization application")
Expand Down
10 changes: 3 additions & 7 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ install_requires =

[options.extras_require]
app =
dash==2.3.1
dash-bootstrap-components==1.1.0
dash-core-components==2.0.0
dash-daq==0.5.0
dash-html-components==2.0.0
dash-renderer==1.9.0
dash-table==5.0.0
dash>=2.4.0
dash_bootstrap_components
dash_daq
gunicorn
matplotlib==3.5.0
plotly==5.6.0
Expand Down

0 comments on commit 8f761ed

Please sign in to comment.