Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add button to export all clusters as CSV download #25

Merged
merged 5 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [2.1.0]
### Changed
- Added support for python 3.10 in build and tests
- Made dependency verions less restrictive, except when necessary to avoid deprecations (sklearn, numpy)
- Made dependency versions less restrictive, except when necessary to avoid deprecations (sklearn, numpy)
- Unit tests updated to handle sklearn deprecations
- Updated prototype cluster browser to display 2023 data
- Upgraded Dash dependency version to >=2.4.0 for the cluster prototype browser app

### Fixed
- Upgraded DVC version from 2.10.0 to 3.33.1 to avoid https://github.com/iterative/dvc-objects/issues/241
Expand All @@ -20,11 +21,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Support for processing Reddit comments from manually downloaded archives
- Data and models for Reddit comments in 2023 tracked in DVC
- Instructions and support for running the prototype cluster browser with gunicorn
- Added button to download all subreddit cluster assignments in prototype cluster browser

### Removed
- Removed Unity documentation
- Removed argparse from app.py so that it can be served with gunicorn

## [2.1.0]
### Changed
- Update visualizations for WebScience 2024 paper

### Added
- Added citation information in Readme
- Trigger Zenodo DOI assignment for repository


## [2.0.0]
### Changed
- Removed prefilled anti-immigrant subreddits selected in subreddit clustering app dropdown. Now the dropdown is initially empty.
Expand Down
36 changes: 33 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@
]
)

# Button plus the (invisible) dcc.Download target it feeds; the callback that
# listens on "cluster_csv_button" writes the CSV payload into
# "download_cluster_csv".
DOWNLOAD_CLUSTER_CSV = dash.html.Div(
    children=[
        dbc.Button(
            "Download Clusters CSV",
            id="cluster_csv_button",
            n_clicks=0,
        ),
        dash.dcc.Download(id="download_cluster_csv"),
    ],
)


# First section of page, define KMeans parameters, train model button and metrics values and explanation
KMEANS_PARAM_SECTION = [
Expand Down Expand Up @@ -138,7 +145,7 @@
]
),
dash.html.Br(),
dbc.Button("Train clustering model", id="clustering_button"),
dbc.Button("Train clustering model", id="clustering_button",),
]
),
dash.html.Br(),
Expand All @@ -149,7 +156,10 @@
dash.dcc.Loading(
id="loading-metrics",
type="default",
children=[dash.html.Article(id="cluster-metrics")],
children=[
dash.html.Article(id="cluster-metrics"),
DOWNLOAD_CLUSTER_CSV,
],
),
dash.html.Br(),
]
Expand Down Expand Up @@ -267,7 +277,7 @@
dash.html.Br(),
SUBREDDIT_FILTERING_SECTION,
dash.html.Br(),
# Stores the dataframe with cluster assingments and the name of the cluster model (for exporting labels)
# Stores the dataframe with cluster assignments and the name of the cluster model (for exporting labels)
dash.dcc.Store(id="cluster-assignment"),
# Stores the list of subreddits available in the c2v model, for user to select in drop down
dash.dcc.Store(id="subreddits"),
Expand Down Expand Up @@ -391,6 +401,7 @@ def load_vector_model(selected_month):
dash.State("random-seed", "value"),
dash.Input("month-dropdown", "value"),
dash.Input("tsne-df", "data"),
running=[(dash.Output("clustering_button", "disabled"), True, False)]
)
def train_clusters(n_clicks, n_clusters, random_seed, c2v_identifier, tsne_json_data):
"""Trains kmeans cluster with given number of clusters and random seed.
Expand Down Expand Up @@ -625,6 +636,25 @@ def get_display_table(
export_format="csv",
)

@app.callback(
    dash.Output("download_cluster_csv", "data"),
    dash.Input("cluster_csv_button", "n_clicks"),
    # State, not Input: the stored cluster data is only *read* when the button
    # is clicked. Registering it as an Input made every retrain ship the whole
    # store to the server just to end in PreventUpdate.
    dash.State("cluster-assignment", "data"),
    prevent_initial_call=True,
)
def download_cluster_csv(n_clicks, cluster_json):
    """Send the current cluster assignments to the browser as a CSV download.

    Args:
        n_clicks: Click count of the "cluster_csv_button" button.
        cluster_json: Contents of the "cluster-assignment" store. It is read
            as a mapping with a "name" entry (the model name, also used as the
            cluster column name) and a "clusters" entry holding the
            serialized dataframe.

    Returns:
        The payload produced by ``dash.dcc.send_data_frame`` for a file named
        ``<model name>.csv``, written without the index.

    Raises:
        dash.exceptions.PreventUpdate: If the button has not been clicked or
            the store is still empty, so there is nothing to download.
    """
    # Guard against a click arriving before any clustering has been stored;
    # indexing into an empty store would otherwise raise a TypeError.
    if not n_clicks or not cluster_json:
        raise dash.exceptions.PreventUpdate

    logger.info("Cluster download button clicked times: %s", n_clicks)
    model_name = cluster_json["name"]
    # NOTE(review): presumably rebuilds the dataframe stored by the training
    # callback - confirm against iv.unjsonify_stored_df.
    cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name])
    # Duplicate the model-named column under the display name used in the app.
    cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name]
    csv_name = f"{model_name}.csv"
    logger.info("Downloading clustering data to %s", csv_name)
    return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False)

if __name__ == "__main__":
print("Starting IHOP subreddit visualization application")
Expand Down
10 changes: 3 additions & 7 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ install_requires =

[options.extras_require]
app =
dash==2.3.1
dash-bootstrap-components==1.1.0
dash-core-components==2.0.0
dash-daq==0.5.0
dash-html-components==2.0.0
dash-renderer==1.9.0
dash-table==5.0.0
dash>=2.4.0
dash_bootstrap_components
dash_daq
gunicorn
matplotlib==3.5.0
plotly==5.6.0
Expand Down
Loading