-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
92 lines (78 loc) · 4.4 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
import pandas as pd
import cardsort as cs
import matplotlib.pyplot as plt
# --- Page configuration and static introduction ------------------------------
# Set the `deprecation.showPyplotGlobalUse` option to False, then configure
# the page title before any content is rendered.
st.set_option("deprecation.showPyplotGlobalUse", False)
st.set_page_config(page_title="Card Sorting Analysis")

st.header("Analyse your card sorting data", divider="rainbow")
st.write(
    "This app provides a UI for the main functions of the "
    "[cardsort](https://cardsort.readthedocs.io/en/latest/) Python package:"
)
# Markdown bullet list of the features exposed by this UI.
st.write(
    "\n"
    "* Hierarchical Cluster Analysis\n"
    "* Dendrogram visualization\n"
    "* Extracting user-generated labels for clusters\n"
)
st.write(
    "Card sorting is a user research method that can help you create "
    "user-friendly information architectures for websites. See "
    "[here](https://www.nngroup.com/articles/card-sorting-definition/) "
    "for an introduction to card sorting."
)

# --- Dataset upload -----------------------------------------------------------
# `file` is falsy until the user uploads something; the rest of the script
# branches on it.
st.header("Upload your dataset")
file = st.file_uploader(
    label=(
        "**Accepted input:** .csv files in 'Casolysis Data' format. "
        "As generated, for example, by [kardSort](https://kardsort.com/)."
    ),
    help="Columns: 'card_id', 'card_label', 'category_id', 'category_label', 'user_id'",
)
# Offer the bundled example dataset for download.
# `index=False` keeps the exported CSV identical in shape to 'test-data.csv':
# without it, pandas writes the row index as an extra unnamed first column,
# which then shows up as a spurious column when the file is uploaded again.
st.download_button(
    label="Get a test dataset",
    data=pd.read_csv('test-data.csv').to_csv(index=False).encode('utf-8'),
    file_name='test-data.csv',
    mime='text/csv',
)
# Everything below the uploader that needs data only runs once a file has
# been uploaded; `df` is not defined otherwise.
if file:
    # --- Load and preview -----------------------------------------------------
    df = pd.read_csv(file)
    st.write('Data preview (first 5 lines)')
    st.write(df.head())

    # --- Dendrogram parameters --------------------------------------------------
    st.header('Visualize your data')
    st.write("Visualize your data as a dendrogram using hierarchical cluster analysis. For an introduction to this method, see [here](https://www.nngroup.com/videos/ia-dendrogram/).")
    st.subheader('Parameters')
    linkage_option = st.radio(
        "Which linkage method would you like to use to calculate distance?",
        ["average", "complete", "single"],
        index=0,
        help="For an introduction to the different linkage methods, [see here](https://medium.com/@iqra.bismi/different-linkage-methods-used-in-hierarchical-clustering-627bde3787e8).",
    )
    count_option = st.radio(
        "Would you like to display distance labels as fraction or absolute value (= number of participants)?",
        ["absolute", "fraction"],
        index=0,
        help="This choice impacts the labels on the x-axis of the dendrogram.",
    )

    # Bounds, default and step of the threshold input depend on the unit chosen
    # above. The four values are all floats or all ints within a branch.
    # (Named `threshold_*` rather than `min`/`max` so the builtins stay usable.)
    if count_option == "fraction":
        threshold_min = 0.00
        threshold_max = 1.01
        # NOTE(review): the input label below advertises "2/3 distance" as the
        # default, while this branch uses 0.75 -- confirm which is intended.
        threshold_default = 0.75
        threshold_step = 0.05
    else:
        threshold_min = 0
        # Cast to a plain int so the widget receives a built-in number instead
        # of a NumPy scalar.
        # NOTE(review): this uses the largest user_id as the upper bound, which
        # presumably assumes participants are numbered 1..N -- confirm.
        threshold_max = int(df['user_id'].max())
        threshold_default = round(2 / 3 * threshold_max)
        threshold_step = 1

    threshold_option = st.number_input(
        "Please set the color threshold (absolute value or fraction depending on your choice above). Default = 2/3 distance.",
        min_value=threshold_min,
        max_value=threshold_max,
        value=threshold_default,
        step=threshold_step,
        help="The threshold is the distance limit to which you want to consider (and thus color) clusters. You can determine this limit yourself, based on which clusters make sense to you. The closer to the left the branches merge, the more people grouped the respective cards together.",
    )

    # --- Dendrogram rendering ---------------------------------------------------
    # The flag lives in session state so that, once the button has been
    # clicked, the dendrogram keeps being drawn on later reruns of the script
    # (e.g. when a parameter widget changes).
    if 'dendrogram_created' not in st.session_state:
        st.session_state.dendrogram_created = False

    def create_dendrogram():
        """Button callback: record that the user asked for the dendrogram."""
        st.session_state.dendrogram_created = True

    st.button('Create dendrogram', on_click=create_dendrogram, type="primary")

    if st.session_state.dendrogram_created:
        dm = cs.get_distance_matrix(df)
        fig = cs.create_dendrogram(
            df,
            dm,
            count=count_option,
            linkage=linkage_option,
            color_threshold=threshold_option,
        )
        st.pyplot(fig)

    # --- Cluster label extraction -----------------------------------------------
    st.header('Analyze user-generated cluster labels')
    card_selection = st.multiselect(
        'Select one or more cards to see which category labels participants chose for them. If the output is empty, no participant grouped this exact selection of cards together.',
        sorted(df['card_label'].unique()),
    )
    if st.button("Extract labels", type="primary"):
        # Pass the selection directly; no intermediate variable named `list`,
        # which would shadow the builtin.
        st.write(cs.get_cluster_labels(df, card_selection))
        st.write("**What do the columns mean?** *user_id:* ID of each participant who grouped your card selection together; *cluster_label:* The label this participant gave to the cluster that contains the cards you selected; *cards:* All cards that the user grouped together under this label.")

# Footer: does not depend on uploaded data, so it is rendered on every run.
st.write("Feel free to contribute to improving this app by leaving feedback or contributing code on GitHub ([app](https://github.com/katoss/cardsort-analysis-ui) | [Python package](https://github.com/katoss/cardsort)) :sparkles:")