Skip to content

Commit

Permalink
added more instructions to dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
rmj3197 committed Feb 8, 2025
1 parent 577f8f9 commit 4a66244
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 35 deletions.
58 changes: 54 additions & 4 deletions QuadratiK/ui/pages/4_Tuning_Parameter_h_Selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,74 @@

st.title("Tuning Parameter h selection")
st.write(
"Computes the kernel bandwidth of the Gaussian kernel for the Two-sample\
"Computes the kernel bandwidth of the Gaussian kernel for the One-Sample, Two-sample\
and K-sample kernel-based quadratic distance (KBQD) tests."
)

with st.expander("Click to view example code in Python and R"):
code_python = """
import numpy as np
np.random.seed(0)
from scipy.stats import skewnorm
from QuadratiK.kernel_test import select_h
h_selected, all_values = select_h(x = x, y = y,alternative = 'skewness')
X_2 = np.random.multivariate_normal(mean=np.zeros(4), cov=np.eye(4), size=200)
Y_2 = skewnorm.rvs(
size=(200, 4),
loc=np.zeros(4),
scale=np.ones(4),
a=np.repeat(0.5, 4),
random_state=20,
)
# Perform the algorithm for selecting h
h_selected, all_powers, plot = select_h(
x=X_2, y=Y_2, alternative="location", power_plot=True
)
print(f"Selected h is: {h_selected}")
"""
st.code(code_python, language="python")

code_R = """
library(QuadratiK)
h_k <- select_h(dat_x=dat_k, dat_y=y, alternative="skewness")
h_k$h_sel
# Select the value of h using the mid-power algorithm
# Create two random normal matrices with 100 elements each
x <- matrix(rnorm(100), ncol = 2)
y <- matrix(rnorm(100), ncol = 2)
# Perform h selection for location alternative
h_sel <- select_h(x, y, alternative = "location")
"""
st.code(code_R, language="r")

st.subheader("Input Instructions", divider="grey")
st.write("1. Upload the data file in .txt or .csv format.")
st.write(
"2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
)
st.write("3. Specify the separator or delimiter used; the default is a comma (,).")

st.write(
"""4. Once the data is uploaded, specify the column in the data file that contains the labels. Additionally,
- For One-Sample test: All rows should have the same label
- For Two-Sample test: Use two distinct labels to identify the groups
- For K-Sample test: Use K distinct labels to identify the K groups"""
)
st.write(
"5. Furthermore, please specify the number of iterations to be used for critical value estimation, the proportion of subsampling samples to be used, and the alternative for computing the value of h. Default values are provided."
)

st.image(
str(
importlib.resources.files("QuadratiK.ui").joinpath(
"pages/assets/hselect_format.png"
)
),
caption="Sample data format for tuning parameter selection.",
use_container_width=True,
)

delim = st.text_input("**Enter the delimiter**", ",")
header_exist = st.checkbox(
"**Select, if the header is present in the data file.**", value=True
Expand Down
70 changes: 40 additions & 30 deletions QuadratiK/ui/pages/7_Clustering_on_Sphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@

with st.expander("Click to view code"):
code_python = """
# In case you do not have the true labels, do not read y.
X,y = Read the data and the cluster label files.
import warnings
from QuadratiK.datasets import load_wireless_data
from QuadratiK.spherical_clustering import PKBC
cluster_fit = PKBC(num_clust = Input the number of clusters).fit(X)
warnings.filterwarnings("ignore")
X, y = load_wireless_data(return_X_y=True)
# number of clusters tried are from 2 to 10
pkbc = PKBC(num_clust=range(2, 11), random_state=42).fit(X)
validation_metrics, elbow_plots = pkbc.validation(y_true=y)
print(validation_metrics.round(2))
print(pkbc.summary())
"""
st.code(code_python, language="python")

Expand All @@ -49,6 +59,30 @@
"""
st.code(code_R, language="r")

st.subheader("Input Instructions", divider="grey")

st.write("1. Upload the data file in .txt or .csv format.")
st.write(
"2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
)
st.write("3. Specify the separator or delimiter used; the default is a comma (,).")
st.write(
"4. If true labels for the data points are available, check the box and specify which column contains the labels."
)
st.write("5. Enter the desired number of clusters for the clustering (default is 2).")
st.write(
"6. For the K-Sample test on identified clusters, specify the number of iterations for critical value estimation, tuning parameter h, and proportion of subsampling."
)
st.write(
"7. For visualization, you can generate elbow plots to help determine optimal number of clusters, as well as plot the identified clusters on a circle/sphere."
)

st.image(
str(importlib.resources.files("QuadratiK.ui").joinpath("pages/assets/pkbd.png")),
caption="Sample data format for clustering on the sphere.",
use_container_width=True,
)

head = st.checkbox("**Select, if the header is present in the data file.**", value=True)
delim = st.text_input("**Enter the delimiter**", ",")
data = st.file_uploader(
Expand Down Expand Up @@ -102,6 +136,7 @@
st.error(f"An error occurred: {e}")
else:
x = copy.copy(data)
y = None

try:
with st.spinner("getting results ready..."):
Expand Down Expand Up @@ -222,22 +257,7 @@

st.header("Visualizations", divider="grey")

st.subheader("Elbow Plot")

with st.expander("Click to view code"):
elbow_code = """
import matplotlib.pyplot as plt
wcss_list = []
for clus in range(2,10):
cluster_fit = PKBC(num_clust=clus).fit(X)
wcss_list.append(cluster_fit.euclidean_wcss_)
plt.plot(list(range(2,10)),wcss_list, "--o")
plt.xlabel("Number of Cluster")
plt.ylabel("Within Cluster Sum of Squares (WCSS)")
plt.title("Elbow Plot")
"""
st.code(elbow_code, language="python")
st.subheader("Elbow Plot", divider="grey")


def get_wcss_euclid(x, k):
Expand Down Expand Up @@ -284,18 +304,8 @@ def get_wcss_cosine(x, k):
st.error(f"An error occurred: {e}")


st.subheader("Data on Sphere")
with st.expander("Click to view code"):
viz_code = """
from QuadratiK.tools import sphere3d
sphere3d(X,y)
st.subheader("Data on Sphere", divider="grey")

# or in case the input data is 2d
from QuadratiK.tools import plot_clusters_2d
plot_clusters_2d(X,y)
"""
st.code(viz_code, language="python")

if data is not None:
try:
Expand Down
Binary file added QuadratiK/ui/pages/assets/hselect_format.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added QuadratiK/ui/pages/assets/pkbd.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion docs/source/user_guide/basic_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
"metadata": {},
"outputs": [],
"source": [
"# please feel free to chanege the default renderer, for options see: https://plotly.com/python/renderers/\n",
"# please feel free to change the default renderer, for options see: https://plotly.com/python/renderers/\n",
"import plotly.io as pio\n",
"\n",
"pio.renderers.default = \"png\""
]
},
Expand Down

0 comments on commit 4a66244

Please sign in to comment.