added more instructions to dashboard

rmj3197 · Feb 8, 2025 · 4a66244 · 4a66244
1 parent 577f8f9
commit 4a66244
Show file tree

Hide file tree

Showing 5 changed files with 96 additions and 35 deletions.
diff --git a/QuadratiK/ui/pages/4_Tuning_Parameter_h_Selection.py b/QuadratiK/ui/pages/4_Tuning_Parameter_h_Selection.py
@@ -12,24 +12,74 @@
 
 st.title("Tuning Parameter h selection")
 st.write(
-    "Computes the kernel bandwidth of the Gaussian kernel for the Two-sample\
+    "Computes the kernel bandwidth of the Gaussian kernel for the One-Sample, Two-sample\
     and K-sample kernel-based quadratic distance (KBQD) tests."
 )
 
 with st.expander("Click to view example code in Python and R"):
     code_python = """
+    import numpy as np
+    np.random.seed(0)
+    
+    from scipy.stats import skewnorm
+    
     from QuadratiK.kernel_test import select_h
-    h_selected, all_values = select_h(x = x, y = y,alternative = 'skewness')
+    
+    X_2 = np.random.multivariate_normal(mean=np.zeros(4), cov=np.eye(4), size=200)
+    Y_2 = skewnorm.rvs(
+    size=(200, 4),
+    loc=np.zeros(4),
+    scale=np.ones(4),
+    a=np.repeat(0.5, 4),
+    random_state=20,
+    )
+
+    # Perform the algorithm for selecting h
+    h_selected, all_powers, plot = select_h(
+        x=X_2, y=Y_2, alternative="location", power_plot=True
+    )
+    print(f"Selected h is: {h_selected}")
     """
     st.code(code_python, language="python")
 
     code_R = """
     library(QuadratiK)
-    h_k <- select_h(dat_x=dat_k, dat_y=y, alternative="skewness")
-    h_k$h_sel
+    # Select the value of h using the mid-power algorithm
+    # Create two random normal matrices with 100 elements each
+    x <- matrix(rnorm(100), ncol = 2)
+    y <- matrix(rnorm(100), ncol = 2)
+    # Perform h selection for location alternative
+    h_sel <- select_h(x, y, alternative = "location")
     """
     st.code(code_R, language="r")
 
+st.subheader("Input Instructions", divider="grey")
+st.write("1. Upload the data file in .txt or .csv format.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
+)
+st.write("3. Specify the separator or delimiter used; the default is a comma (,).")
+
+st.write(
+    """4. Once the data is uploaded, specify the column in the data file that contains the labels. Additionally,
+    - For One-Sample test: All rows should have the same label
+    - For Two-Sample test: Use two distinct labels to identify the groups
+    - For K-Sample test: Use K distinct labels to identify the K groups"""
+)
+st.write(
+    "5. Furthermore, please specify the number of iterations to be used for critical value estimation, the proportion of subsampling samples to be used, and the alternative for computing the value of h. Default values are provided."
+)
+
+st.image(
+    str(
+        importlib.resources.files("QuadratiK.ui").joinpath(
+            "pages/assets/hselect_format.png"
+        )
+    ),
+    caption="Sample data format for tuning parameter selection.",
+    use_container_width=True,
+)
+
 delim = st.text_input("**Enter the delimiter**", ",")
 header_exist = st.checkbox(
     "**Select, if the header is present in the data file.**", value=True

diff --git a/QuadratiK/ui/pages/7_Clustering_on_Sphere.py b/QuadratiK/ui/pages/7_Clustering_on_Sphere.py
@@ -32,11 +32,21 @@
 
 with st.expander("Click to view code"):
     code_python = """
-    # In case you do not have the true labels, do not read y.
-    X,y = Read the data and the cluster label files. 
+    import warnings
 
+    from QuadratiK.datasets import load_wireless_data
     from QuadratiK.spherical_clustering import PKBC
-    cluster_fit = PKBC(num_clust = Input the number of clusters).fit(X)
+
+    warnings.filterwarnings("ignore")
+
+    X, y = load_wireless_data(return_X_y=True)
+    # number of clusters tried are from 2 to 10
+    pkbc = PKBC(num_clust=range(2, 11), random_state=42).fit(X)
+    
+    validation_metrics, elbow_plots = pkbc.validation(y_true=y)
+    
+    print(validation_metrics.round(2))
+    print(pkbc.summary())
     """
     st.code(code_python, language="python")
 
@@ -49,6 +59,30 @@
     """
     st.code(code_R, language="r")
 
+st.subheader("Input Instructions", divider="grey")
+
+st.write("1. Upload the data file in .txt or .csv format.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
+)
+st.write("3. Specify the separator or delimiter used; the default is a comma (,).")
+st.write(
+    "4. If true labels for the data points are available, check the box and specify which column contains the labels."
+)
+st.write("5. Enter the desired number of clusters for the clustering (default is 2).")
+st.write(
+    "6. For the K-Sample test on identified clusters, specify the number of iterations for critical value estimation, tuning parameter h, and proportion of subsampling."
+)
+st.write(
+    "7. For visualization, you can generate elbow plots to help determine optimal number of clusters, as well as plot the identified clusters on a circle/sphere."
+)
+
+st.image(
+    str(importlib.resources.files("QuadratiK.ui").joinpath("pages/assets/pkbd.png")),
+    caption="Sample data format for clustering on sphere",
+    use_container_width=True,
+)
+
 head = st.checkbox("**Select, if the header is present in the data file.**", value=True)
 delim = st.text_input("**Enter the delimiter**", ",")
 data = st.file_uploader(
@@ -102,6 +136,7 @@
                 st.error(f"An error occurred: {e}")
     else:
         x = copy.copy(data)
+        y = None
 
     try:
         with st.spinner("getting results ready..."):
@@ -222,22 +257,7 @@
 
 st.header("Visualizations", divider="grey")
 
-st.subheader("Elbow Plot")
-
-with st.expander("Click to view code"):
-    elbow_code = """
-    import matplotlib.pyplot as plt
-    wcss_list = []
-    for clus in range(2,10):
-        cluster_fit = PKBC(num_clust=clus).fit(X)
-        wcss_list.append(cluster_fit.euclidean_wcss_)
-        
-    plt.plot(list(range(2,10)),wcss_list, "--o")
-    plt.xlabel("Number of Cluster")
-    plt.ylabel("Within Cluster Sum of Squares (WCSS)")
-    plt.title("Elbow Plot")
-    """
-    st.code(elbow_code, language="python")
+st.subheader("Elbow Plot", divider="grey")
 
 
 def get_wcss_euclid(x, k):
@@ -284,18 +304,8 @@ def get_wcss_cosine(x, k):
             st.error(f"An error occurred: {e}")
 
 
-st.subheader("Data on Sphere")
-with st.expander("Click to view code"):
-    viz_code = """
-    from QuadratiK.tools import sphere3d
-    sphere3d(X,y)
+st.subheader("Data on Sphere", divider="grey")
 
-    # or in case the input data is 2d
-
-    from QuadratiK.tools import plot_clusters_2d
-    plot_clusters_2d(X,y)
-    """
-    st.code(viz_code, language="python")
 
 if data is not None:
     try:

diff --git a/QuadratiK/ui/pages/assets/hselect_format.png b/QuadratiK/ui/pages/assets/hselect_format.png
diff --git a/QuadratiK/ui/pages/assets/pkbd.png b/QuadratiK/ui/pages/assets/pkbd.png
diff --git a/docs/source/user_guide/basic_usage.ipynb b/docs/source/user_guide/basic_usage.ipynb
@@ -25,8 +25,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# please feel free to chanege the default renderer, for options see: https://plotly.com/python/renderers/\n",
+    "# please feel free to change the default renderer, for options see: https://plotly.com/python/renderers/\n",
     "import plotly.io as pio\n",
+    "\n",
     "pio.renderers.default = \"png\""
    ]
   },