changes to Dashboard and conf.py

rmj3197 · Feb 7, 2025 · baee3f9 · baee3f9
1 parent 33d1c6d
commit baee3f9
Show file tree

Hide file tree

Showing 13 changed files with 264 additions and 47 deletions.
diff --git a/QuadratiK/__init__.py b/QuadratiK/__init__.py
@@ -1,6 +1,6 @@
 from importlib import import_module
 
-__version__ = "1.1.2"
+__version__ = "1.1.3dev0"
 
 submodules = [
     "kernel_test",

diff --git a/QuadratiK/ui/pages/1_Normality_Test.py b/QuadratiK/ui/pages/1_Normality_Test.py
@@ -19,22 +19,58 @@ def run_normality_test(h_val, num_iter, b, x):
 st.title("Normality Test")
 st.write("Performs the Parametric Multivariate Normality Test.")
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
+    # Example of performing the normality test using QuadratiK in Python
+    import numpy as np
+
+    np.random.seed(78990)
     from QuadratiK.kernel_test import KernelTest
-    X = Read your data file here
-    normality_test = KernelTest(h = 0.5, centering_type="param").test(X)
-    normality_test.summary()
+
+    # data generation
+    data_norm = np.random.multivariate_normal(mean=np.zeros(4), cov=np.eye(4), size=500)
+
+    # performing the normality test
+    normality_test = KernelTest(
+        h=0.4, num_iter=150, method="subsampling", random_state=42
+    ).test(data_norm)
+
+    # printing the summary for normality test
+    print(normality_test.summary())
     """
     st.code(code_python, language="python")
 
     code_R = """
+    # Example of performing the normality test using QuadratiK in R
     library(QuadratiK)
-    norm_test <- kb.test(x=dat_norm, h=h)
-    summary(norm_test)
+    
+    # random data generation
+    x <- matrix(rnorm(100,4), ncol = 2)
+    
+    # performing the normality test
+    kb.test(x, mu_hat = c(4,4), Sigma_hat = diag(2), h = 0.4)
     """
     st.code(code_R, language="r")
 
+st.subheader("Input Instructions", divider="grey")
+
+st.write("1. Upload the data file in .txt or .csv format.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
+)
+st.write("3. Specify the separator or delimiter used; the default is a comma (,).")
+st.write("4. Once the data is uploaded, specify the values of the bandwidth parameter, the proportion of subsampling samples to be used, and the number of iterations for critical value estimation. Default values are provided.")
+
+st.image(
+    str(
+        importlib.resources.files("QuadratiK.ui").joinpath(
+            "pages/assets/normality_test_format.png"
+        )
+    ),
+    caption="Sample data format for normality test",
+    use_container_width=True,
+)
+
 delim = st.text_input("**Enter the delimiter**", ",")
 header_exist = st.checkbox(
     "**Select, if the header is present in the data file.**", value=True

diff --git a/QuadratiK/ui/pages/2_Two_Sample_Test.py b/QuadratiK/ui/pages/2_Two_Sample_Test.py
@@ -21,25 +21,77 @@ def run_twosample_test(h_val, num_iter, b, X, Y):
 
 st.write("Performs the Nonparametric Two Sample Test")
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
+    import numpy as np
+
+    np.random.seed(0)
+    from scipy.stats import skewnorm
+
     from QuadratiK.kernel_test import KernelTest
-    X = Read your data file here
-    Y = Read your data file here
-    two_sample_test = KernelTest(h = 0.5).test(X,Y)
-    two_sample_test.summary()
+
+    # data generation
+    X_2 = np.random.multivariate_normal(mean=np.zeros(4), cov=np.eye(4), size=200)
+    Y_2 = skewnorm.rvs(
+        size=(200, 4),
+        loc=np.zeros(4),
+        scale=np.ones(4),
+        a=np.repeat(0.5, 4),
+        random_state=20,
+    )
+    # performing the two sample test
+    two_sample_test = KernelTest(h=2, num_iter=150, random_state=42).test(X_2, Y_2)
+
+    # printing the summary for the two sample test
+    print(two_sample_test.summary())
     """
     st.code(code_python, language="python")
 
     code_R = """
-    library(QuadratiK)
-    X = Read your data file here
-    Y = Read your data file here
-    two_test <- kb.test(x=X, y=Y, h=2)
-    summary(two_test)
+    library(sn)         # For generating skew-normal distributed data
+    library(mvtnorm)    # For generating multivariate normal data
+    library(QuadratiK)  
+
+    # Set parameters
+    n <- 100           # Number of samples
+    d <- 4             # Dimension of the data
+    skewness_y <- 0.5  # Skewness parameter for Y distribution
+
+    # Set seed for reproducibility
+    set.seed(2468)
+
+    # Generate multivariate normal data for X
+    x_2 <- rmvnorm(n, mean = rep(0, d))  # Mean vector of zeros, identity covariance
+
+    # Generate skew-normal data for Y
+    y_2 <- rmsn(n = n, xi = 0, Omega = diag(d), alpha = rep(skewness_y, d))  
+
+    # Perform a statistical test to compare the two datasets
+    two_test <- kb.test(x = x_2, y = y_2, h = 2)
+
+    # Output the test result
+    two_test
     """
     st.code(code_R, language="r")
 
+st.subheader("Input Instructions", divider="grey")
+st.write("1. Upload the data file in .txt or .csv format for both the X and Y datasets.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default. Please ensure that either both X and Y contain headers or neither does."
+)
+st.write("3. Specify the separator or delimiter used in both the X and Y datasets; the default is a comma (,).")
+st.write("4. Once the data files are uploaded, specify the values of the bandwidth parameter, the proportion of subsampling samples to be used, and the number of iterations for critical value estimation. Default values are provided.")
+
+st.image(
+    str(
+        importlib.resources.files("QuadratiK.ui").joinpath(
+            "pages/assets/two_sample_test_format.png"
+        )
+    ),
+    caption="Sample data format for two sample test",
+    use_container_width=True,
+)
+
 delim = st.text_input("**Enter the delimiter**", ",")
 header_exist = st.checkbox(
     "**Select, if the header is present in the data file.**", value=True

diff --git a/QuadratiK/ui/pages/3_K_Sample_Test.py b/QuadratiK/ui/pages/3_K_Sample_Test.py
@@ -20,24 +20,75 @@ def run_ksample_test(h_val, num_iter, b, X, y):
 st.title("K Sample Test")
 st.write("Performs the Nonparametric K-Sample Test")
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
+    import numpy as np
+
+    np.random.seed(0)
     from QuadratiK.kernel_test import KernelTest
-    X,y = Read your data file here
-    k_sample_test = KernelTest(h = 0.5).kb_test(X,y)
-    k_sample_test.summary()
+
+    size = 200
+    eps = 1
+    x1 = np.random.multivariate_normal(
+        mean=[0, np.sqrt(3) * eps / 3], cov=np.eye(2), size=size
+    )
+    x2 = np.random.multivariate_normal(
+        mean=[-eps / 2, -np.sqrt(3) * eps / 6], cov=np.eye(2), size=size
+    )
+    x3 = np.random.multivariate_normal(
+        mean=[eps / 2, -np.sqrt(3) * eps / 6], cov=np.eye(2), size=size
+    )
+    # Merge the three samples into a single dataset
+    X_k = np.concatenate([x1, x2, x3])
+    # The memberships are needed for k-sample test
+    y_k = np.repeat(np.array([1, 2, 3]), size).reshape(-1, 1)
+
+    # performing the k-sample test
+    k_sample_test = KernelTest(h=1.5, method="subsampling", random_state=42).test(X_k, y_k)
+
+    # printing the summary for the k-sample test
+    print(k_sample_test.summary())
     """
     st.code(code_python, language="python")
 
     code_R = """
+    library(mvtnorm)
     library(QuadratiK)
-    X,y = Read your data file here
-    k_test <- kb.test(x=X, y=y, h=1.5)
-    summary(k_test)
+    library(ggplot2)
+    sizes <- rep(50,3)
+    eps <- 1
+    set.seed(2468)
+    x1 <- rmvnorm(sizes[1], mean = c(0,sqrt(3)*eps/3))
+    x2 <- rmvnorm(sizes[2], mean = c(-eps/2,-sqrt(3)*eps/6))
+    x3 <- rmvnorm(sizes[3], mean = c(eps/2,-sqrt(3)*eps/6))
+    x <- rbind(x1, x2, x3)
+    y <- as.factor(rep(c(1, 2, 3), times = sizes))
+    k_test <- kb.test(x = x, y = y, h = 2)
+    show(k_test)
     """
     st.code(code_R, language="r")
 
-delim = st.text_input("**Enter the delimiter**", " ")
+st.subheader("Input Instructions", divider="grey")
+st.write("1. Upload the data file in .txt or .csv format. The file should contain the observations together with a column of group labels.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
+)
+st.write("3. Specify the separator or delimiter used in the data file; the default is a comma (,).")
+st.write("4. Once the data is uploaded, specify the column in the data file that contains the labels.")
+st.write("5. Furthermore, please specify the values of the bandwidth parameter, the proportion of subsampling samples to be used, and the number of iterations for critical value estimation. Default values are provided.")
+
+st.image(
+    str(
+        importlib.resources.files("QuadratiK.ui").joinpath(
+            "pages/assets/ksample_test_format.png"
+        )
+    ),
+    caption="Sample data format for k-sample test",
+    use_container_width=True,
+)
+
+
+delim = st.text_input("**Enter the delimiter**", ",")
 header_exist = st.checkbox(
     "**Select, if the header is present in the data file.**", value=True
 )

diff --git a/QuadratiK/ui/pages/4_Tuning_Parameter_h_Selection.py b/QuadratiK/ui/pages/4_Tuning_Parameter_h_Selection.py
@@ -16,7 +16,7 @@
     and K-sample kernel-based quadratic distance (KBQD) tests."
 )
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
     from QuadratiK.kernel_test import select_h
     h_selected, all_values = select_h(x = x, y = y,alternative = 'skewness')

diff --git a/QuadratiK/ui/pages/5_Uniformity_Test.py b/QuadratiK/ui/pages/5_Uniformity_Test.py
@@ -23,24 +23,70 @@ def run_uniformity_test(rho, num_iter, x):
     spherical data using the Poisson kernel with concentration parameter rho ($\rho$)"
 )
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
+    import numpy as np
+
+    np.random.seed(0)
     from QuadratiK.poisson_kernel_test import PoissonKernelTest
-    X = Read your data file here
-    unif_test = PoissonKernelTest(rho = 0.7).test(X)
-    unif_test.summary()
+
+    # data generation
+    z = np.random.normal(size=(200, 3))
+    data_unif = z / np.sqrt(np.sum(z**2, axis=1, keepdims=True))
+
+    # performing the uniformity test
+    unif_test = PoissonKernelTest(rho=0.7, random_state=42).test(data_unif)
+
+    # printing the summary for uniformity test
+    print(unif_test.summary())
     """
     st.code(code_python, language="python")
 
     code_R = """
+    # Load the QuadratiK library
     library(QuadratiK)
-    X = Read your data file here
-    res_unif <- pk.test(x=X, rho=rho)
-    summary(res_unif)
+    
+    # Set parameters for data generation
+    n <- 200
+    d <- 3
+    
+    # Generate random data on the sphere
+    set.seed(2468)
+    z <- matrix(rnorm(n * d), n, d)
+    dat_sphere <- z/sqrt(rowSums(z^2))
+    
+    # Set the concentration parameter rho
+    rho <- 0.7
+    
+    # Perform the uniformity test using the Poisson Kernel Test
+    set.seed(2468)
+    res_unif <- pk.test(x = dat_sphere, rho = rho)
+    
+    # Display the results of the uniformity test
+    show(res_unif)
     """
     st.code(code_R, language="r")
+
+st.subheader("Input Instructions", divider="grey")
+
+st.write("1. Upload the data file in .txt or .csv format.")
+st.write(
+    "2. The file may contain a header (see image below for reference). If headers are present, check the box. The checkbox is selected by default."
+)
+st.write("3. Specify the separator or delimiter used; the default is a comma (,).")
+st.write(r"4. Once the data is uploaded, specify the number of iterations for critical value estimation and concentration parameter ($\rho$). Default values are provided.")
+
+st.image(
+    str(
+        importlib.resources.files("QuadratiK.ui").joinpath(
+            "pages/assets/uniformity_test_format.png"
+        )
+    ),
+    caption="Sample data format for uniformity test",
+    use_container_width=True,
+)
 
-delim = st.text_input("**Enter the delimiter**", " ")
+delim = st.text_input("**Enter the delimiter**", ",")
 header_exist = st.checkbox(
     "**Select, if the header is present in the data file.**", value=True
 )

diff --git a/QuadratiK/ui/pages/6_Data_generation_from_PKBD_Models.py b/QuadratiK/ui/pages/6_Data_generation_from_PKBD_Models.py
@@ -17,27 +17,50 @@
 st.title("Data generation from PKBD Models")
 st.write('Generates samples from the supported PKBD Models - "rejvmf" and "rejacg"')
 
-with st.expander("Click to view code"):
+with st.expander("Click to view example code in Python and R"):
     code_python = """
+    # Import the PKBD class from the QuadratiK.spherical_clustering module
     from QuadratiK.spherical_clustering import PKBD
-    rho = specify your rho value here
-    n_samples = specify the number of samples here
-    mu = specify a list of location parameters
-    data1 = PKBD().rpkb(n_samples,mu,rho,method = "rejvmf")
-    data2 = PKBD().rpkb(n_samples,mu,rho,method = "rejacg")
+    
+    # Create an instance of the PKBD class
+    pkbd = PKBD()
+    
+    # Generate samples using the "rejvmf" method
+    x_rejvmf = pkbd.rpkb(n = 1000, mu = [0, 1, 1], rho = 0.8, method = "rejvmf", random_state=42)
+    
+    # Generate samples using the "rejacg" method
+    x_rejacg = pkbd.rpkb(n = 1000, mu = [0, 1, 1], rho = 0.8, method = "rejacg", random_state=42)
     """
     st.code(code_python, language="python")
 
     code_R = """
-    library(QuadratiK)
-    rho = specify your rho value here
-    n_samples = specify the number of samples here
-    mu = specify a list of location parameters
-    dat1 <- rpkb(n_samples, rho=rho, mu=mu, method="rejvmf")$x
-    dat2 <- rpkb(n_samples, rho=rho, mu=mu, method="rejacg")$x
+    # Define the location parameter vector
+    mu <- c(0, 0, 1)
+    # Define the number of dimensions
+    d <- 3
+    # Define the number of samples to generate
+    n <- 1000
+    # Define the concentration parameter
+    rho <- 0.8
+
+    # Set the seed for reproducibility
+    set.seed(2468)
+    # Generate observations using the rejection algorithm with von-Mises 
+    # distribution envelopes
+    dat1 <- rpkb(n = n, rho = rho, mu = mu, method = "rejvmf")
+    # Generate observations using the rejection algorithm with angular central 
+    # Gaussian distribution envelopes
+    dat2 <- rpkb(n = n, rho = rho, mu = mu, method = "rejacg")
     """
     st.code(code_R, language="r")
 
+st.subheader("Input Instructions", divider="grey")
+
+st.write("1. Enter the total number of samples to be generated.")
+st.write(r"2. Enter the value of the concentration parameter ($\rho$).")
+st.write(r"3. Enter the components of the location parameter ($\mu$), separated by spaces.")
+
+
 n_samples = int(
     st.number_input("Enter the total number of samples to be generated", value=100)
 )

diff --git a/QuadratiK/ui/pages/assets/ksample_test_format.png b/QuadratiK/ui/pages/assets/ksample_test_format.png
diff --git a/QuadratiK/ui/pages/assets/normality_test_format.png b/QuadratiK/ui/pages/assets/normality_test_format.png
diff --git a/QuadratiK/ui/pages/assets/two_sample_test_format.png b/QuadratiK/ui/pages/assets/two_sample_test_format.png
diff --git a/QuadratiK/ui/pages/assets/uniformity_test_format.png b/QuadratiK/ui/pages/assets/uniformity_test_format.png