Skip to content

Commit

Permalink
Added tests, made fixes, and opted out of Streamlit usage-statistics gathering
Browse files Browse the repository at this point in the history
Added additional test cases to increase coverage (excluding the ui module). Made minor fixes to the code, and made slight changes to documentation.
  • Loading branch information
rmj3197 committed Mar 11, 2024
1 parent 3e132a4 commit 86b7470
Show file tree
Hide file tree
Showing 23 changed files with 643 additions and 159 deletions.
Binary file added .DS_Store
Binary file not shown.
8 changes: 8 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[run]
omit =
# omit anything in a .local directory anywhere
*/.local/*
# omit everything in /usr
/usr/*
# omit the UI folder
QuadratiK/ui/*
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.vscode/
htmlcov/
6 changes: 5 additions & 1 deletion QuadratiK/datasets/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def load_wireless_data(desc=False, return_X_y=False, as_dataframe=True, scaled=F
)

if scaled:
data = data / np.linalg.norm(data, axis=1, keepdims=True)
data[:, :-1] = data[:, :-1] / np.linalg.norm(
data[:, :-1], axis=1, keepdims=True
)

feature_names = ["WS1", "WS2", "WS3", "WS4", "WS5", "WS6", "WS7", "Class"]

Expand All @@ -103,3 +105,5 @@ def load_wireless_data(desc=False, return_X_y=False, as_dataframe=True, scaled=F
return (fdescr, pd.DataFrame(data, columns=feature_names))
else:
return pd.DataFrame(data, columns=feature_names)
else:
return data
2 changes: 1 addition & 1 deletion QuadratiK/kernel_test/_h_selection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Contains the tuning parameter selecrtion algorithm
Contains the tuning parameter selection algorithm
"""

import numpy as np
Expand Down
7 changes: 4 additions & 3 deletions QuadratiK/kernel_test/_kernel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ def test(self, x, y=None):
raise TypeError("x must be a numpy array or a pandas dataframe")

if self.y is not None:
if isinstance(self.y, np.ndarray):
if isinstance(self.y, (np.ndarray, pd.Series)):
if self.y.ndim == 1:
self.y = self.y.reshape(-1, 1)
self.y = np.array(self.y).reshape(-1, 1)
elif isinstance(y, pd.DataFrame):
self.y = self.y.to_numpy()
else:
Expand Down Expand Up @@ -349,13 +349,14 @@ def test(self, x, y=None):
return self

else:
print("Entered this K Sample Else")
if (self.y is not None) and (self.x.shape[0] != self.y.shape[0]):
raise ValueError("'x' and 'y' must have the same number of rows.")

if self.h is None:
self.h = select_h(
self.x,
y=None,
self.y,
alternative=self.alternative,
method=self.method,
num_iter=self.num_iter,
Expand Down
4 changes: 0 additions & 4 deletions QuadratiK/kernel_test/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,6 @@ def stat_two_sample(x_mat, y_mat, h, mu_hat, sigma_hat, centering_type="nonparam
k_center = nonparam_centering(kmat_zz, n_z)
elif centering_type == "param":
k_center = param_centering(kmat_zz, z_mat, cov_h, mu_hat, sigma_hat)
else:
raise ValueError("Unknown centering type.")
np.fill_diagonal(k_center, 0)
test_non_par = (
(np.sum(k_center[:n_x, :n_x]) / (n_x * (n_x - 1)))
Expand Down Expand Up @@ -187,8 +185,6 @@ def stat_normality_test(x_mat, h, mu_hat, sigma_hat, centering_type="param"):
k_center = nonparam_centering(kmat_zz, n_x)
elif centering_type == "param":
k_center = param_centering(kmat_zz, x_mat, cov_h, mu_hat, sigma_hat)
else:
raise ValueError("Unknown centering type.")
np.fill_diagonal(k_center, 0)
test_normality = np.sum(k_center) / (n_x * (n_x - 1))
return test_normality
Expand Down
6 changes: 1 addition & 5 deletions QuadratiK/spherical_clustering/_pkbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,5 @@ def stats(self):
Dataframe of descriptive statistics
"""

summary_stats = stats(self.dat, self.labels_)
summary_stats_df = pd.concat(
summary_stats.values(), keys=summary_stats.keys(), axis=0
)
return summary_stats_df
return summary_stats
9 changes: 7 additions & 2 deletions QuadratiK/spherical_clustering/_pkbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@ def dpkb(self, x, mu, rho, logdens=False):
raise ValueError("mu must have length >= 2")
if isinstance(x, pd.DataFrame):
x = x.to_numpy()
p = x.shape[1]
if p < 2:

if x.ndim == 1:
raise ValueError("vectors must have length >= 2")
else:
p = x.shape[1]
if p < 2:
raise ValueError("vectors must have length >= 2")

if len(mu) != p:
raise ValueError("vectors and mu must have the same length")
if (rho >= 1) or (rho < 0):
Expand Down
20 changes: 17 additions & 3 deletions QuadratiK/tools/graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def sphere3d(x, y=None):
where each row represents the coordinates of a point in
3D space
y : numpy.ndarray, list, optional
y : numpy.ndarray, list, pandas.Series, optional
The parameter `y` is an optional input that determines the color and
shape of each data point in the plot. If `y` is not provided, the
scatter plot will have the default marker symbol and color.
Expand All @@ -85,7 +85,14 @@ def sphere3d(x, y=None):
x = x.to_numpy()

if isinstance(y, pd.DataFrame):
y = y.to_numpy()
y = y.to_numpy().flatten()
elif isinstance(y, pd.Series):
y = y.values
elif isinstance(y, np.ndarray):
if y.ndim == 1:
pass
elif y.ndim == 2:
y = y.flatten()

r = 1
pi = np.pi
Expand Down Expand Up @@ -187,7 +194,14 @@ def plot_clusters_2d(x, y=None):
x = x.to_numpy()

if isinstance(y, pd.DataFrame):
y = y.to_numpy()
y = y.to_numpy().flatten()
elif isinstance(y, pd.Series):
y = y.values
elif isinstance(y, np.ndarray):
if y.ndim == 1:
pass
elif y.ndim == 2:
y = y.flatten()

fig = plt.figure()
if y is not None:
Expand Down
2 changes: 2 additions & 0 deletions QuadratiK/ui/_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ async def main(self):
"#E5E4E2",
"--theme.textColor",
"#0e0e0e",
"--browser.gatherUsageStats",
"false",
]
sys.exit(stcli.main())

Expand Down
39 changes: 23 additions & 16 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@

import os
import sys
basedir = os.path.abspath(os.path.join(
os.path.dirname(__file__), '..', '..'))

basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, basedir)

project = 'QuadratiK'
copyright = '2023, Giovanni Saraceno, Marianthi Markatou, Raktim Mukhopadhyay, Mojgan Golzy'
author = 'Giovanni Saraceno, Marianthi Markatou, Raktim Mukhopadhyay, Mojgan Golzy'
release = '1.0.0'
project = "QuadratiK"
copyright = (
"2023, Giovanni Saraceno, Marianthi Markatou, Raktim Mukhopadhyay, Mojgan Golzy"
)
author = "Giovanni Saraceno, Marianthi Markatou, Raktim Mukhopadhyay, Mojgan Golzy"
release = "1.0.0"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand All @@ -29,24 +31,29 @@
"sphinx.ext.githubpages",
"nbsphinx",
"sphinx.ext.intersphinx",
"myst_parser"
"myst_parser",
]

templates_path = ['_templates']
templates_path = ["_templates"]
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_book_theme'
html_static_path = ['_static']
html_css_files = ['css/custom.css']
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]
strip_signature_backslash = True

# -- Options for Latex output -------------------------------------------------
latex_elements = {
'extraclassoptions': 'openany,oneside'
}
latex_documents = [('index', 'QuadratiK.tex', 'QuadratiK', author.replace(
'Raktim', '\\and Raktim'), 'manual')]
latex_elements = {"extraclassoptions": "openany,oneside"}
latex_documents = [
(
"index",
"QuadratiK.tex",
"QuadratiK",
author.replace("Raktim", "\\and Raktim"),
"manual",
)
]
41 changes: 23 additions & 18 deletions doc/source/user_guide/basic_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline \n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"np.random.seed(42)\n",
"import pandas as pd"
]
Expand Down Expand Up @@ -49,9 +50,9 @@
"source": [
"from QuadratiK.kernel_test import KernelTest\n",
"\n",
"data = np.random.randn(100,2)\n",
"data = np.random.randn(100, 2)\n",
"\n",
"normality_test = KernelTest(h=0.4, centering_type=\"param\",random_state=42).test(data)\n",
"normality_test = KernelTest(h=0.4, centering_type=\"param\", random_state=42).test(data)\n",
"print(\"Test : {}\".format(normality_test.test_type_))\n",
"print(\"Execution time: {:.3f}\".format(normality_test.execution_time))\n",
"print(\"H0 is Rejected : {}\".format(normality_test.h0_rejected_))\n",
Expand Down Expand Up @@ -116,6 +117,7 @@
"outputs": [],
"source": [
"from QuadratiK.tools import qq_plot\n",
"\n",
"qq_plot(data)"
]
},
Expand All @@ -141,10 +143,10 @@
"source": [
"from QuadratiK.kernel_test import KernelTest\n",
"\n",
"X = np.random.randn(100,2)\n",
"Y = np.random.randn(100,2)\n",
"X = np.random.randn(100, 2)\n",
"Y = np.random.randn(100, 2)\n",
"\n",
"two_sample_test = KernelTest(h=0.4, random_state=42).test(X,Y)\n",
"two_sample_test = KernelTest(h=0.4, random_state=42).test(X, Y)\n",
"print(\"Test : {}\".format(two_sample_test.test_type_))\n",
"print(\"Execution time: {:.3f}\".format(two_sample_test.execution_time))\n",
"print(\"H0 is Rejected : {}\".format(two_sample_test.h0_rejected_))\n",
Expand Down Expand Up @@ -185,10 +187,11 @@
"outputs": [],
"source": [
"from QuadratiK.kernel_test import KernelTest\n",
"X = np.random.randn(500,2)\n",
"y = np.random.randint(0,5,500)\n",
"\n",
"k_sample_test = KernelTest(h = 1.5, method = \"permutation\").test(X,y)\n",
"X = np.random.randn(500, 2)\n",
"y = np.random.randint(0, 5, 500)\n",
"\n",
"k_sample_test = KernelTest(h=1.5, method=\"permutation\").test(X, y)\n",
"\n",
"print(\"Test : {}\".format(k_sample_test.test_type_))\n",
"print(\"Execution time: {:.3f} seconds\".format(k_sample_test.execution_time))\n",
Expand Down Expand Up @@ -232,9 +235,9 @@
"from QuadratiK.tools import sample_hypersphere\n",
"from QuadratiK.poisson_kernel_test import PoissonKernelTest\n",
"\n",
"X = sample_hypersphere(100,3, random_state=42)\n",
"X = sample_hypersphere(100, 3, random_state=42)\n",
"\n",
"unif_test = PoissonKernelTest(rho = 0.7, random_state=42).test(X)\n",
"unif_test = PoissonKernelTest(rho=0.7, random_state=42).test(X)\n",
"\n",
"print(\"Execution time: {:.3f} seconds\".format(unif_test.execution_time))\n",
"\n",
Expand Down Expand Up @@ -273,7 +276,7 @@
"source": [
"from QuadratiK.tools import qq_plot\n",
"\n",
"qq_plot(X,dist = \"uniform\")"
"qq_plot(X, dist=\"uniform\")"
]
},
{
Expand Down Expand Up @@ -348,7 +351,7 @@
"plt.show()\n",
"\n",
"fig = plt.figure(figsize=(6, 4))\n",
"plt.plot(list(range(2,10)),wcss_cos, \"--o\")\n",
"plt.plot(list(range(2, 10)), wcss_cos, \"--o\")\n",
"plt.xlabel(\"Number of Cluster\")\n",
"plt.ylabel(\"Within Cluster Sum of Squares (WCSS)\")\n",
"plt.title(\"Elbow Plot for Wireless Indoor Localization dataset\")\n",
Expand All @@ -369,8 +372,9 @@
"outputs": [],
"source": [
"from QuadratiK.spherical_clustering import PKBD\n",
"pkbd_data = PKBD().rpkb(10,[0.5,0],0.5, \"rejvmf\", random_state= 42)\n",
"dens_val = PKBD().dpkb(pkbd_data, [0.5,0.5],0.5)\n",
"\n",
"pkbd_data = PKBD().rpkb(10, [0.5, 0], 0.5, \"rejvmf\", random_state=42)\n",
"dens_val = PKBD().dpkb(pkbd_data, [0.5, 0.5], 0.5)\n",
"print(dens_val)"
]
},
Expand Down Expand Up @@ -401,7 +405,8 @@
"y = np.random.randint(0, 2, 200)\n",
"\n",
"h_selected, all_values, power_plot = select_h(\n",
" X, y, alternative='location', power_plot=True, random_state=None)\n",
" X, y, alternative=\"location\", power_plot=True, random_state=None\n",
")\n",
"print(\"Selected h is: \", h_selected)"
]
},
Expand All @@ -411,7 +416,7 @@
"metadata": {},
"outputs": [],
"source": [
"#shows the detailed power vs h table\n",
"# shows the detailed power vs h table\n",
"all_values"
]
},
Expand All @@ -421,7 +426,7 @@
"metadata": {},
"outputs": [],
"source": [
"#shows the power plot\n",
"# shows the power plot\n",
"power_plot"
]
}
Expand Down
Loading

0 comments on commit 86b7470

Please sign in to comment.