Skip to content

Commit

Permalink
ropensci review 14-11-24
Browse files Browse the repository at this point in the history
- the function rpkb() returns directly the generated data set and not a list
- packages ggplot2 and foreach not called as full packages but using importFrom
 - nClust argument of pkbc function has no default value
- usage of standardGeneric (see https://adv-r.hadley.nz/s4.html#s4-generics)
- goodpractice suggestions
- Uniform notation: d is the dimension of the Euclidean space, with spherical data lying on the (d-1)-dimensional sphere
- Specify dimension of input matrix x in clustering functions.
- Improve explanation of Average Silhouette Width
- Update NEWS.md file
- Add Hingee Kassel as reviewer in the DESCRIPTION file
- Update package version
  • Loading branch information
giovsaraceno committed Nov 15, 2024
1 parent 32b794e commit e94750c
Show file tree
Hide file tree
Showing 89 changed files with 551 additions and 501 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
Type: Package
Package: QuadratiK
Title: Collection of Methods Constructed using Kernel-Based Quadratic Distances
Version: 1.1.2
Version: 1.1.3
Authors@R: c(
person("Giovanni", "Saraceno", ,"[email protected]", role = c("aut", "cre"),
comment = "ORCID 0000-0002-1753-2367"),
person("Marianthi", "Markatou", role = "aut"),
person("Raktim", "Mukhopadhyay", role = "aut"),
person("Mojgan", "Golzy", role = c("aut"))
person("Mojgan", "Golzy", role = c("aut")),
person("Hingee", "Kassel", role = "rev")
)
Maintainer: Giovanni Saraceno <[email protected]>
Description: It includes test for multivariate normality, test for uniformity on the d-dimensional
Expand Down
16 changes: 14 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,24 @@ exportMethods(stats_clusters)
exportMethods(summary)
import(Rcpp)
import(RcppEigen)
import(foreach)
import(ggplot2)
import(rlecuyer)
importFrom(Rcpp,sourceCpp)
importFrom(doParallel,registerDoParallel)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(ggplot2,geom_abline)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_color_brewer)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_light)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(ggpubr,ggarrange)
importFrom(grDevices,rainbow)
importFrom(graphics,legend)
Expand Down Expand Up @@ -56,4 +69,3 @@ importFrom(stats,sd)
importFrom(stats,uniroot)
importFrom(utils,install.packages)
useDynLib(QuadratiK)
useDynLib(QuadratiK, .registration = TRUE)
37 changes: 34 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
QuadratiK 1.1.2 (Development version)
QuadratiK 1.1.3 (Development version)
=========================

### NEW FEATURES

* The function rpkb() now returns the generated data set directly instead of a list


### MINOR IMPROVEMENTS

* Packages ggplot2 and foreach are no longer imported in full; only the
required functions are imported via importFrom

* nClust argument of pkbc function has no default value

* usage of standardGeneric (see https://adv-r.hadley.nz/s4.html#s4-generics)

### BUG FIXES

* goodpractice suggestions

### DOCUMENTATION FIXES

* Uniform notation: d is the dimension of the Euclidean space, with spherical
data lying on the (d-1)-dimensional sphere

* Specify dimension of input matrix x in clustering functions.

* Improve explanation of Average Silhouette Width


QuadratiK 1.1.2 (2024-10-29)
=========================

### NEW FEATURES
Expand All @@ -25,8 +56,8 @@ QuadratiK 1.1.2 (Development version)
### DOCUMENTATION FIXES

* The help documentation for the methods defined for the classes in the
package can be accessed directly (addition of roxygen tag @name and
@aliases)
package can be accessed directly (addition of roxygen tag name and
aliases)


QuadratiK 1.1.1 (2024-06-05)
Expand Down
3 changes: 2 additions & 1 deletion R/QuadratiK-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@
#' The `QuadratiK` package is also available in Python on PyPI
#' <https://pypi.org/project/QuadratiK/> and also as a Dashboard application.
#' Usage instruction for the Dashboard can be found at
#' <https://quadratik.readthedocs.io/en/latest/user_guide/dashboard_application_usage.html>.
#' <https://quadratik.readthedocs.io/en/latest/user_guide/dashboard_application_usage.html>.
#'
#' @author
#' Giovanni Saraceno, Marianthi Markatou, Raktim Mukhopadhyay, Mojgan Golzy
Expand Down
20 changes: 10 additions & 10 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#'
#' @noRd
computeKernelMatrix <- function(x_mat, y_mat, H) {
.Call(`_QuadratiK_computeKernelMatrix`, x_mat, y_mat, H)
.Call('_QuadratiK_computeKernelMatrix', PACKAGE = 'QuadratiK', x_mat, y_mat, H)
}

#' Compute the Poisson kernel matrix between observations in a sample.
Expand All @@ -32,7 +32,7 @@ computeKernelMatrix <- function(x_mat, y_mat, H) {
#'
#' @noRd
computePoissonMatrix <- function(x_mat, rho) {
.Call(`_QuadratiK_computePoissonMatrix`, x_mat, rho)
.Call('_QuadratiK_computePoissonMatrix', PACKAGE = 'QuadratiK', x_mat, rho)
}

#' Non-parametric centered kernel
Expand All @@ -53,7 +53,7 @@ computePoissonMatrix <- function(x_mat, rho) {
#'
#' @noRd
NonparamCentering <- function(kmat_zz, n_z) {
.Call(`_QuadratiK_NonparamCentering`, kmat_zz, n_z)
.Call('_QuadratiK_NonparamCentering', PACKAGE = 'QuadratiK', kmat_zz, n_z)
}

#' Parametric centered kernel
Expand All @@ -75,7 +75,7 @@ NonparamCentering <- function(kmat_zz, n_z) {
#'
#' @noRd
ParamCentering <- function(kmat_zz, z_mat, H, mu_hat, Sigma_hat) {
.Call(`_QuadratiK_ParamCentering`, kmat_zz, z_mat, H, mu_hat, Sigma_hat)
.Call('_QuadratiK_ParamCentering', PACKAGE = 'QuadratiK', kmat_zz, z_mat, H, mu_hat, Sigma_hat)
}

#' Compute kernel-based quadratic distance test for Normality
Expand All @@ -93,7 +93,7 @@ ParamCentering <- function(kmat_zz, z_mat, H, mu_hat, Sigma_hat) {
#'
#' @noRd
kbNormTest <- function(x_mat, h, mu_hat, Sigma_hat) {
.Call(`_QuadratiK_kbNormTest`, x_mat, h, mu_hat, Sigma_hat)
.Call('_QuadratiK_kbNormTest', PACKAGE = 'QuadratiK', x_mat, h, mu_hat, Sigma_hat)
}

#' Poisson kernel-based test for Uniformity on the Sphere
Expand All @@ -111,7 +111,7 @@ kbNormTest <- function(x_mat, h, mu_hat, Sigma_hat) {
#'
#' @noRd
statPoissonUnif <- function(x_mat, rho) {
.Call(`_QuadratiK_statPoissonUnif`, x_mat, rho)
.Call('_QuadratiK_statPoissonUnif', PACKAGE = 'QuadratiK', x_mat, rho)
}

#'
Expand All @@ -127,7 +127,7 @@ statPoissonUnif <- function(x_mat, rho) {
#'
#' @keywords internal
var_two <- function(Kcen, nsamples) {
.Call(`_QuadratiK_var_two`, Kcen, nsamples)
.Call('_QuadratiK_var_two', PACKAGE = 'QuadratiK', Kcen, nsamples)
}

#' Compute kernel-based quadratic distance two-sample test with Normal kernel
Expand All @@ -154,7 +154,7 @@ var_two <- function(Kcen, nsamples) {
#'
#' @noRd
stat2sample <- function(x_mat, y_mat, h, mu_hat, Sigma_hat, centeringType = "Nonparam", compute_variance = TRUE) {
.Call(`_QuadratiK_stat2sample`, x_mat, y_mat, h, mu_hat, Sigma_hat, centeringType, compute_variance)
.Call('_QuadratiK_stat2sample', PACKAGE = 'QuadratiK', x_mat, y_mat, h, mu_hat, Sigma_hat, centeringType, compute_variance)
}

#'
Expand All @@ -171,7 +171,7 @@ stat2sample <- function(x_mat, y_mat, h, mu_hat, Sigma_hat, centeringType = "Non
#'
#' @keywords internal
var_k <- function(Kcen, sizes, cum_size) {
.Call(`_QuadratiK_var_k`, Kcen, sizes, cum_size)
.Call('_QuadratiK_var_k', PACKAGE = 'QuadratiK', Kcen, sizes, cum_size)
}

#' Kernel-based quadratic distance k-sample tests
Expand All @@ -194,6 +194,6 @@ var_k <- function(Kcen, sizes, cum_size) {
#'
#' @noRd
stat_ksample_cpp <- function(x, y, h, sizes, cum_size, compute_variance = TRUE) {
.Call(`_QuadratiK_stat_ksample_cpp`, x, y, h, sizes, cum_size, compute_variance)
.Call('_QuadratiK_stat_ksample_cpp', PACKAGE = 'QuadratiK', x, y, h, sizes, cum_size, compute_variance)
}

43 changes: 22 additions & 21 deletions R/clustering_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#' are fairly well concentrated around the vectors \eqn{\mu_j} of each cluster;
#' (3) the percentage of noise in the data increases.
#'
#' @param dat Data matrix or data.frame of data points on the sphere to be
#' clustered. The observations in \code{dat} are normalized by
#' dividing with the length of the vector to ensure
#' @param dat \eqn{(n \times d)}-data matrix or data.frame of data points on the
#' sphere to be clustered. The observations in \code{dat} are
#' normalized by dividing with the length of the vector to ensure
#' that they lie on the \eqn{(d-1)}-dimensional sphere. Note that
#' \eqn{d > 1}.
#' @param nClust Number of clusters. It can be a single value or a numeric
Expand Down Expand Up @@ -68,7 +68,7 @@
#' \eqn{\mathcal{S}^{d-1}}, but it can also be performed on spherically
#' transformed observations, i.e. data points on the Euclidean space
#' \eqn{\mathbb{R}^d} that are normalized such that they lie on the
#' corresponding \eqn{d}-dimensional sphere \eqn{\mathcal{S}^{d-1}}.
#' corresponding \eqn{(d-1)}-dimensional sphere \eqn{\mathcal{S}^{d-1}}.
#'
#' @return An S4 object of class \code{pkbc} containing the results of the
#' clustering procedure based on Poisson kernel-based distributions. The object
Expand Down Expand Up @@ -114,7 +114,7 @@
#' data1<-rpkb(size, c(1,0,0),rho)
#' data2<-rpkb(size, c(0,1,0),rho)
#' data3<-rpkb(size, c(0,0,1),rho)
#' dat<-rbind(data1$x,data2$x, data3$x)
#' dat<-rbind(data1,data2, data3)
#'
#' #Perform the clustering algorithm with number of clusters k=3.
#' pkbd<- pkbc(dat=dat, nClust=3)
Expand All @@ -130,14 +130,12 @@
#'
#' @export
setGeneric("pkbc",function(dat,
nClust = NULL,
nClust,
maxIter = 300,
stoppingRule = "loglik",
initMethod = "sampleData",
numInit = 10){

standardGeneric("pkbc")
})
numInit = 10)
standardGeneric("pkbc"))
#' @rdname pkbc
#'
#' @srrstats {G2.0} input nClust
Expand All @@ -155,7 +153,7 @@ setGeneric("pkbc",function(dat,
#' @export
setMethod("pkbc", signature(dat = "ANY"),
function(dat,
nClust = NULL,
nClust,
maxIter = 300,
stoppingRule = "loglik",
initMethod = "sampleData",
Expand Down Expand Up @@ -490,8 +488,9 @@ setMethod("summary", "pkbc", function(object) {
function(res) {
c(LogLik = res$LogLik, wcss = sum(res$wcss))
}))
summaryMatrix <- cbind(object@input$nClust, summaryMatrix)
rownames(summaryMatrix) <- names(object@res_k[object@input$nClust])
colnames(summaryMatrix) <- c("LogLik", "WCSS")
colnames(summaryMatrix) <- c("nClust", "LogLik", "WCSS")
cat("Summary:\n")
print(summaryMatrix)
cat("\n")
Expand Down Expand Up @@ -706,7 +705,7 @@ setMethod("plot", c(x = "pkbc"),
#' @importFrom grDevices rainbow
#' @importFrom graphics legend
#' @importFrom graphics par
#' @import ggplot2
#' @importFrom ggplot2 ggplot geom_point theme_minimal labs theme
#' @importFrom rrcov PcaLocantore
#'
#' @srrstats {G1.4} roxygen2 is used
Expand Down Expand Up @@ -831,7 +830,7 @@ scatterplotMethod <- function(object, k, true_label = NULL, pca_res = FALSE) {
#' within sum of squares computed with the Euclidean distance and the
#' cosine similarity.
#'
#' @import ggplot2
#' @importFrom ggplot2 ggplot geom_line geom_point labs theme_minimal
#' @importFrom ggpubr ggarrange
#'
#' @srrstats {G1.4} roxygen2 is used
Expand Down Expand Up @@ -1008,10 +1007,12 @@ setMethod("predict", signature(object="pkbc"),
#' between the partitions and a value close to 0 indicates a random assignment
#' of data points to clusters.
#'
#' Each cluster can represented by a so-called silhouette which is based on the
#' comparison of its tightness and separation. The average silhouette width
#' provides an evaluation of clustering validity, and might be used to select
#' an *appropriate* number of clusters (Rousseeuw 1987).
#' The average silhouette width quantifies the quality of clustering by
#' measuring how well each object fits within its assigned cluster. It is the
#' mean of silhouette values, which compare the tightness of an object within
#' its cluster to its separation from other clusters. Higher values indicate
#' well-separated, cohesive clusters, making it useful for selecting the
#' *appropriate* number of clusters (Rousseeuw 1987).
#'
#' Macro Precision is a metric used in multi-class classification that
#' calculates the precision for each class independently and then takes the
Expand Down Expand Up @@ -1045,8 +1046,8 @@ setMethod("predict", signature(object="pkbc"),
#' \linkS4class{pkbc} for the class object definition.
#'
#' @references
#' Kapp, A.V. and Tibshirani, R. (2007) "Are clusters found in one dataset present
#' in another dataset?", Biostatistics, 8(1), 9–31,
#' Kapp, A.V. and Tibshirani, R. (2007) "Are clusters found in one dataset
#' present in another dataset?", Biostatistics, 8(1), 9–31,
#' https://doi.org/10.1093/biostatistics/kxj029
#'
#' Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the interpretation and
Expand All @@ -1067,7 +1068,7 @@ setMethod("predict", signature(object="pkbc"),
#' data1<-rpkb(size, c(1,0,0),rho,method='rejvmf')
#' data2<-rpkb(size, c(0,1,0),rho,method='rejvmf')
#' data3<-rpkb(size, c(0,0,1),rho,method='rejvmf')
#' data<-rbind(data1$x,data2$x, data3$x)
#' data<-rbind(data1,data2, data3)
#'
#' #Perform the clustering algorithm
#' pkbc_res<- pkbc(data, 3)
Expand Down
10 changes: 6 additions & 4 deletions R/h_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,17 @@
#' @importFrom parallel clusterExport
#' @importFrom parallel detectCores
#' @importFrom parallel stopCluster
#' @import foreach
#' @importFrom foreach foreach %dopar%
#' @importFrom stats cov
#' @importFrom stats aggregate
#' @importFrom stats power
#' @import ggplot2
#' @importFrom ggplot2 ggplot geom_line labs theme_minimal theme_light theme
#' @importFrom ggplot2 scale_color_brewer
#' @import RcppEigen
#' @import rlecuyer
#' @importFrom Rcpp sourceCpp
#'
#' @useDynLib QuadratiK, .registration = TRUE
#' @useDynLib QuadratiK
#'
#' @srrstats {G1.4} roxygen2 is used
#' @srrstats {G2.0, G2.0a} input y, delta_dim, B, b
Expand Down Expand Up @@ -358,12 +359,13 @@ select_h <- function(x, y=NULL, alternative=NULL, method="subsampling", b=0.8,
# "compute_CV","stat2sample"))

D <- length(delta)
len_h <- length(h_values)

k_values <- 1:D
rep_values <- 1:Nrep

params <- expand.grid(Rep=rep_values, h = h_values)
params <- split(params, seq(nrow(params)))
params <- split(params, seq_len(Nrep*len_h))

res <- data.frame(delta=numeric(),
h=numeric(), power=numeric())
Expand Down
9 changes: 4 additions & 5 deletions R/kb.test.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,8 @@ setGeneric("kb.test",function(x, y=NULL, h = NULL, method = "subsampling",
B = 150, b = NULL, Quantile = 0.95,
mu_hat = NULL, Sigma_hat = NULL,
centeringType="Nonparam",
K_threshold=10, alternative="skewness"){

standardGeneric("kb.test")
})
K_threshold=10, alternative="skewness")
standardGeneric("kb.test"))
#' @rdname kb.test
#'
#' @srrstats {G1.4} roxygen2 is used
Expand Down Expand Up @@ -517,7 +515,8 @@ setMethod("show", "kb.test",
#' @seealso [kb.test()] and \linkS4class{kb.test} for more details.
#'
#' @importFrom ggpubr ggarrange
#' @import ggplot2
#' @importFrom ggplot2 ggplot geom_line theme_minimal geom_abline ggtitle
#' @importFrom ggplot2 xlab ylab
#'
#'@examples
#' # create a kb.test object
Expand Down
Loading

0 comments on commit e94750c

Please sign in to comment.