diff --git a/man/as_red_analysis.Rd b/man/as_red_analysis.Rd new file mode 100644 index 0000000..60b21e9 --- /dev/null +++ b/man/as_red_analysis.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectre_oop.R +\name{as_red_analysis} +\alias{as_red_analysis} +\alias{as_red_analysis.spectre} +\title{Convert an object to a \code{red_analysis} instance.} +\usage{ +as_red_analysis(x, ...) + +\method{as_red_analysis}{spectre}(x, kdim = 3, skip_last = TRUE, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments passed to the methods.} + +\item{kdim}{number of the smallest eigenvectors included used as components +of the \code{red_analysis} object.} + +\item{skip_last}{logical, should the last eigenvector be omitted before +drawing the k trailing eigenvectors?} +} +\description{ +Convert an object to an instance of \code{\link{red_analysis}} class. +} +\details{ +In case of \code{\link{spectre}} objects, the \code{kdim} trailing eigenvectors, +i.e. eigenvectors with the smallest eigenvalues will be used as components +of the \code{red_analysis} objects. They will be sorted in an ascending order, +i.e. the last eigenvector will be named 'comp_1', the one before last +'comp_2' and so on. +} diff --git a/man/calculate_dist.Rd b/man/calculate_dist.Rd new file mode 100644 index 0000000..aaf7e61 --- /dev/null +++ b/man/calculate_dist.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{calculate_dist} +\alias{calculate_dist} +\alias{calculate_weighted_dist} +\alias{get_kernel_info} +\title{Calculate distance.} +\usage{ +calculate_dist(data, method) + +calculate_weighted_dist( + data, + method = "euclidean", + weights = 1, + FUN = function(x, y) x + y, + ... +) + +get_kernel_info() +} +\arguments{ +\item{data}{a numeric data frame or matrix (\code{calculate_dist()}) or a list +of such objects (for \code{calculate_weighted_dist()}). 
Row names are preserved as +observation IDs.} + +\item{method}{the name of the dissimilarity measure (for \code{calculate_dist()}) +or a vector of distance names (\code{calculate_weighted_dist()}). See: +\code{\link{get_kernel_info}} for available distances.} + +\item{weights}{a numeric vector of weights.} + +\item{FUN}{a function used to integrate the weighted distances.} + +\item{...}{extra arguments passed to \code{FUN}.} +} +\value{ +\code{calculate_dist()}: a matrix with the distance statistics; +\code{get_kernel_info()}: a vector with names of available distance measures. +} +\description{ +The \code{calculate_dist()} function calculates distances +between observations (rows) of a data +frame or matrix. +\code{calculate_weighted_dist()} computes a matrix of weighted distances +between observations (rows) for a list of numeric data frames or matrices. +See \code{\link{get_kernel_info}} for a vector of available +distance measures. +} +\details{ +\code{calculate_dist()} and \code{calculate_weighted_dist()} provide handy wrappers for +\code{\link[philentropy]{distance}}. +The smc (simple matching coefficient) distance is calculated with the +\code{\link[nomclust]{sm}} function. Similarity coefficients returned by +\code{\link[philentropy]{distance}} (methods: cosine, ruzicka, intersection, +inner_product, harmonic_mean, hassebrook, fidelity) are handled with the +formula \code{dist = 1 - simil}. +} +\references{ +Drost H-G. Philentropy: Information Theory and Distance Quantification +with R. J Open Source Softw (2018) 3:765. doi:10.21105/joss.00765 + +Boriah S, Chandola V, Kumar V. Similarity measures for categorical data: +A comparative evaluation. in Society for Industrial and +Applied Mathematics - 8th SIAM International Conference on Data Mining +2008, Proceedings in Applied Mathematics 130, 243–254. +doi:10.1137/1.9781611972788.22 + +Sulc Z, Cibulkova J, Rezankova H. nomclust: Hierarchical Cluster Analysis +of Nominal Data. 
(2021) +Available at: https://cran.r-project.org/package=nomclust +} diff --git a/man/center_data.Rd b/man/center_data.Rd new file mode 100644 index 0000000..955dcca --- /dev/null +++ b/man/center_data.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing_functions.R +\name{center_data} +\alias{center_data} +\alias{min_max} +\title{Normalization of a data frame.} +\usage{ +center_data(data, type = c("mean", "median"), complete_cases = FALSE) + +min_max(data, complete_cases = FALSE) +} +\arguments{ +\item{data}{a data frame or a tibble. All variables need to be numeric.} + +\item{type}{type of the centering, mean (default) or median.} + +\item{complete_cases}{logical, should the observations with the complete +variable record only be included as an output?} +} +\value{ +a data frame or a tibble. +} +\description{ +Normalization with median or mean centering of a data frame +or tibble (\code{center_data()}) or simple min/max normalization (\code{min_max()}). +Preserves the row names. +} +\details{ +A wrapper around \code{\link[base]{scale}}. Mean scaling is equal +to canonical Z-score normalization. +} diff --git a/man/check_numeric.Rd b/man/check_numeric.Rd new file mode 100644 index 0000000..bde3181 --- /dev/null +++ b/man/check_numeric.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{check_numeric} +\alias{check_numeric} +\title{Check for a numeric data frame or a matrix.} +\usage{ +check_numeric(object) +} +\arguments{ +\item{object}{an object.} +} +\value{ +none. Throws exceptions if the object is not a numeric data frame or +a matrix. +} +\description{ +Checks if an object is a numeric data frame or a matrix. 
+} diff --git a/man/check_topo.Rd b/man/check_topo.Rd new file mode 100644 index 0000000..c9cd57d --- /dev/null +++ b/man/check_topo.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood_utils.R +\name{check_topo} +\alias{check_topo} +\title{Topology error of self-organizing maps.} +\usage{ +check_topo(x) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} +} +\value{ +An object of the \code{\link{knb}} class with +\code{\link{summary.knb}} and \code{\link{plot.knb}} methods. +} +\description{ +Checks for topology errors for consecutive data points. +} +\details{ +The procedure of topology error computation is as follows: for each +observation, two nearest self-organizing map (SOM) nodes are identified. +If such nodes are neighbors in the initial layout of SOM prior to data +fitting, correct topology (coded with 0) is returned and an error otherwise +(coded as 1). +} diff --git a/man/clust_analysis.Rd b/man/clust_analysis.Rd new file mode 100644 index 0000000..2a57a2f --- /dev/null +++ b/man/clust_analysis.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{clust_analysis} +\alias{clust_analysis} +\title{Clustering analysis object.} +\usage{ +clust_analysis(x) +} +\arguments{ +\item{x}{a named list, see Details.} +} +\value{ +a \code{clust_analysis} object with the elements listed in Details. +} +\description{ +Constructs a \code{clust_analysis} class object given a list with +results of a clustering analysis. 
+} +\details{ +A named list with the following elements is required +as the \code{x} argument: +\itemize{ +\item \code{data}: a quosure calling the original data set, +\item \code{dist_mtx}: a numeric matrix with the distances between the observations, +\item \code{dist_method}: name of the distance statistic, +\item \code{clust_obj}: the output object of the clustering analysis, +\item \code{clust_fun}: the name of the clustering function or prediction, +\item \code{clust_assignment}: a data frame with the cluster assignment of the +observations. It has to contain the variables \code{observation} and \code{clust_id}, +\item \code{dots}: additional arguments passed to the clustering function. +} + +The \code{clust_analysis} object can be created for clustering solutions based on +data frames or matrices or user-provided distance matrices (clustering +functions working with dissimilarity objects of the \code{dist} class). +In the latter case, an instance of the subclass \code{min_analysis} is returned. +As such, the \code{min_analysis} class inherits most of the methods for +\code{clust_analysis} objects. However, some methods requiring source tabular data +like reduction analysis with the genuine data frame, variable importance +or heterologous cross-distances will not be available. +Semi-supervised clustering and cross-validation will be implemented for +the \code{min_analysis} class in the future. +} diff --git a/man/cluster_cv.Rd b/man/cluster_cv.Rd new file mode 100644 index 0000000..933f8b4 --- /dev/null +++ b/man/cluster_cv.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{cluster_cv} +\alias{cluster_cv} +\title{Cross validation results.} +\usage{ +cluster_cv(x, ...) +} +\arguments{ +\item{x}{a named list with elements listed in Details.} + +\item{...}{extra arguments, currently none} +} +\value{ +an instance of the \code{cluster_cv} class with the elements specified +in Details. 
+} +\description{ +Creates an object of the \code{cluster_cv} class on top of a list. +} +\details{ +The \code{x} argument has to be a list with the following elements: +\itemize{ +\item \code{clust_analysis_object} which stores a \code{clust_analysis} or +\code{combi_analysis} object with the global cluster assignment +\item \code{predictions} with a data frame storing out-of-fold predictions with the +variables \code{observation}, \code{fold_clust} (out-of-fold cluster assignment), +\code{global_clust} (cluster assignment in the global clustering structure), +\code{correct} (a logical indicating the out-of-fold - global assignment +concordance) and \code{fold} (fold ID). +\item \code{fold_stats}, a data frame which stores fold means of +accuracy (\code{corr_rate}), classification error (\code{err_rate}), fraction of +explained clustering variance (\code{frac_var}) and silhouette width (\code{sil_width}) +\item \code{summary} storing the global means and BCA 95\% confidence intervals of the statistics listed +in \code{fold_stats} +} +} diff --git a/man/combi_analysis.Rd b/man/combi_analysis.Rd new file mode 100644 index 0000000..35eaaf9 --- /dev/null +++ b/man/combi_analysis.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{combi_analysis} +\alias{combi_analysis} +\title{Combined SOM - clustering analysis.} +\usage{ +combi_analysis(x) +} +\arguments{ +\item{x}{a named list, see Details.} +} +\value{ +a \code{combi_analysis} object with the elements specified in Details. +} +\description{ +Constructs a \code{combi_analysis} class object given a list with +results of reduction analysis or self-organizing map and clustering analysis. 
+} +\details{ +A named list with the following elements is required as the \code{x} argument: +\itemize{ +\item \code{clust_analyses}: a list of \code{red_analysis} or \code{clust_analysis} objects, +\item \code{clust_assignment}: a data frame with the cluster assignment with the +\code{observation} and \code{clust_id} variables. +} + +For combined solutions involving unsupervised clustering of the SOM U matrix, +the function returns an object of subclass +\code{umatrix_analysis}, which inherits virtually all methods from the +\code{combi_analysis} class. +} diff --git a/man/combi_cluster.Rd b/man/combi_cluster.Rd new file mode 100644 index 0000000..9817038 --- /dev/null +++ b/man/combi_cluster.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combi_clustering.R +\name{combi_cluster} +\alias{combi_cluster} +\alias{multi_cluster} +\title{Cluster self-organizing map nodes.} +\usage{ +combi_cluster( + data, + distance_som = "euclidean", + xdim = 5, + ydim = 4, + topo = "hexagonal", + neighbourhood.fct = "gaussian", + toroidal = FALSE, + rlen = 500, + som_args = NULL, + node_clust_fun = hcluster, + distance_nodes = distance_som, + seed = 1234, + ... +) + +multi_cluster( + data, + distance_method = "euclidean", + xdim = 5, + ydim = 4, + topo = "hexagonal", + neighbourhood.fct = "gaussian", + toroidal = FALSE, + rlen = 500, + som_args = NULL, + node_clust_fun = hcluster, + seed = 1234, + ... +) +} +\arguments{ +\item{data}{for \code{combi_cluster()}, a numeric data frame, matrix or +a \code{red_analysis} object. If a \code{red_analysis} object is provided, its +component/score table will be clustered. For \code{multi_cluster()}, a list +of such objects.} + +\item{distance_som}{metric of distance between the observations, used for SOM +development. 
See: \code{\link{get_kernel_info}}.} + +\item{xdim}{x dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{ydim}{y dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{topo}{SOM grid topology, see: \code{\link[kohonen]{somgrid}} +for details. 'hexagonal' by default.} + +\item{neighbourhood.fct}{neighborhood function, 'gaussian' by default.} + +\item{toroidal}{logical, should toroidal grid be used?} + +\item{rlen}{number of the SOM algorithm iterations.} + +\item{som_args}{a list of extra arguments passed to +\code{\link{som_cluster}}, \code{\link[kohonen]{som}} or +\code{\link[kohonen]{supersom}}. They may include weights for data layers +or the learning rate.} + +\item{node_clust_fun}{a function provided by the clustTools package used to +cluster the SOM nodes. An alternative for \code{combi_cluster()}: a user-provided +function that takes a numeric data frame or matrix and returns a +\code{clust_analysis} object. An alternative for \code{multi_cluster()}: a +user-provided function that takes a dissimilarity object (R's \code{dist} class) +and returns a \code{clust_analysis} object.} + +\item{distance_nodes}{metric of distance between the nodes, used for +clustering of the SOM nodes. Defaults to \code{distance_som}. See: \code{\link{get_kernel_info}}.} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments. For \code{combi_cluster()}, they are passed to +\code{node_clust_fun} and may include e.g. \code{k} number of clusters.} + +\item{distance_method}{a vector of distance names that matches elements of +the \code{data} list.} +} +\value{ +an object of the class \code{\link{combi_analysis}}. +} +\description{ +Performs clustering of the self-organizing map (SOM) with +one of the clustering functions provided by the clustTools package. 
+} +\details{ +The clustering procedure involves construction of SOM with the user-provided +data followed by unsupervised clustering of the inter-node distance matrix. +For \code{combi_cluster()} tackling single-layer SOM, the user is allowed to +specify distances both for the input data and the nodes. +In case when both distance methods are the same, the inter-node +distance matrix corresponds to a classical U-matrix as computed by +\code{\link[kohonen]{object.distances}} - this is also the +recommended default option. +\code{multi_cluster()}, which takes a multi-layer data set as the \code{data} argument, +the U matrix constructed by \code{\link[kohonen]{object.distances}} is +always used as the inter-node distance matrix subjected to unsupervised +clustering. +} +\references{ +Vesanto J, Alhoniemi E. Clustering of the self-organizing map. +IEEE Trans Neural Networks (2000) 11:586–600. doi:10.1109/72.846731 +} diff --git a/man/components.clust_analysis.Rd b/man/components.clust_analysis.Rd new file mode 100644 index 0000000..b8559bb --- /dev/null +++ b/man/components.clust_analysis.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduction.R +\name{components.clust_analysis} +\alias{components.clust_analysis} +\alias{components.min_analysis} +\alias{components.combi_analysis} +\alias{components.umatrix_analysis} +\title{Dimensionality reduction analysis of the analysis data or distance matrix.} +\usage{ +\method{components}{clust_analysis}( + object, + kdim = NULL, + red_fun = c("pca", "mds", "umap"), + with = c("distance", "data", "umatrix"), + distance_method = NULL, + train_object = NULL, + ... +) + +\method{components}{min_analysis}(object, kdim = NULL, red_fun = c("pca", "mds", "umap"), ...) + +\method{components}{combi_analysis}( + object, + kdim = NULL, + red_fun = c("pca", "mds", "umap"), + with = c("distance", "data", "umatrix"), + distance_method = NULL, + train_object = NULL, + ... 
+) + +\method{components}{umatrix_analysis}( + object, + kdim = NULL, + red_fun = c("pca", "mds", "umap"), + with = c("distance", "data", "umatrix"), + distance_method = NULL, + ... +) +} +\arguments{ +\item{object}{an object.} + +\item{kdim}{number of dimensions. If NULL, kdim is set to the number of +clusters.} + +\item{red_fun}{reduction analysis function: 'pca' (PCA), 'mds' (MDS) or +'umap' (UMAP).} + +\item{with}{type of the input data for the reduction analysis: +the clustering data ('data'), the matrix of distances between observations +('distance') or U matrix between SOM nodes ('umatrix').} + +\item{distance_method}{an optional name of the distance metric or, for +analyses of multi-layer data, a vector of distance names. If not provided, +the distance metric will be extracted from the object.} + +\item{train_object}{an optional \code{red_analysis} object which will be used as +a training layout. This works currently only for PCA and UMAP and in case +of single layer data frames as clustering data. +Please refer to \code{\link[stats]{prcomp}} and \code{\link[umap]{umap}} for +details.} + +\item{...}{extra arguments passed to \code{\link{reduce_data}}.} +} +\value{ +a \code{red_analysis} object with the component/score table containing +the cluster assignment information (\code{clust_id} variable). In case of +multi-layer SOM analyses with the clustering data set, a list of +\code{red_analysis} objects is returned. +} +\description{ +Performs principal component analysis (PCA), multi-dimensional +scaling (MDS) or uniform manifold approximation and projection (UMAP) of the +analysis data set used for clustering or distance matrix. +} +\details{ +See \code{\link{reduce_data}} for the implementation details. +The distance method, relevant for MDS and UMAP, is taken over from the +\code{clust_object}. Hence, some distances may crash the analysis with UMAP, see: +\code{\link[umap]{umap.defaults}} for the compatible distances. 
+For \code{combi_analysis} objects, the analysis is done for the global clustering, +i.e. assignment of observations to the clusters and not to the SOM nodes. +In cases when the clustering analysis was done with a user-provided +dissimilarity object (subclass \code{min_analysis} of \code{clust_analysis}), it is not +possible to perform a dimensionality reduction analysis with the genuine data +set - the analysis can be performed only for the distance matrix. +In case of multi-layer SOM analyses or combined multi-layer SOM - clustering +analyses and \code{with} set to 'data', the dimensionality reduction analysis +is done separately for each data layer. +} diff --git a/man/create_clust_folds.Rd b/man/create_clust_folds.Rd new file mode 100644 index 0000000..264e392 --- /dev/null +++ b/man/create_clust_folds.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{create_clust_folds} +\alias{create_clust_folds} +\title{Create cross-validation folds for clustering.} +\usage{ +create_clust_folds(data, k, seed = 1234) +} +\arguments{ +\item{data}{the input data: a data frame, matrix or a list of data frames +or matrices.} + +\item{k}{number of cross-validation folds.} + +\item{seed}{an integer or \code{NULL} which specifies the seed for the random +number generator.} +} +\value{ +a list with two elements: \code{train} and \code{test} with the data subsets +intended for development of the clustering object and predictions, +respectively. +} +\description{ +Creates cross-validation folds in the single-layer (data frame or matrix) +or multi-layer clustering data. +} +\details{ +The folds are generated with \code{\link[caret]{createFolds}}. +Intended for internal use. 
+} diff --git a/man/cross2dist.Rd b/man/cross2dist.Rd new file mode 100644 index 0000000..4f2e4fa --- /dev/null +++ b/man/cross2dist.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood_utils.R +\name{cross2dist} +\alias{cross2dist} +\title{Convert cross-distance objects to a distance matrix.} +\usage{ +cross2dist(x, zero_diag = TRUE) +} +\arguments{ +\item{x}{a \code{cross_dist} object.} + +\item{zero_diag}{logical, should diagonals be filled with zeros?} +} +\value{ +a \code{dist} object. +} +\description{ +Converts a \code{cross_dist} class object to a matrix of mean +cross-distances between the clusters. +} diff --git a/man/cross_dist.Rd b/man/cross_dist.Rd new file mode 100644 index 0000000..6514ae0 --- /dev/null +++ b/man/cross_dist.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{cross_dist} +\alias{cross_dist} +\title{Construct a cross_dist object.} +\usage{ +cross_dist( + x, + type = c("homologous", "heterologous"), + method = "euclidean", + x_levels = NULL, + y_levels = NULL, + ... +) +} +\arguments{ +\item{x}{a named list of cross-distance matrices.} + +\item{type}{type of the cross-distances: +\code{homologous} for comparison within the same clustering structure or +\code{heterologous} for comparison of two clustering structures.} + +\item{method}{name of the distance metric.} + +\item{x_levels}{order of the clusters of the +first cluster/combi analysis object.} + +\item{y_levels}{order of the clusters of the +second cluster/combi analysis object.} + +\item{...}{extra arguments, currently none defined.} +} +\value{ +an object of class \code{cross_dist} being a list of +cross-distance matrices. +Information on the comparison type and distance +metric is stored as the \code{type} and \code{dist_method} attributes. +Information on the cluster order is stored as the +\code{x_levels} and \code{y_levels} attributes. 
+} +\description{ +The \code{cross_dist} class objects are created on top of a named list +of matrices of cross-distances between clusters. +The list elements have to be named after the compared clusters following +the 'clust1 vs clust2' scheme. +} diff --git a/man/cross_distance.Rd b/man/cross_distance.Rd new file mode 100644 index 0000000..91d8179 --- /dev/null +++ b/man/cross_distance.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_distance.R +\name{cross_distance} +\alias{cross_distance} +\alias{cross_distance.data.frame} +\alias{cross_distance.clust_analysis} +\alias{cross_distance.min_analysis} +\alias{cross_distance.combi_analysis} +\alias{cross_distance.umatrix_analysis} +\title{Compute cross-distances between data frames or clusters} +\usage{ +cross_distance(x, ...) + +\method{cross_distance}{data.frame}(x, y = NULL, method = "euclidean", ...) + +\method{cross_distance}{clust_analysis}(x, y = NULL, method = NULL, .parallel = FALSE, ...) + +\method{cross_distance}{min_analysis}(x, ...) + +\method{cross_distance}{combi_analysis}(x, y = NULL, method = NULL, .parallel = FALSE, ...) + +\method{cross_distance}{umatrix_analysis}(x, y = NULL, method = NULL, .parallel = FALSE, ...) +} +\arguments{ +\item{x}{a data frame, \code{clust_analysis} or \code{combi_analysis} object.} + +\item{...}{extra arguments passed to methods.} + +\item{y}{an object like \code{x} or NULL (default).} + +\item{method}{distance metric name as specified by +\code{\link{get_kernel_info}}. +For \code{clust_analysis} or \code{combi_analysis} instances: +if \code{method} is set to NULL, +the metric name is extracted from the object (distance between observations +for \code{clust_analysis} and \code{combi_analysis}, not between the SOM nodes). 
For +multi-layer SOM and their predictions, the distance methods are extracted +from the clustering objects.} + +\item{.parallel}{logical, should the operation be run in parallel?} +} +\value{ +For data frames: a matrix with pairwise distances, +observations of the \code{x} data frame +are presented in rows, observations of the \code{y} data frame are presented +in columns. +For \code{clust_analysis} and \code{combi_analysis} results: +a list of cross-distance matrices of class \code{\link{cross_dist}} +with defined \code{\link{summary.cross_dist}} and +\code{\link{plot.cross_dist}} methods. +} +\description{ +Computes cross-distances between two data frames with the same variable sets. +} +\details{ +\code{cross_distance()} is an S3 generic function. +Distances (for available distances, see: \code{\link{get_kernel_info}}) are +computed in a pair-wise manner employing \code{\link[philentropy]{distance}}. +Preserves row names. +If a single data frame is provided, pairwise distances +between the observations are computed with \code{\link{calculate_dist}}. +If a single \code{clust_analysis} or \code{combi_analysis} object is provided, +cross-distances between the clusters within the object are computed - +so-called 'homologous' distances, as opposed to 'heterologous' distances +computed in a pair-wise manner between clusters of two clustering analysis +objects. +Note: it is not possible to compute heterologous distances in cases of +clustering analyses done with a user-provided dissimilarity matrix +(subclass \code{min_analysis} of \code{clust_analysis} parent class). In such cases, +the \code{method} argument is ignored as well. +} +\references{ +Drost H-G. Philentropy: Information Theory and Distance Quantification +with R. J Open Source Softw (2018) 3:765. doi:10.21105/joss.00765 + +Sulc Z, Cibulkova J, Rezankova H. nomclust: Hierarchical Cluster +Analysis of Nominal Data. 
(2021) +Available at: https://cran.r-project.org/package=nomclust +} diff --git a/man/cross_single_homolog.Rd b/man/cross_single_homolog.Rd new file mode 100644 index 0000000..11df229 --- /dev/null +++ b/man/cross_single_homolog.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_distance_utils.R +\name{cross_single_homolog} +\alias{cross_single_homolog} +\alias{cross_single_heterolog} +\alias{cross_multi_homolog} +\alias{cross_multi_heterolog} +\title{Cross-distances calculation helpers.} +\usage{ +cross_single_homolog(x, method) + +cross_single_heterolog(x, y, method) + +cross_multi_homolog(x) + +cross_multi_heterolog(x, y) +} +\arguments{ +\item{x}{a \code{clust_analysis} or \code{combi_analysis} object.} + +\item{method}{name of the distance metric. If NULL, it will be extracted +from the \code{x} object.} + +\item{y}{a \code{clust_analysis} or \code{combi_analysis} object.} +} +\value{ +an instance of the \code{cross_dist} class. +} +\description{ +The functions compute homologous and heterologous distance between the +clusters of a single or two clustering analysis objects, respectively. +} +\details{ +Designed solely for internal use. \code{cross_single_homolog()} and +\code{cross_single_heterolog()} compute cross-distances for single layer data. +\code{cross_multi_homolog()} and \code{cross_multi_heterolog()} tackle multi-layer SOM. +In multi-layer analyses, the cross-distance matrix is a weighted sum of +cross-distances between the corresponding data layers. The weights are +extracted from the SOM analysis object. 
+} diff --git a/man/cv.Rd b/man/cv.Rd new file mode 100644 index 0000000..d2a63c9 --- /dev/null +++ b/man/cv.Rd @@ -0,0 +1,155 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_validation.R +\name{cv} +\alias{cv} +\alias{cv.clust_analysis} +\alias{cv.min_analysis} +\alias{cv.combi_analysis} +\title{Cross-validate the clustering analysis object.} +\usage{ +cv(x, ...) + +\method{cv}{clust_analysis}( + x, + nfolds = 5, + type = c("propagation", "som"), + kNN = 5, + active_variables = FALSE, + simple_vote = TRUE, + resolve_ties = FALSE, + kernel_fun = function(x) 1/x, + kNN_data = 5, + kNN_cluster = NULL, + seed = 1234, + .parallel = FALSE, + ... +) + +\method{cv}{min_analysis}(x, ...) + +\method{cv}{combi_analysis}( + x, + nfolds = 5, + type = c("propagation", "som"), + kNN = 5, + active_variables = FALSE, + simple_vote = TRUE, + resolve_ties = FALSE, + kernel_fun = function(x) 1/x, + kNN_data = 5, + kNN_cluster = NULL, + seed = 1234, + .parallel = FALSE, + ... +) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments, currently none.} + +\item{nfolds}{number of CV folds.} + +\item{type}{type of the prediction algorithm: k-nearest neighbors +(propagation) or via the self-organizing map ('som', available only +for SOM and combined SOM clustering).} + +\item{kNN}{number of the nearest neighbors.} + +\item{active_variables}{logical, should only the active variables be used for +predictions of the cluster assignment with the k-NN classifier? Applies only +to analyses made with hard threshold regularization and ignored otherwise.} + +\item{simple_vote}{logical, should classical unweighted k-NN classification +be applied? If FALSE, distance-weighted k-NN is used with the provided kernel +function.} + +\item{resolve_ties}{logical, should the ties be resolved at random? 
Applies +only to the simple unweighted voting algorithm.} + +\item{kernel_fun}{kernel function transforming the distance into weight.} + +\item{kNN_data}{number of the nearest neighbors in the genuine data set +used for calculation of neighborhood preservation. See \code{\link{np}} +for details.} + +\item{kNN_cluster}{number of the nearest neighbors of the given cluster used +for calculation of neighborhood preservation. See \code{\link{np}} for +details.} + +\item{seed}{initial setting of the random number generator.} + +\item{.parallel}{logical, should the CV be run in parallel?} +} +\value{ +a list of class \code{cluster_cv} containing the following elements: +\itemize{ +\item the global \code{\link{clust_analysis}} object (\code{clust_analysis_object}) +\item kNN projection (prediction) results (\code{predictions}) +\item a data frame with the classification error, accuracy, fraction of +explained clustering variance, silhouette and neighbor preservation for +the out-of-fold predictions (\code{fold_stats}) +\item means and BCA's 95\% confidence intervals for the classification error, +accuracy, fraction of explained variance, silhouette and neighborhood +preservation (\code{summary}) +} + +Note the \code{\link{summary.cluster_cv}} and +\code{\link{extract.cluster_cv}} methods. +} +\description{ +Checks the quality of a clustering solution by +cross-validation (CV) with k-nearest neighbors (kNN) out-of-fold predictions +or predictions made by a self-organizing map (SOM). +Stability of the clustering structure is measured by cluster assignment +classification error in the out-of-fold predictions as compared with the +genuine clustering structure. Explanatory value and cluster separation are +determined by clustering variance and silhouette statistics. +} +\details{ +\code{cv()} is a S3 generic function. +By principle, cross-validation of a clustering structure is similar to +cross-validation of any machine learning multi-class classifier. 
+The training portion of a CV split is used to develop +a cluster structure, and the projection on the test portion is accomplished +by the k-nearest neighbor (kNN) label propagation algorithm or by deriving the +cluster assignment from a trained SOM. +For implementation details, see: \code{\link{propagate}}, +\code{\link{map_som}} and \code{\link{map_supersom}}. +The folds are generated with \code{\link[caret]{createFolds}}. +For \code{combi_analysis} objects, assignment of the observations to the CV folds +is done with the kNN algorithm for the 'top' assignment of the observations +to the clusters: nodes are ignored! +For \code{clust_analysis} and \code{combi_analysis} objects with multi-layered data +and clustering of U matrix, the SOM prediction method is the sole option. +Currently, it is not possible to cross-validate clustering analysis objects +generated with user-provided dissimilarity matrices +(subclass \code{min_analysis} of \code{clust_analysis}). +} +\references{ +Lange T, Roth V, Braun ML, Buhmann JM. Stability-based validation of +clustering solutions. Neural Comput (2004) 16:1299–1323. +doi:10.1162/089976604773717621 + +Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive semi-supervised +clustering algorithm with label propagation. J Softw Eng (2014) 8:14–22. +doi:10.3923/jse.2014.14.22 + +Kuhn M. Building predictive models in R using the caret package. +J Stat Softw (2008) 28:1–26. doi:10.18637/jss.v028.i05 + +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 + +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: Springer Berlin +Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 + +Venna J, Kaski S. Neighborhood preservation in nonlinear projection +methods: An experimental study. 
Lect Notes Comput Sci (including Subser Lect +Notes Artif Intell Lect Notes Bioinformatics) (2001) 2130:485–491. +doi:10.1007/3-540-44668-0_68 +} diff --git a/man/cv_cluster.Rd b/man/cv_cluster.Rd new file mode 100644 index 0000000..dfb0075 --- /dev/null +++ b/man/cv_cluster.Rd @@ -0,0 +1,131 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_validation.R +\name{cv_cluster} +\alias{cv_cluster} +\title{Cross-validate a clustering algorithm.} +\usage{ +cv_cluster( + data, + clust_assignment = NULL, + nfolds = 5, + type = c("propagation", "som"), + kNN = 5, + active_variables = FALSE, + simple_vote = TRUE, + resolve_ties = FALSE, + kernel_fun = function(x) 1/x, + clustering_fun = clustTools::kcluster, + kNN_data = 5, + kNN_cluster = NULL, + seed = 1234, + .parallel = FALSE, + ... +) +} +\arguments{ +\item{data}{a numeric data frame, matrix or a \code{red_analysis} object. If a +\code{red_analysis} object is provided as the data argument, the observation +component/score table is subjected to clustering. For multi-layer SOM +analysis, the \code{data} argument is a list of such objects.} + +\item{clust_assignment}{an optional data frame with two variables: +\code{observation} and \code{clust_id}, which defines the global cluster assignment. +If \code{NULL} (default), the global cluster assignment will be derived by +fitting the \code{clustering_fun} to the \code{data} - which can be a bit slower.} + +\item{nfolds}{number of CV folds.} + +\item{type}{type of the prediction algorithm: k-nearest neighbors +(propagation) or via the self-organizing map ('som', available only +for SOM and combined SOM clustering).} + +\item{kNN}{number of the nearest neighbors.} + +\item{active_variables}{logical, should only the active variables be used for +predictions of the cluster assignment with the k-NN classifier? 
Applies only +to analyses made with hard threshold regularization and ignored otherwise.} + +\item{simple_vote}{logical, should classical unweighted k-NN classification +be applied? If FALSE, distance-weighted k-NN is used with the provided kernel +function.} + +\item{resolve_ties}{logical, should the ties be resolved at random? Applies +only to the simple unweighted voting algorithm.} + +\item{kernel_fun}{kernel function transforming the distance into weight.} + +\item{clustering_fun}{clustering function. Should return a +\code{clust_analysis} object.} + +\item{kNN_data}{number of the nearest neighbors in the genuine data set +used for calculation of neighborhood preservation. See \code{\link{np}} +for details.} + +\item{kNN_cluster}{number of the nearest neighbors of the given cluster used +for calculation of neighborhood preservation. See \code{\link{np}} for +details.} + +\item{seed}{initial setting of the random number generator.} + +\item{.parallel}{logical, should the CV be run in parallel?} + +\item{...}{extra arguments passed to the clustering_fun.} +} +\value{ +a list containing the following elements: +\itemize{ +\item the global \code{\link{clust_analysis}} object (\code{clust_analysis_object}) +\item kNN projection (prediction) results (\code{predictions}) +\item a data frame with the classification error, accuracy, fraction of +explained clustering variance and silhouette for the out-of-fold +predictions (\code{fold_stats}) +\item means and BCA's 95\% confidence intervals for the classification error, +accuracy, fraction of explained variance and silhouette (\code{summary}) +} +} +\description{ +Checks the quality of a clustering solution by +cross-validation (CV) with k-nearest neighbors (kNN) out-of-fold predictions +or predictions made by a self-organizing map (SOM). +Stability of the clustering structure is measured by cluster assignment +classification error in the out-of-fold predictions as compared with the +genuine clustering structure. 
Explanatory value and cluster separation are +determined by clustering variance and silhouette statistics. +} +\details{ +By principle, the procedure is similar to cross-validation of any machine learning +multi-class classifier. The training portion of a CV split is used to develop +a cluster structure and the projection on the test portion is accomplished +by the k-nearest neighbor (kNN) label propagation algorithm or by deriving the +cluster assignment from a trained SOM. +For implementation details, see: \code{\link{propagate}} +and \code{\link{map_som}}. +The folds are generated with \code{\link[caret]{createFolds}}. +} +\references{ +Lange T, Roth V, Braun ML, Buhmann JM. Stability-based validation of +clustering solutions. Neural Comput (2004) 16:1299–1323. +doi:10.1162/089976604773717621 + +Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive semi-supervised +clustering algorithm with label propagation. J Softw Eng (2014) 8:14–22. +doi:10.3923/jse.2014.14.22 + +Kuhn M. Building predictive models in R using the caret package. +J Stat Softw (2008) 28:1–26. doi:10.18637/jss.v028.i05 + +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 + +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: Springer Berlin +Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 + +Venna J, Kaski S. Neighborhood preservation in nonlinear projection +methods: An experimental study. Lect Notes Comput Sci (including Subser Lect +Notes Artif Intell Lect Notes Bioinformatics) (2001) 2130:485–491.
+doi:10.1007/3-540-44668-0_68 +} diff --git a/man/data_node_distance.Rd b/man/data_node_distance.Rd new file mode 100644 index 0000000..418e4df --- /dev/null +++ b/man/data_node_distance.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood_utils.R +\name{data_node_distance} +\alias{data_node_distance} +\title{Distance between observations and nodes of a self-organizing map.} +\usage{ +data_node_distance(x, kNN = NULL) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} + +\item{kNN}{optional, number of the nearest neighbor nodes. If \code{NULL}, a numeric +matrix is returned. If provided, a list of nearest nodes named after +observations is returned.} +} +\value{ +a numeric matrix with observations in rows and nodes in columns. +If \code{kNN} is specified, a list of nearest neighbor nodes for observations is returned. +} +\description{ +Computes distances between observations and nodes of a self-organizing map +(SOM) with distance metrics extracted from a SOM clustering object. +} diff --git a/man/dbscan_cluster.Rd b/man/dbscan_cluster.Rd new file mode 100644 index 0000000..c8e51c6 --- /dev/null +++ b/man/dbscan_cluster.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering_functions.R +\name{dbscan_cluster} +\alias{dbscan_cluster} +\title{Density clustering with DBSCAN.} +\usage{ +dbscan_cluster( + data, + distance_method = "euclidean", + eps, + minPts = 5, + seed = 1234, + ...
+) +} +\arguments{ +\item{data}{a numeric data frame or matrix or a red_analysis object.} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}.} + +\item{eps}{size (radius) of the epsilon neighborhood.} + +\item{minPts}{number of minimum points required in the eps neighborhood for +core points (including the point itself).} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to \code{\link[dbscan]{dbscan}}.} +} +\value{ +an object of the class \code{\link{clust_analysis}}. +} +\description{ +Performs DBSCAN clustering analysis of a numeric data frame, +matrix or the results of a reduction analysis. +} +\details{ +Technically, a wrapper around \code{\link[dbscan]{dbscan}}. If a +red_analysis object is provided as the data argument, the observation +component/score table is subjected to clustering. +If a dissimilarity object of class \code{dist} is provided as \code{data}, the function +returns an instance of subclass of the \code{\link{clust_analysis}} class, +the \code{min_analysis} object. Such objects inherit most of the methods of the +fully fledged clustering analysis object. Yet, methods requiring the source +data frame such as reduction analysis with the genuine data, variable +importance analysis or computation of heterologous cross-distances +are not possible. Semi-supervised clustering and cross-validation are not +implemented at the moment for the +\code{min_analysis} objects. +} +\references{ +Hahsler M, Piekenbrock M, Doran D. Dbscan: Fast density-based clustering +with R. J Stat Softw (2019) 91:1–30. doi:10.18637/jss.v091.i01 +} diff --git a/man/dist.Rd b/man/dist.Rd new file mode 100644 index 0000000..d8d57ce --- /dev/null +++ b/man/dist.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R +\name{dist} +\alias{dist} +\alias{dist.default} +\title{Distance between observations.} +\usage{ +dist(x, ...) 
+ +\method{dist}{default}(x, ...) +} +\arguments{ +\item{x}{an object. For the default method a numeric matrix, data frame +or \code{dist} object.} + +\item{...}{arguments for methods, e.g. passed to \code{\link[stats]{dist}}.} +} +\description{ +Computes the distance between observations in a matrix, data frame +or other compatible objects. +} +\details{ +The default \code{dist()} method is a wrapper around +\code{\link[stats]{dist}}. +} diff --git a/man/dist2affi.Rd b/man/dist2affi.Rd new file mode 100644 index 0000000..15a5b00 --- /dev/null +++ b/man/dist2affi.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralization.R +\name{dist2affi} +\alias{dist2affi} +\title{Compute an affinity matrix.} +\usage{ +dist2affi( + dist_mtx, + kNN = 5, + weighted = TRUE, + simil_fun = function(x) 1/(1 + x) +) +} +\arguments{ +\item{dist_mtx}{a square distance matrix.} + +\item{kNN}{numeric, number of the nearest neighbors, has to be lower than +the dimension of \code{dist_mtx}.} + +\item{weighted}{logical, should the affinity matrix be weighted by +similarity? Defaults to \code{TRUE}.} + +\item{simil_fun}{a function used to convert pairwise distances +to pairwise similarities.} +} +\value{ +a numeric matrix. +} +\description{ +The function computes an affinity matrix given a dissimilarity/distance +matrix. +} +\details{ +The code is inspired by two sources: +https://rpubs.com/gargeejagtap/SpectralClustering +http://www.di.fc.ul.pt/~jpn/r/spectralclustering/spectralclustering.html. + +In brief, affinity matrices A in two flavors are computed: +\itemize{ +\item \emph{weighted}: if observations \code{i} and \code{j} are nearest neighbors as defined by +\code{kNN}, \code{A[i,j] = similarity(i,j)}. Otherwise, \code{A[i,j] = 0} +\item \emph{unweighted}: if observations \code{i} and \code{j} are nearest neighbors as +defined by \code{kNN}, \code{A[i,j] = 1}.
Otherwise, \code{A[i,j] = 0} +} + +Note that the conversion of pairwise distances to pairwise similarities +is done with the user-provided function \code{simil_fun}. The default \code{simil_fun} +is a popular transformation of Euclidean distances to Euclidean similarities. +It is, however, recommended to experiment with other functions such as +\code{1 - x} for binary distances, \code{2 - x} for cosine distances, or even with +some common kernel functions (e.g. Gaussian kernel). +} diff --git a/man/extract.Rd b/man/extract.Rd new file mode 100644 index 0000000..da79b12 --- /dev/null +++ b/man/extract.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extraction.R +\name{extract} +\alias{extract} +\alias{extract.clust_analysis} +\alias{model.frame.clust_analysis} +\alias{dist.clust_analysis} +\alias{dist.min_analysis} +\alias{extract.combi_analysis} +\alias{model.frame.combi_analysis} +\alias{dist.combi_analysis} +\alias{extract.red_analysis} +\alias{model.frame.red_analysis} +\alias{dist.red_analysis} +\title{Extract features of a clust_analysis object.} +\usage{ +extract(x, ...) + +\method{extract}{clust_analysis}( + x, + type = c("distance", "assignment", "clust_object", "data", "object", "umatrix"), + ... +) + +\method{model.frame}{clust_analysis}(formula, ...) + +\method{dist}{clust_analysis}(x, type = c("distance", "umatrix"), ...) + +\method{dist}{min_analysis}(x, ...) + +\method{extract}{combi_analysis}( + x, + type = c("distance", "assignment", "clust_object", "data", "object", "umatrix"), + ... +) + +\method{model.frame}{combi_analysis}(formula, ...) + +\method{dist}{combi_analysis}(x, type = c("distance", "umatrix"), ...) + +\method{extract}{red_analysis}( + x, + type = c("component_tbl", "scores", "loadings", "data", "sdev", "object", "distance", + "layout_distance"), + ... +) + +\method{model.frame}{red_analysis}(formula, ...) + +\method{dist}{red_analysis}(x, type = c("distance", "layout"), ...)
+} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments, currently none.} + +\item{type}{the feature name: +\itemize{ +\item \code{distance} extracts the matrix with distances between the observations, +\item \code{data} the data set used for the analysis, +\item \code{assignment} assignment of the observations to the clusters, +\item \code{clust_object} or \code{object} returns the wrapped clustering object. +\item \code{component_tbl} or \code{scores} return the component or score tables for the +observations, +\item \code{loadings} retrieves the table of variable loadings, +\item \code{sdev} returns standard deviations associated with the +components. +\item \code{umatrix} computes the U-matrix, i.e. weighted distance between +the self-organizing map (SOM) nodes. Available only for clustering analyses +done with SOM. +The U matrix is computed with \code{\link[kohonen]{object.distances}}. +\item \code{layout} and \code{layout_distance} compute distances between observations in +the dimensionality reduction layout. +}} + +\item{formula}{an object.} +} +\value{ +the requested feature/property. +} +\description{ +A general extractor method for accessing properties and features +of a \code{clust_analysis}, \code{combi_analysis} and \code{red_analysis} object, +and specific methods for accessing the modeling data frame +and distance matrix. +} +\details{ +\code{extract()} is an S3 generic function. +} diff --git a/man/extract.cluster_cv.Rd b/man/extract.cluster_cv.Rd new file mode 100644 index 0000000..6cf5aeb --- /dev/null +++ b/man/extract.cluster_cv.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cluster_cv_oop.R +\name{extract.cluster_cv} +\alias{extract.cluster_cv} +\alias{summary.cluster_cv} +\title{Cross-validation predictions and result summary.} +\usage{ +\method{extract}{cluster_cv}(x, type = c("predictions", "fold_stats"), ...) + +\method{summary}{cluster_cv}(object, ...)
+} +\arguments{ +\item{x}{results of clustering cross-validation, e.g. generated with +\code{\link{cv}} for \code{clust_analysis} or \code{combi_analysis} objects.} + +\item{type}{the element to be extracted: 'predictions' or 'fold_stats'.} + +\item{...}{extra arguments, currently none.} + +\item{object}{results of clustering cross-validation, e.g. generated with +\code{\link{cv}} for \code{clust_analysis} or \code{combi_analysis} objects.} +} +\description{ +The functions \code{extract.cluster_cv()} and \code{summary.cluster_cv()} retrieve +out-of-fold predictions, fold-means and global means with 95\% confidence +intervals from results of cross-validation. +} diff --git a/man/extract.tuner.Rd b/man/extract.tuner.Rd new file mode 100644 index 0000000..1f7395c --- /dev/null +++ b/man/extract.tuner.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tuner_oop.R +\name{extract.tuner} +\alias{extract.tuner} +\title{Extract cluster assignment predictions.} +\usage{ +\method{extract}{tuner}( + x, + type = c("clust_object", "analysis", "stats", "criteria", "best_tune"), + ... +) +} +\arguments{ +\item{x}{a \code{tuner} object.} + +\item{type}{feature to be extracted from the object: +\itemize{ +\item \code{clust_object} or \code{analysis} returns the \code{clust_analysis} object generated +with the best combination of the tuning parameters +\item \code{stats} extracts quality statistics for combinations of the tuning +parameters +\item \code{criteria} returns a data frame with criteria of selection of the best +combination of the tuning parameters +\item \code{best_tune} extracts a data frame with the best values of the tuning +parameters +}} + +\item{...}{extra arguments, currently none.} +} +\value{ +a \code{clust_analysis} object. +} +\description{ +The function extracts the \code{clust_analysis} object, quality statistics or +the best combination of the tuning parameters. 
+} diff --git a/man/ft_hm_single.Rd b/man/ft_hm_single.Rd new file mode 100644 index 0000000..06b0ec6 --- /dev/null +++ b/man/ft_hm_single.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{ft_hm_single} +\alias{ft_hm_single} +\alias{ft_hm_multi} +\title{Plot clustering feature heat map.} +\usage{ +ft_hm_single(x_object, y_object, line_color = NA, discrete_fill = FALSE) + +ft_hm_multi(x_object, line_color = NA, discrete_fill = FALSE) +} +\arguments{ +\item{x_object}{a \code{clust_analysis} or \code{combi_analysis} object, specifies +clustering of the observations.} + +\item{y_object}{a \code{clust_analysis} or \code{combi_analysis} object, specifies +clustering of the features, an optional parameter. Ignored in case of +multi-layer SOM.} + +\item{line_color}{color of the line around heat map tiles.} + +\item{discrete_fill}{logical, force a discrete fill scale?} +} +\value{ +a \code{ggplot} object (single-layer analysis) or a list of +\code{ggplot} objects (multi-layer case). +} +\description{ +Plotting of clustering features as a heat map (single layer analysis) +or a list of heat maps (multi-layer SOM). Intended for internal use. +} diff --git a/man/get_clust_tendency.Rd b/man/get_clust_tendency.Rd new file mode 100644 index 0000000..a7dd86d --- /dev/null +++ b/man/get_clust_tendency.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering_functions.R +\name{get_clust_tendency} +\alias{get_clust_tendency} +\title{Check clustering tendency of a data set.} +\usage{ +get_clust_tendency(data, n, seed = 1234, ...) +} +\arguments{ +\item{data}{a data frame, tibble or a matrix. 
Numeric variables only.} + +\item{n}{the number of points selected from sample space which is also the +number of points selected from the given sample (data).} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to +\code{\link[factoextra]{get_clust_tendency}}.} +} +\value{ +The values of the Hopkins statistic, p value and a heat map plot. +} +\description{ +Check clustering tendency of a data set as compared with a +random data set using Hopkins statistic. +} +\details{ +The p value for the Hopkins statistic is calculated based on the +beta distribution of its values. Technically, the function is an enriched +wrapper around \code{\link[factoextra]{get_clust_tendency}}. +} +\references{ +Hopkins B, Skellam JG. A New Method for determining the Type of +Distribution of Plant Individuals. +Ann Bot (1954) 18:213–227. doi:10.1093/OXFORDJOURNALS.AOB.A083391 +} diff --git a/man/get_data_dim.Rd b/man/get_data_dim.Rd new file mode 100644 index 0000000..84598ff --- /dev/null +++ b/man/get_data_dim.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_data_dim} +\alias{get_data_dim} +\title{Get dimensions of a data frame or matrix.} +\usage{ +get_data_dim(data) +} +\arguments{ +\item{data}{a data frame or matrix.} +} +\value{ +a list with the requested statistics. +} +\description{ +Gets the number of observations and variables of +the given object. 
+} diff --git a/man/get_sum_sq.Rd b/man/get_sum_sq.Rd new file mode 100644 index 0000000..a7e2b9b --- /dev/null +++ b/man/get_sum_sq.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_sum_sq} +\alias{get_sum_sq} +\title{Calculate clustering sum of squares.} +\usage{ +get_sum_sq(dist_mtx, assignment) +} +\arguments{ +\item{dist_mtx}{a numeric matrix with the distances.} + +\item{assignment}{a data frame with the variable 'clust_id' specifying the +assignment of the observations to the clusters.} +} +\value{ +a list with the values of within-cluster ss for the particular +clusters, total within-cluster ss, total ss, total between-cluster ss as well +as the ratio of between-cluster ss to total ss, interpreted as the fraction +of 'explained' clustering variance. +} +\description{ +Calculates total, within cluster and between cluster +sum of squares (ss). +} +\details{ +The calculation method is independent of the clustering method. +} diff --git a/man/hcluster.Rd b/man/hcluster.Rd new file mode 100644 index 0000000..ea1bb4d --- /dev/null +++ b/man/hcluster.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering_functions.R +\name{hcluster} +\alias{hcluster} +\title{Hierarchical clustering.} +\usage{ +hcluster( + data, + distance_method = "euclidean", + k = 2, + hc_method = "ward.D2", + seed = 1234, + ... +) +} +\arguments{ +\item{data}{a numeric data frame or matrix or a \code{red_analysis} object.} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}. 
Ignored if data is provided as +a distance matrix.} + +\item{k}{number of clusters.} + +\item{hc_method}{the hierarchical clustering algorithm, see: +\code{\link[stats]{hclust}} for details.} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to \code{\link[stats]{hclust}}.} +} +\value{ +an object of the class \code{\link{clust_analysis}}. +} +\description{ +Performs hierarchical clustering analysis of a numeric data +frame, matrix or the results of a reduction analysis. +} +\details{ +Technically, a wrapper around \code{\link[stats]{hclust}}. If a +\code{red_analysis} object is provided as the \code{data} argument, the observation +component/score table is subjected to clustering. +If a dissimilarity object of class \code{dist} is provided as \code{data}, the function +returns an instance of subclass of the \code{\link{clust_analysis}} class, +the \code{min_analysis} object. Such objects inherit most of the methods of the +fully fledged clustering analysis object. Yet, methods requiring the source +data frame such as reduction analysis with the genuine data, variable +importance analysis or computation of heterologous cross-distances +are not possible. Semi-supervised clustering and cross-validation are not +implemented at the moment for the +\code{min_analysis} objects. +} +\references{ +Murtagh F, Contreras P. Algorithms for hierarchical clustering: +An overview. Wiley Interdiscip Rev Data Min Knowl Discov +(2012) 2:86–97. 
doi:10.1002/widm.53 +} diff --git a/man/htk_cluster.Rd b/man/htk_cluster.Rd new file mode 100644 index 0000000..a0b1344 --- /dev/null +++ b/man/htk_cluster.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/regularized_clustering.R +\name{htk_cluster} +\alias{htk_cluster} +\alias{tune_htk} +\title{Regularized hard threshold KMEANS clustering.} +\usage{ +htk_cluster( + data, + k = 2, + lambdas = NULL, + select_stat = c("AIC", "BIC"), + seed = 1234, + ... +) + +tune_htk( + data, + k = 2, + lambdas = NULL, + select_stat = c("silhouette", "misclassification", "variance", "np"), + type = c("train", "cv"), + nfolds = 5, + kNN = 5, + simple_vote = TRUE, + resolve_ties = FALSE, + kernel_fun = function(x) 1/x, + kNN_data = 5, + kNN_cluster = NULL, + seed = 1234, + .parallel = FALSE, + ... +) +} +\arguments{ +\item{data}{a numeric data frame with observations in the rows and +variables in the columns.} + +\item{k}{number of centers (clusters).} + +\item{lambdas}{a numeric vector of the regularization parameter. See Details.} + +\item{select_stat}{statistic used for selection of the optimal lambda value. +Ignored if \code{lambdas} is a single numeric value. For \code{htk_cluster()} they are +'AIC' (Akaike Information Criterion) or 'BIC' (Bayesian Information +Criterion). For \code{tune_htk()} they are silhouette width ('silhouette'), +fraction of observations with negative silhouette values +('misclassification'), fraction of explained clustering variance ('variance'), +or neighborhood preservation ('np').} + +\item{seed}{root of the random number generator.} + +\item{...}{extra arguments provided to \code{\link[clusterHD]{HTKmeans}}.} + +\item{type}{type of the tuning procedure. When set to 'train' (default), +cluster structure quality statistics are computed for the entire data set. 
+When set to 'cv', cross-validated statistics for subsequent lambda values +are calculated.} + +\item{nfolds}{number of CV folds.} + +\item{kNN}{number of the nearest neighbors used by the cluster assignment +classifier in the cross-validation folds. Ignored if \code{type = 'train'}.} + +\item{simple_vote}{logical, should classical unweighted k-NN classification +be applied? If FALSE, distance-weighted k-NN is used with the provided kernel +function. Ignored if \code{type = 'train'}.} + +\item{resolve_ties}{logical, should the ties be resolved at random? Applies +only to the simple unweighted voting algorithm. Ignored if \code{type = 'train'}.} + +\item{kernel_fun}{kernel function transforming the distance into weight. +Ignored if \code{type = 'train'}.} + +\item{kNN_data}{number of the nearest neighbors in the genuine data set +used for calculation of neighborhood preservation. See \code{\link{np}} +for details.} + +\item{kNN_cluster}{number of the nearest neighbors of the given cluster used +for calculation of neighborhood preservation. See \code{\link{np}} for +details.} + +\item{.parallel}{logical, should the analysis be run in parallel?} +} +\value{ +\code{htk_cluster()} returns an object of the +class \code{\link{clust_analysis}}. +} +\description{ +Implements the hard threshold KMEANS algorithm proposed by Raymaekers +and Zamar and provided by the \code{clusterHD} package. +} +\details{ +The algorithm offers an interesting approach to clustering of +multi-dimensional data, especially those containing variables +of little relevance for the clustering analysis (e.g. as investigated by +permutation importance with \code{\link{impact}}). For details, please refer +to the genuine R function \code{\link[clusterHD]{HTKmeans}} and the seminal +paper. The function works with the squared Euclidean distance metric and +accepts only a numeric data frame as the input data.
+There are two crucial parameters to be provided by the user, the number of +centers/clusters \code{k}, which can be determined by methods such as peak mean +silhouette width, and the regularization argument \code{lambdas}, whose value can +be found by tuning (e.g. comparing silhouette widths or clustering variances +for various lambda values). If \code{lambdas} is set to \code{NULL} or provided as +a numeric vector, the best lambda value is found with the +\code{\link[clusterHD]{getLambda}}. If a single value is provided, it +will be used for clustering. +Tuning of the lambda parameter using explained clustering variance, +silhouette widths and neighbor preservation statistic is facilitated by the +\code{tune_htk()} function. +} +\references{ +Raymaekers J, Zamar RH. Regularized K-means Through Hard-Thresholding. +J Mach Learn Res (2022) 23:1–48. +Available at: http://jmlr.org/papers/v23/21-0052.html +} diff --git a/man/impact.Rd b/man/impact.Rd new file mode 100644 index 0000000..3474bd3 --- /dev/null +++ b/man/impact.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feature_importance.R +\name{impact} +\alias{impact} +\alias{impact.clust_analysis} +\alias{impact.min_analysis} +\alias{impact.combi_analysis} +\title{Permutation importance of clustering features.} +\usage{ +impact(x, ...) + +\method{impact}{clust_analysis}(x, n_iter = 1, seed = 1234, .parallel = FALSE, ...) + +\method{impact}{min_analysis}(x, ...) + +\method{impact}{combi_analysis}(x, n_iter = 1, seed = 1234, .parallel = FALSE, ...) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} + +\item{...}{extra arguments, currently none.} + +\item{n_iter}{number of iterations, 1 by default. 
+If the argument is larger than 1, the function is run multiple times, +which may help at testing variable importance in a more objective way +for different permutations.} + +\item{seed}{initial setting of the random number generator.} + +\item{.parallel}{logical, should the CV be run in parallel? Experimental.} +} +\value{ +a data frame of class \code{\link{importance}} with the defined +\code{\link{plot.importance}} and \code{\link{summary.importance}} methods. +} +\description{ +Determines importance of specific clustering variables by +comparing the fraction of 'explained' clustering variance of the input +clustering object and the object generated with the variable +re-shuffled randomly - so-called 'permutation' importance. +} +\details{ +\code{impact()} is an S3 generic function. +The permutation importance algorithm is 'blind' or agnostic to the +clustering procedure. +Note that it is not possible to compute clustering feature importance +for clustering analyses done with user-provided dissimilarity objects +(subclass \code{min_analysis} of \code{clust_analysis}). In such cases, \code{NULL} is +returned with a warning. +} +\references{ +Breiman L. Random forests. Mach Learn (2001) 45:5–32.
+doi:10.1023/A:1010933404324 +} diff --git a/man/importance.Rd b/man/importance.Rd new file mode 100644 index 0000000..d181f0c --- /dev/null +++ b/man/importance.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{importance} +\alias{importance} +\title{Construct an importance object.} +\usage{ +importance(x) +} +\arguments{ +\item{x}{a data frame with the following columns: +\itemize{ +\item \code{variable} with the names of variables +\item \code{total_wss} with total within-cluster sum of squares +\item \code{total_ss} with total sum of squares +\item \code{between_ss} with between-cluster sum of squares +\item \code{frac_var} with fraction of explained clustering variance +\item \code{frac_diff} with difference in fraction of explained clustering variance +between the genuine clustering analysis and the clustering analysis done with +the variable of interest modified. This variable stores the actual variable +importance metric. +}} +} +\value{ +a tibble of the \code{importance} class with the variables listed +in Details. +} +\description{ +Constructs an object of class \code{importance} on the top of +a tibble with the clustering variable importance testing results. +} diff --git a/man/importance_cluster.Rd b/man/importance_cluster.Rd new file mode 100644 index 0000000..5f6f7b8 --- /dev/null +++ b/man/importance_cluster.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feature_importance.R +\name{importance_cluster} +\alias{importance_cluster} +\title{Determine clustering feature importance.} +\usage{ +importance_cluster( + data, + clustering_fun = kcluster, + seed = 1234, + .parallel = FALSE, + ... +) +} +\arguments{ +\item{data}{a numeric data frame, matrix or a red_analysis object. 
If a +red_analysis object is provided as the data argument, the observation +component/score table is subjected to clustering.} + +\item{clustering_fun}{clustering function. Should return a +\code{clust_analysis} object.} + +\item{seed}{initial setting of the random number generator.} + +\item{.parallel}{logical, should the CV be run in parallel?} + +\item{...}{extra arguments passed to the clustering_fun.} +} +\value{ +a data frame with the values of sum of squares and the clustering +variances. +} +\description{ +Determines importance of specific clustering variables by +comparing the fraction of 'explained' clustering variance of the input +clustering object and the object generated with the variable +re-shuffled randomly - i.e. so called 'permutation' importance. +} +\references{ +Breiman L. Random forests. Mach Learn (2001) 45:5–32. +doi:10.1023/A:1010933404324 +} diff --git a/man/is_clust_analysis.Rd b/man/is_clust_analysis.Rd new file mode 100644 index 0000000..2db7c7a --- /dev/null +++ b/man/is_clust_analysis.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class_testing.R +\name{is_clust_analysis} +\alias{is_clust_analysis} +\alias{is_min_analysis} +\alias{is_umatrix_analysis} +\alias{is_combi_analysis} +\alias{is_red_analysis} +\alias{is_importance} +\alias{is_cross_dist} +\alias{is_sil_extra} +\alias{is_cluster_cv} +\alias{is_knb} +\alias{is_tuner} +\alias{is_spectre} +\title{Test class inheritance.} +\usage{ +is_clust_analysis(x) + +is_min_analysis(x) + +is_umatrix_analysis(x) + +is_combi_analysis(x) + +is_red_analysis(x) + +is_importance(x) + +is_cross_dist(x) + +is_sil_extra(x) + +is_cluster_cv(x) + +is_knb(x) + +is_tuner(x) + +is_spectre(x) +} +\arguments{ +\item{x}{an object.} +} +\value{ +a logical value. 
+} +\description{ +Tests if the object is an instance of the \code{red_analysis}, \code{clust_analysis}, +\code{combi_analysis}, \code{importance}, \code{cross_dist}, \code{sil_extra}, \code{min_analysis}, +\code{umatrix_analysis}, \code{cluster_cv}, \code{knb}, \code{tuner}, or \code{spectre} class. +} diff --git a/man/is_multi_layer.Rd b/man/is_multi_layer.Rd new file mode 100644 index 0000000..0d2922e --- /dev/null +++ b/man/is_multi_layer.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{is_multi_layer} +\alias{is_multi_layer} +\title{Check for multi-layer analysis.} +\usage{ +is_multi_layer(x) +} +\arguments{ +\item{x}{an instance of the \code{clust_analysis} or \code{combi_analysis} class.} +} +\value{ +a logical value +} +\description{ +Checks if the \code{clust_analysis} or \code{combi_analysis} object uses +multi-layer SOM. +} diff --git a/man/kcluster.Rd b/man/kcluster.Rd new file mode 100644 index 0000000..d69ab7d --- /dev/null +++ b/man/kcluster.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering_functions.R +\name{kcluster} +\alias{kcluster} +\title{K-means or medoid clustering.} +\usage{ +kcluster( + data, + distance_method = "euclidean", + clust_fun = c("kmeans", "pam"), + k = 2, + seed = 1234, + ... +) +} +\arguments{ +\item{data}{a numeric data frame or matrix or a red_analysis object.} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}.} + +\item{clust_fun}{the name of the clustering function, currently implemented +are 'kmeans' and 'pam'.} + +\item{k}{number of clusters.} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to \code{\link[stats]{kmeans}} or +\code{\link[cluster]{pam}}.} +} +\value{ +an object of the class \code{\link{clust_analysis}}. 
+} +\description{ +Performs k-means and PAM (partition around medoids) clustering +analysis of a numeric data frame, matrix or the results of +a reduction analysis. +} +\details{ +Technically, a wrapper around \code{\link[stats]{kmeans}} and +\code{\link[cluster]{pam}}. If a red_analysis object is provided as the +data argument, the observation component/score table is subjected to +clustering. +If a dissimilarity object of class \code{dist} is provided as \code{data}, the function +returns an instance of a subclass of the \code{\link{clust_analysis}} class, +the \code{min_analysis} object. Such objects inherit most of the methods of the +fully fledged clustering analysis object. Yet, methods requiring the source +data frame such as reduction analysis with the genuine data, variable +importance analysis or computation of heterologous cross-distances +are not possible. Semi-supervised clustering and cross-validation are not +implemented at the moment for the +\code{min_analysis} objects. +} +\references{ +Hartigan JA, Wong MA. Algorithm AS 136: A K-Means Clustering Algorithm. +Appl Stat (1979) 28:100. doi:10.2307/2346830 + +Schubert E, Rousseeuw PJ. Faster k-Medoids Clustering: Improving the PAM, +CLARA, and CLARANS Algorithms. in Lecture Notes in Computer Science +(including subseries Lecture Notes in Artificial Intelligence and +Lecture Notes in Bioinformatics) (Springer), 171–187. +doi:10.1007/978-3-030-32047-8_16 +} diff --git a/man/knb.Rd b/man/knb.Rd new file mode 100644 index 0000000..f7f54f9 --- /dev/null +++ b/man/knb.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{knb} +\alias{knb} +\title{Create a \code{knb} class object.} +\usage{ +knb(x, ...) +} +\arguments{ +\item{x}{a data frame as specified in Details.} + +\item{...}{extra arguments, currently none.} +} +\value{ +an instance of the \code{knb} class as described in Details.
+} +\description{ +Creates an instance of the \code{knb} class at the top of a data frame. +} +\details{ +\code{knb} objects store results of testing neighborhood preservation by +self-organizing maps and clustering analyses and inherit most of their +methods from data frames. +The input data frame has to contain the following columns: +\itemize{ +\item \code{observation} with observation identifiers +\item \code{clust_id} with assignment of the observations to the clusters +\item \code{kNN_data} with the number of nearest data point neighbors +\item \code{kNN_cluster} with the number of nearest cluster neighbors +} +} diff --git a/man/map_som.Rd b/man/map_som.Rd new file mode 100644 index 0000000..e7b1cf3 --- /dev/null +++ b/man/map_som.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/semi_supervised.R +\name{map_som} +\alias{map_som} +\alias{map_supersom} +\title{Predict node assignment for self-organizing map.} +\usage{ +map_som(object, newdata = NULL, ...) + +map_supersom(object, newdata = NULL, ...) +} +\arguments{ +\item{object}{a \code{clust_analysis} or a \code{combi_analysis} object.} + +\item{newdata}{a numeric data frame, matrix or a \code{red_analysis} object or a +list containing such objects. +If NULL (default), the bare cluster assignment table is returned.} + +\item{...}{extra arguments passed to \code{\link[kohonen]{map.kohonen}}.} +} +\value{ +a \code{\link{clust_analysis}} object. +} +\description{ +Predictions of assignment of the observations to the self-organizing map +SOM nodes are made with the trained SOM neuronal network. The distances, +weights and SOM architecture are extracted from the provided \code{clust_analysis} +or \code{combi_analysis} object. +} +\details{ +Uses \code{\link[kohonen]{map.kohonen}} for mapping of the observations onto +the SOM nodes. +If a \code{red_analysis} object is provided as \code{newdata}, the cluster +assignment is predicted for the component/score table. 
The \code{newdata} input +has to have the same variables as those used for development of the input +\code{clust_analysis} or \code{combi_analysis} object. +} +\references{ +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: Springer Berlin +Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 +} diff --git a/man/mds.Rd b/man/mds.Rd new file mode 100644 index 0000000..7543da1 --- /dev/null +++ b/man/mds.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduction_functions.R +\name{mds} +\alias{mds} +\alias{pca} +\alias{umap} +\alias{fa} +\title{Perform dimensionality reduction.} +\usage{ +mds(data, distance_method = "euclidean", kdim = 2, ...) + +pca(data, kdim = 2, ...) + +umap(data, distance_method, kdim, ...) + +fa(data, kdim = 2, ...) +} +\arguments{ +\item{data}{a data frame or a distance object (class \code{dist}).} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}. Ignored if \code{data} is a distance object.} + +\item{kdim}{dimension number.} + +\item{...}{extra arguments passed to +\code{\link[stats]{cmdscale}}, \code{\link[pcaPP]{PCAproj}}, +\code{\link[umap]{umap}} or \code{\link[stats]{factanal}}.} +} +\value{ +an object of the class \code{\link{red_analysis}}. +} +\description{ +Performs +multi-dimensional scaling (\code{mds()} via \code{\link[stats]{cmdscale}}), +principal component analysis (\code{pca()} via \code{\link[pcaPP]{PCAproj}}), +UMAP (\code{umap()} via \code{\link[umap]{umap}}), +or factor analysis (\code{fa()} via \code{\link[stats]{factanal}}). +} +\details{ +UMAP parameters such as dimension number or distance are provided +as a \code{\link[umap]{umap.defaults}} object. +} +\references{ +McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation and +Projection for Dimension Reduction.
(2018) +Available at: https://arxiv.org/abs/1802.03426v3 + +Croux C, Filzmoser P, Oliveira MR. Algorithms for Projection-Pursuit robust +principal component analysis. Chemom Intell Lab Syst (2007) 87:218–225. +doi:10.1016/j.chemolab.2007.01.004 + +BARTLETT MS. THE STATISTICAL CONCEPTION OF MENTAL FACTORS. Br J Psychol +Gen Sect (1937) 28:97–104. doi:10.1111/j.2044-8295.1937.tb00863.x +} diff --git a/man/n_preservation.Rd b/man/n_preservation.Rd new file mode 100644 index 0000000..dd8ab68 --- /dev/null +++ b/man/n_preservation.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood_utils.R +\name{n_preservation} +\alias{n_preservation} +\alias{np_reduction} +\title{Neighborhood preservation.} +\usage{ +n_preservation( + data_dist, + clust_assignment, + clust_dist = NULL, + kNN_data = 5, + kNN_cluster = 1 +) + +np_reduction(data_dist, layout_dist, kNN_data = 5) +} +\arguments{ +\item{data_dist}{a matrix of distances between data points.} + +\item{clust_assignment}{a data frame with the cluster assignment scheme.} + +\item{clust_dist}{an optional matrix of distances between the clusters. +Ignored if \code{kNN_cluster = 1}.} + +\item{kNN_data}{number of k-nearest neighbors for data points.} + +\item{kNN_cluster}{number of k-nearest neighbors for clusters} + +\item{layout_dist}{a matrix of distances between data points following +dimensionality reduction.} +} +\value{ +An object of the \code{\link{knb}} class with +\code{\link{summary.knb}} and +\code{\link{plot.knb}} methods. +} +\description{ +Computes neighborhood preservation stats. +} +\details{ +For internal use. For computation details, see: \code{\link{np}}. 
+} diff --git a/man/ngroups.Rd b/man/ngroups.Rd new file mode 100644 index 0000000..0119931 --- /dev/null +++ b/man/ngroups.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/numbers.R +\name{ngroups} +\alias{ngroups} +\alias{ngroups.clust_analysis} +\alias{ngroups.combi_analysis} +\title{Numbers of observations in the clusters.} +\usage{ +ngroups(x, ...) + +\method{ngroups}{clust_analysis}(x, ...) + +\method{ngroups}{combi_analysis}(x, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments passed to methods, currently none.} +} +\description{ +Compute numbers of observations in the clusters or, for \code{combi_analysis} +objects, numbers of observations in the SOM nodes and clusters. +} +\details{ +\code{ngroups()} is an S3 generic function. +} diff --git a/man/nobs.clust_analysis.Rd b/man/nobs.clust_analysis.Rd new file mode 100644 index 0000000..f2ad69d --- /dev/null +++ b/man/nobs.clust_analysis.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/numbers.R +\name{nobs.clust_analysis} +\alias{nobs.clust_analysis} +\alias{nobs.min_analysis} +\alias{nobs.red_analysis} +\alias{nobs.combi_analysis} +\title{Number of observations and variables for dimensionality reduction and clustering.} +\usage{ +\method{nobs}{clust_analysis}(object, ...) + +\method{nobs}{min_analysis}(object, ...) + +\method{nobs}{red_analysis}(object, ...) + +\method{nobs}{combi_analysis}(object, ...) +} +\arguments{ +\item{object}{an object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a list with the numbers of observations and variables. +} +\description{ +Computes numbers of observations and variables used in the analyses. +} +\details{ +Please note that variable numbers for clustering analyses done on +user-provided dissimilarity objects (the \code{min_analysis} subclass of +\code{clust_analysis}) cannot be calculated.
+} diff --git a/man/nobs.spectre.Rd b/man/nobs.spectre.Rd new file mode 100644 index 0000000..a67e27e --- /dev/null +++ b/man/nobs.spectre.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectre_oop.R +\name{nobs.spectre} +\alias{nobs.spectre} +\title{Numbers of observations used for spectral decomposition.} +\usage{ +\method{nobs}{spectre}(object, ...) +} +\arguments{ +\item{object}{an object of class \code{spectre}.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a numeric value. +} +\description{ +Number of observations used for spectral decomposition. +} diff --git a/man/node_neighbors.Rd b/man/node_neighbors.Rd new file mode 100644 index 0000000..dac3797 --- /dev/null +++ b/man/node_neighbors.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood_utils.R +\name{node_neighbors} +\alias{node_neighbors} +\title{Find neighbor nodes in the initial layout of a self-organizing map.} +\usage{ +node_neighbors(x) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} +} +\value{ +a list named after node identifiers with vectors of identifiers of +the nearest nodes. +} +\description{ +Identifies nearest nodes in the initial layout of a self-organizing map, +i.e. prior to fitting to the data points. +} +\details{ +The grid architecture information is extracted from the \code{kohonen} object. +The function returns NULL with a warning when applied to a non-SOM clustering +analysis object. +} diff --git a/man/np.Rd b/man/np.Rd new file mode 100644 index 0000000..e310240 --- /dev/null +++ b/man/np.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood.R +\name{np} +\alias{np} +\alias{np.clust_analysis} +\alias{np.combi_analysis} +\alias{np.red_analysis} +\title{Neighborhood preservation.} +\usage{ +np(x, ...) + +\method{np}{clust_analysis}(x, kNN_data = 5, kNN_cluster = NULL, ...)
+ +\method{np}{combi_analysis}(x, kNN_data = 5, kNN_cluster = NULL, type = c("data", "node", "final"), ...) + +\method{np}{red_analysis}(x, kNN_data = 5, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments passed to methods.} + +\item{kNN_data}{number of k-nearest neighbors for data points.} + +\item{kNN_cluster}{number of k-nearest neighbors for clusters. If \code{NULL}, +the values will be determined automatically. In this case \code{kNN_cluster = 1} +for non-SOM cluster analyses and \code{kNN_cluster = kNN_data} for SOM analyses.} + +\item{type}{type of data used for calculation of the neighborhood +preservation. For \code{type = 'data'}, the comparison of neighborhoods is done +between the data points and their SOM node assignment. For \code{type = 'node'}, +neighborhoods of the nodes and final clusters are compared. +For \code{type = 'final'}, the analysis is done at the top level, i.e. +neighborhoods of the data points and the final cluster assignment +are evaluated.} +} +\value{ +An object of the \code{\link{knb}} class with +\code{\link{summary.knb}} and +\code{\link{plot.knb}} methods. +} +\description{ +Checks the fraction of the nearest neighbors of data points that are +located in the same cluster or in the node of a self-organizing map (SOM). +} +\details{ +The function computes fractions of nearest neighbors of each data point that +are preserved after dimensionality reduction or clustering analysis. +In case of reduction analyses, the neighborhoods in the genuine data set and +the data set following dimensionality reduction ('layout') are compared. +In case of simple clustering analyses, the function calculates fractions of +the k-nearest neighbors located in k-nearest clusters of the given data +point. By default, the number of nearest clusters is set to one +(\code{kNN_cluster}), which means that \code{np()} simply checks which fraction of the +nearest data point neighbors is placed in the same cluster.
In such a case, +the neighborhood preservation fraction averaged for the entire clustering +object and particular clusters gives a measure of cluster separation, similar +to \code{\link[cluster]{silhouette}}. +} +\references{ +Venna J, Kaski S. Neighborhood preservation in nonlinear projection methods: +An experimental study. Lect Notes Comput Sci (including Subser Lect Notes +Artif Intell Lect Notes Bioinformatics) (2001) 2130:485–491. +doi:10.1007/3-540-44668-0_68 + +Breard GT. Evaluating Self-Organizing Map Quality Measures as Convergence +Criteria. Open Access Master’s Theses. Paper 1033. +Available at: https://digitalcommons.uri.edu/theses/1033 +} diff --git a/man/permute_clust_data.Rd b/man/permute_clust_data.Rd new file mode 100644 index 0000000..d52d021 --- /dev/null +++ b/man/permute_clust_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{permute_clust_data} +\alias{permute_clust_data} +\title{Permute clustering variables.} +\usage{ +permute_clust_data(data) +} +\arguments{ +\item{data}{a data frame, matrix or a list of such objects.} +} +\value{ +a list of data frames or data lists. +The first element \code{data} stores the genuine data set. +Each subsequent element is named after the permuted variable. +} +\description{ +Creates a series of clustering data sets with consecutive variables +reshuffled at random (permuted). Works for single- and multiple-layer data.
+} diff --git a/man/plot.clust_analysis.Rd b/man/plot.clust_analysis.Rd new file mode 100644 index 0000000..911e9ba --- /dev/null +++ b/man/plot.clust_analysis.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clust_analysis_oop.R +\name{plot.clust_analysis} +\alias{plot.clust_analysis} +\title{Plot selected features of a clust_analysis object.} +\usage{ +\method{plot}{clust_analysis}( + x, + type = c("diagnostic", "components", "heat_map", "training", "data"), + cust_theme = ggplot2::theme_classic(), + jitter_width = 0, + jitter_height = 0, + point_alpha = 1, + ... +) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} + +\item{type}{the type of plots: +\itemize{ +\item 'diagnostic' returns a series of diagnostic plots, +for non-SOM clustering those include a dendrogram (hierarchical clustering), +WSS and silhouette curve (see: \code{\link{plot_nbclust}}) or the complete +output of \code{\link[kohonen]{plot.kohonen}} +\item 'components' plots the results of reduction analysis done with the +clustering data, distance matrix or, for SOM, with U matrix +(see: \code{\link{components.clust_analysis}}) +\item 'heat_map' plots the distances between observations as a heat map +\item 'training' plots the mean distance to the SOM winning unit as a function +of the iteration number +\item 'data' works only if reduction analysis results were used +for clustering and plots the first two components/dimensions. +}} + +\item{cust_theme}{a ggplot theme.} + +\item{jitter_width}{horizontal jittering of the points in the plots.} + +\item{jitter_height}{vertical jittering of the points in the plots.} + +\item{point_alpha}{scatter plot's point alpha.} + +\item{...}{extra arguments passed to \code{\link{components.clust_analysis}}.} +} +\value{ +a \code{ggplot} object or a list of \code{ggplot} objects, as specified by the +'type' argument and character of the object. 
+} +\description{ +The plotting method for the \code{clust_analysis} class. Enables +plotting of the standard diagnostic plots used for the optimal cluster number +determination (dendrogram, WSS- and silhouette curve), results of the +reduction analysis, heat map of the distances between the observations as +well as the self-organizing map training process. It is also possible to plot +the first two variables of the clustering data frame, an option which is +attractive, if the clustering of reduction analysis was performed. +} +\references{ +Kassambara A, Mundt F. factoextra: Extract and Visualize the Results +of Multivariate Data Analyses. (2020) Available +at: https://cran.r-project.org/web/packages/factoextra/index.html + +Galili T. dendextend: an R package for visualizing, adjusting and +comparing trees of hierarchical clustering. +Bioinformatics (2015) 31:3718–20. doi:10.1093/bioinformatics/btv428 + +McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation +and Projection for Dimension Reduction. (2018) Available +at: https://arxiv.org/abs/1802.03426v3 + +Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: +Earth and Environmental Science Determination of Optimal Epsilon (Eps) +Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in +Sumatra Related content EPS conference comes to London-EPS rewards +quasiparticle research-EP. IOP Conf Ser Earth Environ Sci (2016) 31: +doi:10.1088/1755-1315/31/1/012012 + +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. 
+doi:10.1016/0377-0427(87)90125-7 +} diff --git a/man/plot.combi_analysis.Rd b/man/plot.combi_analysis.Rd new file mode 100644 index 0000000..2665a68 --- /dev/null +++ b/man/plot.combi_analysis.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combi_analysis_oop.R +\name{plot.combi_analysis} +\alias{plot.combi_analysis} +\alias{plot.umatrix_analysis} +\title{Plot selected features of a combi_analysis object.} +\usage{ +\method{plot}{combi_analysis}( + x, + type = c("diagnostic", "components", "heat_map", "training", "data"), + cust_theme = ggplot2::theme_classic(), + jitter_width = 0, + jitter_height = 0, + point_alpha = 1, + ... +) + +\method{plot}{umatrix_analysis}( + x, + type = c("diagnostic", "components", "heat_map", "training", "data"), + cust_theme = ggplot2::theme_classic(), + jitter_width = 0, + jitter_height = 0, + point_alpha = 1, + ... +) +} +\arguments{ +\item{x}{a \code{combi_analysis} object.} + +\item{type}{the type of plots: +\itemize{ +\item \code{diagnostic} returns a series of diagnostic plots for the SOM construction +and clustering of the nodes +\item \code{components} plots the results of reduction analysis done with the +clustering data or the distance matrix +(see: \code{\link{components.clust_analysis}}) +\item \code{heat_map} plots the distances between observations and nodes as heat maps +\item \code{training} plots the mean distance to the SOM winning unit as a function +of the iteration number +\item \code{data} works only if reduction analysis results were used for clustering +and plots the first two components/dimensions. 
+}} + +\item{cust_theme}{a ggplot theme.} + +\item{jitter_width}{horizontal jittering of the points in the plots.} + +\item{jitter_height}{vertical jittering of the points in the plots.} + +\item{point_alpha}{scatter plot's point alpha.} + +\item{...}{extra arguments passed to \code{\link{components.clust_analysis}}.} +} +\value{ +a ggplot object or a list of ggplot objects, as specified by the +'type' argument +} +\description{ +The plotting method for the \code{combi_analysis} class. Enables +plotting of the standard diagnostic plots used for the optimal cluster number +determination for the node clustering (dendrogram, WSS- and +silhouette curve), results of the reduction analysis, +heat map of the distances between the observations and SOM nodes as +well as the self-organizing map training process. It is also possible to plot +the first two variables of the clustering data frame, an option which is +attractive, if the clustering of reduction analysis was performed. +} +\references{ +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 + +Kassambara A, Mundt F. factoextra: Extract and Visualize the Results +of Multivariate Data Analyses. (2020) Available +at: https://cran.r-project.org/web/packages/factoextra/index.html + +Galili T. dendextend: an R package for visualizing, adjusting and +comparing trees of hierarchical clustering. +Bioinformatics (2015) 31:3718–20. doi:10.1093/bioinformatics/btv428 + +McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation +and Projection for Dimension Reduction. (2018) Available +at: https://arxiv.org/abs/1802.03426v3 + +Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: +Earth and Environmental Science Determination of Optimal Epsilon (Eps) +Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in +Sumatra Related content EPS conference comes to London-EPS rewards +quasiparticle research-EP. 
IOP Conf Ser Earth Environ Sci (2016) 31: +doi:10.1088/1755-1315/31/1/012012 + +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 +} diff --git a/man/plot.cross_dist.Rd b/man/plot.cross_dist.Rd new file mode 100644 index 0000000..6499425 --- /dev/null +++ b/man/plot.cross_dist.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_dist_oop.R +\name{plot.cross_dist} +\alias{plot.cross_dist} +\title{Plots of cross-distances.} +\usage{ +\method{plot}{cross_dist}( + x, + type = c("heat_map", "mean", "histogram"), + reorder = FALSE, + upper = TRUE, + signif_digits = 2, + line_color = "black", + show_txt = TRUE, + txt_size = 2.75, + labeller = NULL, + cust_theme = ggplot2::theme_classic(), + ... +) +} +\arguments{ +\item{x}{a \code{cross_dist} class object.} + +\item{type}{type of the plot: +\code{heat_map} (default) generates a heat map of +homologous or heterologous cross-distances for observation pairs with +mean distances and 95\% ranges of distances, +\code{mean} plots mean distances with 95\% ranges as a heat map, and +\code{histogram} generates a faceted panel of cross-distance histograms +(for heterologous distances, x object clusters are represented by horizontal +facets, y object clusters are represented by vertical facets).} + +\item{reorder}{logical: should distances in the heat maps be ordered +by mean distance? Defaults to FALSE.} + +\item{upper}{should the upper half of the distance heat map be plotted?
+Defaults to TRUE.} + +\item{signif_digits}{significant digits for mean distances and distance +ranges presented in the heat map.} + +\item{line_color}{color of the line around the tile, used only if \code{type} +is set to 'mean'.} + +\item{show_txt}{logical, should the mean distance be presented in the plot?} + +\item{txt_size}{size of the mean distance text.} + +\item{labeller}{a \code{\link[ggplot2]{labeller}} object to provide +customized labels of the facets of the histogram panel.} + +\item{cust_theme}{a custom ggplot theme.} + +\item{...}{extra arguments, such as color or number of bins, passed to +\code{\link[ggplot2]{geom_histogram}}.} +} +\value{ +a \code{ggplot} graphic, whose elements like themes or fill scales can be easily +modified by the user. +} +\description{ +Visualizes pairwise cross-distances as heat maps for observation pairs, +heat maps of average cross-distances between the clusters or histograms. +} diff --git a/man/plot.importance.Rd b/man/plot.importance.Rd new file mode 100644 index 0000000..f29f046 --- /dev/null +++ b/man/plot.importance.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importance_oop.R +\name{plot.importance} +\alias{plot.importance} +\title{Plot feature importance as a scatter or bar plot.} +\usage{ +\method{plot}{importance}( + x, + type = c("scatter", "bar"), + fill_color = "cornsilk3", + point_color = fill_color, + point_size = 2, + point_alpha = 0.5, + point_wjitter = 0, + point_hjitter = 0.1, + box_alpha = 0.25, + label = TRUE, + txt_size = 2.75, + signif_digits = 2, + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + cust_theme = ggplot2::theme_classic(), + ... +) +} +\arguments{ +\item{x}{an \code{importance} object.} + +\item{type}{type of the plot: scatter or bar. Defaults to scatter. +This parameter is silently ignored, if evaluation of the importance was done +in multiple iterations +(e.g.
\code{n_iter} set to > 1 in \code{\link{impact.clust_analysis}}). +In such cases, a box plot of importance metrics obtained in the algorithm +iterations is generated.} + +\item{fill_color}{fill color for the bars or boxes.} + +\item{point_color}{color of the points, refers only to scatter and box plots.} + +\item{point_size}{size of the points, refers only to scatter and box plots.} + +\item{point_alpha}{alpha of the points, refers only to box plots.} + +\item{point_wjitter}{width of the data point jittering, refers only to +box plots.} + +\item{point_hjitter}{height of the data point jittering, refers only to +box plots.} + +\item{box_alpha}{alpha of the boxes, refers only to box plots.} + +\item{label}{logical, should the points be labeled with the importance +stat value?} + +\item{txt_size}{label text size.} + +\item{signif_digits}{significant digits for rounding of the statistic value.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{plot_tag}{plot tag.} + +\item{cust_theme}{a ggplot theme.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a ggplot bar, scatter or box plot. +} +\description{ +Generates a bar, scatter or box plot with the importance +statistic for the clustering variables. +The importance statistic is the difference in the +clustering variance fraction between the original clustering structure and +the clustering objects with the given variable reshuffled randomly. +} diff --git a/man/plot.knb.Rd b/man/plot.knb.Rd new file mode 100644 index 0000000..012551e --- /dev/null +++ b/man/plot.knb.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/knb_oop.R +\name{plot.knb} +\alias{plot.knb} +\title{Plot distribution of neighborhood preservation statistic.} +\usage{ +\method{plot}{knb}( + x, + show_stats = TRUE, + signif_digits = 2, + cust_theme = ggplot2::theme_classic(), + bar_color = "black", + bar_fill = "steelblue", + ...
+) +} +\arguments{ +\item{x}{an object of the \code{\link{knb}} class.} + +\item{show_stats}{logical, should the number of observations in the cluster, +percentage of negative silhouette widths and average silhouette statistic +be shown in the plot? Defaults to TRUE.} + +\item{signif_digits}{significant digits used for rounding of the statistics +presented in the plot.} + +\item{cust_theme}{custom ggplot theme.} + +\item{bar_color}{color of the bar line.} + +\item{bar_fill}{color of the bars, relevant only for objects without +cluster assignment.} + +\item{...}{extra arguments passed to \code{\link[ggplot2]{geom_bar}}.} +} +\value{ +a \code{ggplot} class graphic. +} +\description{ +Generates a bar plot of neighborhood preservation statistic values for +observations and clusters, similar to a classical silhouette plot (see: +\code{\link{plot.sil_extra}}). +} diff --git a/man/plot.red_analysis.Rd b/man/plot.red_analysis.Rd new file mode 100644 index 0000000..16d5682 --- /dev/null +++ b/man/plot.red_analysis.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/red_analysis_oop.R +\name{plot.red_analysis} +\alias{plot.red_analysis} +\alias{plot.clust_red} +\title{Plot features of a \code{red_analysis} object.} +\usage{ +\method{plot}{red_analysis}( + x, + type = c("component_tbl", "scores", "loadings", "scree", "neighborhood"), + label_points = TRUE, + label_clust = FALSE, + cust_theme = ggplot2::theme_classic(), + segment_color = "steelblue", + ... +) + +\method{plot}{clust_red}( + x, + type = c("component_tbl", "scores", "loadings", "scree", "neighborhood"), + label_points = TRUE, + label_clust = TRUE, + cust_theme = ggplot2::theme_classic(), + segment_color = "steelblue", + ... 
+) +} +\arguments{ +\item{x}{a \code{red_analysis} object, created with \code{\link{reduce_data}} or +\code{components()} called for clustering analyses.} + +\item{type}{plot type: +\itemize{ +\item 'component_tbl' or 'scores' present the scores (layout) for particular +observations in a scatter plot. If \code{label_clust = TRUE}, point color codes +for the optional cluster assignment information. +\item 'loadings' plots the variable PCA loadings as a scatter plot. +\item 'scree' plots the percentage of component's variances as a line plot. +\item 'neighborhood' plots fractions of nearest neighbors of the data points +preserved in the neighborhood in the reduced layout. See: \code{\link{np}}. +}} + +\item{label_points}{logical, should the variable names be displayed in the +plot? Valid only for the PCA loadings plot.} + +\item{label_clust}{logical, should the cluster assignment (if available) be +coded by the point color?} + +\item{cust_theme}{a ggplot plot theme.} + +\item{segment_color}{color of the lines presented in the PCA loading plot.} + +\item{...}{extra arguments passed to \code{\link{plot_point}} +('component_tbl', 'scores', 'loadings') or \code{\link{np}} ('neighborhood').} +} +\value{ +a \code{ggplot} object. +} +\description{ +Plots the component table, loadings table - in both cases the +first two components/dimensions in form of scatter plots - or generates +a scree plot of the variance percentages associated with +the components/dimensions. +} +\details{ +The loadings table plot is available only for the PCA and factor +analysis \code{red_analysis} objects. +For \code{red_analysis} objects created with \code{clust_analysis} objects, scatter +plots of the scores/components table/layout can convey +the cluster assignment information coded by the point or bar color +(\code{label_clust = TRUE}).
+} diff --git a/man/plot.sil_extra.Rd b/man/plot.sil_extra.Rd new file mode 100644 index 0000000..d0d5d6a --- /dev/null +++ b/man/plot.sil_extra.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sil_oop.R +\name{plot.sil_extra} +\alias{plot.sil_extra} +\title{Plots of silhouette statistics.} +\usage{ +\method{plot}{sil_extra}( + x, + fill_by = c("cluster", "neighbor", "value", "sign"), + show_stats = TRUE, + signif_digits = 2, + cust_theme = ggplot2::theme_classic(), + bar_color = "black", + ... +) +} +\arguments{ +\item{x}{an object of the \code{\link{sil_extra}} class.} + +\item{fill_by}{defines the color coding of the bar fill color. +For \code{cluster}, the bars are colored after cluster assignment of the +observations (default). +For \code{neighbor}, the bar color codes for the nearest neighbor cluster. +For \code{value}, the bar color codes for the silhouette width. +For \code{sign}, the bar color represents the sign of the silhouette width.} + +\item{show_stats}{logical, should the number of observations in the cluster, +percentage of negative silhouette widths and average silhouette statistic +be shown in the plot? Defaults to TRUE.} + +\item{signif_digits}{significant digits used for rounding of the statistics +presented in the plot.} + +\item{cust_theme}{custom ggplot theme.} + +\item{bar_color}{color of the bar line.} + +\item{...}{extra arguments passed to \code{\link[ggplot2]{geom_bar}}.} +} +\value{ +a \code{ggplot} class graphic. +} +\description{ +Generates a classical bar plot of silhouette width distribution in clusters. +} +\references{ +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. 
+doi:10.1016/0377-0427(87)90125-7 +} diff --git a/man/plot.spectre.Rd b/man/plot.spectre.Rd new file mode 100644 index 0000000..b4dfba5 --- /dev/null +++ b/man/plot.spectre.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectre_oop.R +\name{plot.spectre} +\alias{plot.spectre} +\title{Diagnostic plots for \code{spectre} objects.} +\usage{ +\method{plot}{spectre}( + x, + type = c("eigenvalues", "eigenvectors"), + cust_theme = ggplot2::theme_classic(), + ... +) +} +\arguments{ +\item{x}{an object of \code{spectre} class.} + +\item{type}{type of the plot as specified in the Details. \code{eigenvalues} by +default.} + +\item{cust_theme}{custom \code{ggplot} \code{theme} object.} + +\item{...}{extra arguments passed to \code{\link{plot_eigenvalues}} and +\code{\link{plot_eigenvectors}} which control e.g. the number of the trailing +eigenvalues or the eigenvector pair to be plotted.} +} +\value{ +a \code{ggplot} graphic object. +} +\description{ +Generates scatter plots of eigenvalues and selected eigenvectors. +} +\details{ +Two types of plots are generated as specified by the \code{type} argument: +\itemize{ +\item \emph{eigenvalues}: the k smallest eigenvalues (by default the trailing 50 ones). +This plot type may be useful for assessing the number of eigenvectors +used in other analyses such as clustering. +\item \emph{eigenvectors}: a scatter plot of two selected eigenvectors, by default, +the last and one before last eigenvector. +} +} diff --git a/man/plot.tuner.Rd b/man/plot.tuner.Rd new file mode 100644 index 0000000..2e5801d --- /dev/null +++ b/man/plot.tuner.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tuner_oop.R +\name{plot.tuner} +\alias{plot.tuner} +\title{Plot cluster quality statistic values for tuning.} +\usage{ +\method{plot}{tuner}( + x, + cust_theme = ggplot2::theme_classic(), + line_alpha = 1, + point_size = 2, + ...
+) +} +\arguments{ +\item{x}{a \code{tuner} object.} + +\item{cust_theme}{a custom \code{ggplot} theme.} + +\item{line_alpha}{alpha of the lines, applies only to plots of +regularization paths.} + +\item{point_size}{size of the data points.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a list of \code{ggplot} graphic objects. +} +\description{ +The function plots quality statistic values (such as explained clustering +variance, silhouette width or mean neighborhood preservation) for +combinations of the tuning parameters. +} diff --git a/man/plot_clust_hm.Rd b/man/plot_clust_hm.Rd new file mode 100644 index 0000000..5af830d --- /dev/null +++ b/man/plot_clust_hm.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feature_heat_maps.R +\name{plot_clust_hm} +\alias{plot_clust_hm} +\alias{plot_clust_hm.clust_analysis} +\alias{plot_clust_hm.min_analysis} +\alias{plot_clust_hm.combi_analysis} +\alias{plot_clust_hm.umatrix_analysis} +\title{Plot levels of clustering features in a heat map.} +\usage{ +plot_clust_hm(x_object, ...) + +\method{plot_clust_hm}{clust_analysis}( + x_object, + y_object = NULL, + line_color = NA, + plot_title = NULL, + plot_subtitle = NULL, + x_lab = "Sample", + fill_lab = "Feature level", + cust_theme = ggplot2::theme_classic(), + discrete_fill = FALSE, + ... +) + +\method{plot_clust_hm}{min_analysis}(x_object, ...) + +\method{plot_clust_hm}{combi_analysis}( + x_object, + y_object = NULL, + line_color = NA, + plot_title = NULL, + plot_subtitle = NULL, + x_lab = "Sample", + fill_lab = "Feature level", + cust_theme = ggplot2::theme_classic(), + discrete_fill = FALSE, + ... +) + +\method{plot_clust_hm}{umatrix_analysis}( + x_object, + line_color = NA, + plot_title = NULL, + plot_subtitle = NULL, + x_lab = "Sample", + fill_lab = "Feature level", + cust_theme = ggplot2::theme_classic(), + discrete_fill = FALSE, + ... 
+) +} +\arguments{ +\item{x_object}{a \code{clust_analysis} or \code{combi_analysis} object, specifies +clustering of the observations.} + +\item{...}{extra arguments passed to methods.} + +\item{y_object}{a \code{clust_analysis} or \code{combi_analysis} object, specifies +clustering of the features, an optional parameter. Ignored in case of +multi-layer SOM.} + +\item{line_color}{color of the line around heat map tiles.} + +\item{plot_title}{plot title. If \code{NULL}, the plots generated for multi-layer +SOM analyses will be named after the data layers.} + +\item{plot_subtitle}{plot subtitle.} + +\item{x_lab}{x axis title.} + +\item{fill_lab}{fill scale title.} + +\item{cust_theme}{a ggplot theme.} + +\item{discrete_fill}{logical, force a discrete fill scale?} +} +\value{ +a \code{ggplot} object (single-layer analysis) or a list of +\code{ggplot} objects (multi-layer cases). +} +\description{ +Generates a heat map of the clustering features; cluster +assignment is indicated by the plot faceting. +} +\details{ +\code{plot_clust_hm()} is an S3 generic function. +Note that it is not possible to visualize clustering variable levels for +\code{clust_analysis} objects generated with user-provided dissimilarity matrices. +In such cases, \code{NULL} is returned with a warning. +} diff --git a/man/plot_dendro.Rd b/man/plot_dendro.Rd new file mode 100644 index 0000000..c397de9 --- /dev/null +++ b/man/plot_dendro.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_dendro} +\alias{plot_dendro} +\title{Plot a dendrogram.} +\usage{ +plot_dendro( + clust_str, + k, + labels = TRUE, + cluster_colors = NULL, + cluster_labels = paste0("Cluster #", 1:k), + cluster_leg_title = "Cluster", + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + y_lab = NULL, + cust_theme = ggplot2::theme_classic(), + ...
+) +} +\arguments{ +\item{clust_str}{an object of the 'hclust' class.} + +\item{k}{an integer, the cluster number.} + +\item{labels}{logical, should observation labels be presented in the x axis?} + +\item{cluster_colors}{colors of the cluster branches, a vector of the length +k + 1. The last color codes for the connector branches.} + +\item{cluster_labels}{cluster names, a text vector of the length k.} + +\item{cluster_leg_title}{cluster legend title.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{plot_tag}{plot tag.} + +\item{y_lab}{y axis title.} + +\item{cust_theme}{custom plot theme, a ggplot2 theme object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +A ggplot object. +} +\description{ +Plots a dendrogram given a clustering object generated by +\code{\link[stats]{hclust}}. +} +\details{ +The dendrogram structure is generated with the +\code{\link[stats]{as.dendrogram}} function and graphical layout provided +by \code{\link[dendextend]{color_branches}} and +\code{\link[dendextend]{set}}. +} +\references{ +Galili T. dendextend: an R package for visualizing, adjusting and +comparing trees of hierarchical clustering. Bioinformatics (2015) 31:3718–20.
+doi:10.1093/bioinformatics/btv428 +} diff --git a/man/plot_eigenvalues.Rd b/man/plot_eigenvalues.Rd new file mode 100644 index 0000000..651514e --- /dev/null +++ b/man/plot_eigenvalues.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralization.R +\name{plot_eigenvalues} +\alias{plot_eigenvalues} +\title{Plot trailing eigenvalues.} +\usage{ +plot_eigenvalues( + spectre_object, + k = 50, + plot_title = NULL, + point_size = 2, + point_color = "steelblue", + point_alpha = 0.75, + cust_theme = ggplot2::theme_classic() +) +} +\arguments{ +\item{spectre_object}{an object of \code{spectre} class} + +\item{k}{numeric that specifies the number of the least eigenvalues +to be plotted.} + +\item{plot_title}{plot title.} + +\item{point_size}{size of the data points.} + +\item{point_color}{point color.} + +\item{point_alpha}{point alpha.} + +\item{cust_theme}{custom \code{ggplot} \code{theme} object.} +} +\value{ +a \code{ggplot} graphic. +} +\description{ +Plots k least eigenvalues. +} diff --git a/man/plot_eigenvectors.Rd b/man/plot_eigenvectors.Rd new file mode 100644 index 0000000..d9374b9 --- /dev/null +++ b/man/plot_eigenvectors.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralization.R +\name{plot_eigenvectors} +\alias{plot_eigenvectors} +\title{Scatter plot of selected eigenvectors.} +\usage{ +plot_eigenvectors( + spectre_object, + x_eigen = NULL, + y_eigen = NULL, + plot_title = NULL, + point_size = 2, + point_color = "steelblue", + point_alpha = 0.75, + point_wjitter = 0, + point_hjitter = 0, + cust_theme = ggplot2::theme_classic() +) +} +\arguments{ +\item{spectre_object}{an object of \code{spectre} class} + +\item{x_eigen}{a numeric. The number of eigenvector to be presented in +the x axis. If \code{NULL}, this is the last eigenvector available in the object.} + +\item{y_eigen}{a numeric. The number of eigenvector to be presented in the +y axis. 
If \code{NULL}, this is the one before last eigenvector available in +the object.} + +\item{plot_title}{plot title.} + +\item{point_size}{size of the data points.} + +\item{point_color}{point color.} + +\item{point_alpha}{point alpha.} + +\item{point_wjitter}{jittering width for the data points.} + +\item{point_hjitter}{jittering height for the data points.} + +\item{cust_theme}{custom \code{ggplot} \code{theme} object.} +} +\value{ +a \code{ggplot} graphic object. +} +\description{ +Generates a two-dimensional scatter plot for selected eigenvectors. +} diff --git a/man/plot_htk.Rd b/man/plot_htk.Rd new file mode 100644 index 0000000..35e2d29 --- /dev/null +++ b/man/plot_htk.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_htk} +\alias{plot_htk} +\title{Generate diagnostic plots for hard threshold KMEANS clustering.} +\usage{ +plot_htk( + x, + k = NULL, + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + cust_theme = ggplot2::theme_classic() +) +} +\arguments{ +\item{x}{a \code{clust_analysis} object.} + +\item{k}{a numeric vector with the k cluster numbers. If \code{NULL}, it +will be determined automatically.} + +\item{plot_title}{title of the plots.} + +\item{plot_subtitle}{subtitle of the plots.} + +\item{plot_tag}{plot tag.} + +\item{cust_theme}{a custom \code{ggplot} theme.} +} +\value{ +a list of two elements \code{wss} and \code{silhouette} storing \code{ggplot} +objects with the plots of within-cluster sum of squares and silhouette widths. +} +\description{ +Creates the plot of within-cluster sum of squares and mean silhouette width +for the varying cluster numbers. +} +\details{ +Corresponds to the output of \code{\link[factoextra]{fviz_nbclust}}. +Intended for internal use.
+} diff --git a/man/plot_knn_distance.Rd b/man/plot_knn_distance.Rd new file mode 100644 index 0000000..bc79ca1 --- /dev/null +++ b/man/plot_knn_distance.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_knn_distance} +\alias{plot_knn_distance} +\title{Plot the mean distance to k-nearest neighbors.} +\usage{ +plot_knn_distance( + diss_obj, + k, + eps = NULL, + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + cust_theme = ggplot2::theme_classic() +) +} +\arguments{ +\item{diss_obj}{a dissimilarity object (e.g. 'dist' class).} + +\item{k}{the k number of the nearest neighbors.} + +\item{eps}{the distance to be presented in the plot as a horizontal dashed +line. If NULL, the line is hidden.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{plot_tag}{plot tag.} + +\item{cust_theme}{custom plot theme, a ggplot2 theme object.} +} +\value{ +A ggplot object. +} +\description{ +Plots the sorted (ascending) distances to k-nearest neighbors +(kNN) for each observation in the provided dissimilarity object. +} +\details{ +Internally, the mean kNN distances are calculated with the +\code{\link[dbscan]{kNNdist}} function. +} +\references{ +Hahsler M, Piekenbrock M, Doran D. Dbscan: Fast density-based clustering +with R. J Stat Softw (2019) 91:1–30. doi:10.18637/jss.v091.i01 + +Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: +Earth and Environmental Science Determination of Optimal Epsilon (Eps) +Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in +Sumatra Related content EPS conference comes to London-EPS rewards +quasiparticle research-EP. 
IOP Conf Ser Earth Environ Sci (2016) 31: +doi:10.1088/1755-1315/31/1/012012 +} diff --git a/man/plot_nbclust.Rd b/man/plot_nbclust.Rd new file mode 100644 index 0000000..7c986a7 --- /dev/null +++ b/man/plot_nbclust.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_nbclust} +\alias{plot_nbclust} +\title{Plot WSS curve and silhouette statistic values as a function of cluster +number.} +\usage{ +plot_nbclust( + data, + k, + FUNcluster = NULL, + method = c("wss", "silhouette", "gap_stat"), + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + cust_theme = ggplot2::theme_classic(), + ... +) +} +\arguments{ +\item{data}{a numeric matrix with the distances or a data frame.} + +\item{k}{an integer, the cluster number.} + +\item{FUNcluster}{a clustering function. See: +\code{\link[factoextra]{fviz_nbclust}} for details.} + +\item{method}{a statistic to be plotted. See: +\code{\link[factoextra]{fviz_nbclust}} for details.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{plot_tag}{plot tag.} + +\item{cust_theme}{custom plot theme, a ggplot2 theme object.} + +\item{...}{extra arguments passed to \code{\link[factoextra]{fviz_nbclust}}.} +} +\value{ +a ggplot object. +} +\description{ +Plots the values of the total within-cluster sum-of-squares and +silhouette statistic as a function of the cluster number. +} +\details{ +Takes a distance matrix (e.g. the \code{\link{calculate_dist}} +output) and a clustering function. For details, see: +\code{\link[factoextra]{fviz_nbclust}}. +} +\references{ +Kassambara A, Mundt F. factoextra: Extract and Visualize the Results +of Multivariate Data Analyses. (2020) Available at: +https://cran.r-project.org/web/packages/factoextra/index.html + +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65.
+doi:10.1016/0377-0427(87)90125-7 +} diff --git a/man/plot_point.Rd b/man/plot_point.Rd new file mode 100644 index 0000000..04facd7 --- /dev/null +++ b/man/plot_point.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_point} +\alias{plot_point} +\title{Generate a custom scatter ggplot.} +\usage{ +plot_point( + data, + x_var, + y_var, + fill_var = NULL, + label_var = NULL, + plot_title = NULL, + plot_subtitle = NULL, + plot_tag = NULL, + x_lab = x_var, + y_lab = y_var, + fill_lab = NULL, + cust_theme = ggplot2::theme_classic(), + point_color = "steelblue", + point_alpha = 1, + show_segments = FALSE, + segment_color = "steelblue", + segment_alpha = 1, + label_color = point_color, + txt_color = "black", + txt_size = 2.5, + txt_type = c("label", "text"), + jitter_width = 0, + jitter_height = 0 +) +} +\arguments{ +\item{data}{a data frame.} + +\item{x_var}{the name of the variable to be presented in the x axis.} + +\item{y_var}{the name of the variable to be presented in the y axis.} + +\item{fill_var}{optional, the name of the variable coded by the point fill. +If NULL, the point fill is specified by the point_color argument.} + +\item{label_var}{optional, the name of the variable to be presented in the +point labels. 
If NULL, no point labels are displayed.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{plot_tag}{plot tag.} + +\item{x_lab}{x axis title.} + +\item{y_lab}{y axis title.} + +\item{fill_lab}{fill legend title.} + +\item{cust_theme}{custom plot theme, a ggplot2 theme object.} + +\item{point_color}{point fill color.} + +\item{point_alpha}{point alpha.} + +\item{show_segments}{logical, should lines connecting the (0,0) point with +the plot point be displayed?} + +\item{segment_color}{color of the connecting lines.} + +\item{segment_alpha}{alpha of the connecting lines.} + +\item{label_color}{color of the text labels.} + +\item{txt_color}{color of the text presented in the labels.} + +\item{txt_size}{size of the text presented in the labels.} + +\item{txt_type}{type of the displayed text: either as geom_text or geom_label} + +\item{jitter_width}{horizontal jittering of the points.} + +\item{jitter_height}{vertical jittering of the points.} +} +\value{ +a ggplot object. +} +\description{ +Generates a simple scatter ggplot. +} diff --git a/man/plot_som.Rd b/man/plot_som.Rd new file mode 100644 index 0000000..07bcbdb --- /dev/null +++ b/man/plot_som.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_som} +\alias{plot_som} +\title{Plot diagnostic plots for the self-organizing map.} +\usage{ +plot_som(kohonen_object) +} +\arguments{ +\item{kohonen_object}{a 'kohonen' class object. +See: \code{\link[kohonen]{som}} for details.} +} +\value{ +If \code{base_plots} is set to false, a list of non-editable ggplot +objects is returned. +} +\description{ +Generates a set of diagnostic plots for the 'kohonen' class +object as specified by \code{\link[kohonen]{plot.kohonen}}. +} +\references{ +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. 
doi:10.18637/jss.v087.i07 +} diff --git a/man/plot_train_som.Rd b/man/plot_train_som.Rd new file mode 100644 index 0000000..9d57372 --- /dev/null +++ b/man/plot_train_som.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utils.R +\name{plot_train_som} +\alias{plot_train_som} +\title{Visualize the SOM training process.} +\usage{ +plot_train_som( + kohonen_object, + plot_title = NULL, + plot_subtitle = NULL, + cust_theme = ggplot2::theme_classic(), + ... +) +} +\arguments{ +\item{kohonen_object}{a 'kohonen' class object.} + +\item{plot_title}{plot title.} + +\item{plot_subtitle}{plot subtitle.} + +\item{cust_theme}{custom plot theme, a ggplot2 theme object.} + +\item{...}{extra arguments, currently none specified.} +} +\value{ +a ggplot object. +} +\description{ +Plots the mean distance to the neuron/winning unit as a +function of the iteration number. +} +\references{ +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 +} diff --git a/man/predict.clust_analysis.Rd b/man/predict.clust_analysis.Rd new file mode 100644 index 0000000..fe769c1 --- /dev/null +++ b/man/predict.clust_analysis.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/semi_supervised.R +\name{predict.clust_analysis} +\alias{predict.clust_analysis} +\alias{predict.min_analysis} +\alias{predict.combi_analysis} +\alias{predict.umatrix_analysis} +\title{Semi-supervised clustering.} +\usage{ +\method{predict}{clust_analysis}( + object, + newdata = NULL, + type = c("class", "propagation", "som"), + active_variables = FALSE, + ... +) + +\method{predict}{min_analysis}(object, ...) + +\method{predict}{combi_analysis}( + object, + newdata = NULL, + type = c("class", "propagation", "som"), + active_variables = FALSE, + ... +) + +\method{predict}{umatrix_analysis}(object, newdata = NULL, ...) 
+} +\arguments{ +\item{object}{an object.} + +\item{newdata}{a numeric data frame, matrix or a red_analysis object. +If NULL (default), the bare cluster assignment table is returned.} + +\item{type}{type of the projection: simple observation matching +('class', default), kNN label propagation ('propagation') or prediction +via SOM neuronal network ('som'). The SOM prediction method is the sole +prediction algorithm implemented for multi-layer SOM.} + +\item{active_variables}{logical, should only active variables be used for the +cluster assignment prediction? Relevant only for objects created with hard threshold regularization algorithms and ignored otherwise. See Details.} + +\item{...}{extra arguments passed to \code{\link{propagate}}.} +} +\value{ +a \code{\link{clust_analysis}} object. +} +\description{ +Projects the cluster assignment onto new data using simple +observation matching, a k-nearest neighbor (kNN) label propagation +algorithm or, specifically for self-organizing map (SOM), predicts the node +assignment based on the trained SOM neuronal network. +} +\details{ +For the implementation details of the kNN label propagation +algorithm, see: \code{\link{propagate}}. + +The default distance metric is extracted from the \code{clust_analysis} object. +For \code{combi_analysis} objects, the default distance metric is the distance +between observations (not nodes!). +In case of clustering analyses performed with hard threshold regularization +algorithms (currently only \code{\link{htk_cluster}}), the prediction by +the kNN classifier is done by default by using all available variables. +However, by setting \code{active_variables = TRUE}, the user may switch to +prediction of the cluster assignment only with variables contributing to +development of the clustering structure. See the paper by Raymaekers and +Zamar for rationale of the hard thresholding regularization and active +variable selection.
+ +Currently, it is not possible to perform semi-supervised clustering for +clustering analysis objects generated with user-provided dissimilarity +objects (subclass \code{min_analysis} of \code{clust_analysis}). In such cases, \code{NULL} +is returned with a warning. + +For the kNN propagation, the cluster projection is done on the top level, +i.e. it takes into account the final assignment of the observations to the +clusters and ignores the SOM nodes. +Predictions via the trained SOM neuronal network are accomplished with +\code{\link{map_som}} (single-layer SOM) or \code{\link{map_supersom}} +(multi-layer SOM), which internally use +the \code{\link[kohonen]{map.kohonen}} function. In this case, the distances, +weights and SOM architecture are extracted from the \code{clust_analysis} or +\code{combi_analysis} object. +If the SOM prediction method is applied to a \code{combi_analysis} object, the +cluster assignment is done in a bottom - top direction: the observations +are mapped onto the SOM nodes and the nodes assigned to the clusters as +specified by the assignment data frame (component \code{clust_assignment} of +the \code{combi_analysis} object). +If the user tries to apply the SOM method to a \code{clust_analysis} object +that was generated with a non-SOM algorithm, \code{NULL} is returned +with a warning. +The SOM method is also the only method applicable to analyses employing +multi-layer SOM. +} +\references{ +Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive +semi-supervised clustering algorithm with label propagation. +J Softw Eng (2014) 8:14–22. doi:10.3923/jse.2014.14.22 + +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: Springer Berlin +Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. +J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 + +Raymaekers J, Zamar RH. Regularized K-means Through Hard-Thresholding. +J Mach Learn Res (2022) 23:1–48.
Available at: +http://jmlr.org/papers/v23/21-0052.html +} diff --git a/man/predict.red_analysis.Rd b/man/predict.red_analysis.Rd new file mode 100644 index 0000000..ab80f86 --- /dev/null +++ b/man/predict.red_analysis.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/red_analysis_oop.R +\name{predict.red_analysis} +\alias{predict.red_analysis} +\title{Project new data onto a reduction analysis layout.} +\usage{ +\method{predict}{red_analysis}(object, newdata, ...) +} +\arguments{ +\item{object}{a \code{red_analysis} object, see Details.} + +\item{newdata}{a numeric data frame or a numeric matrix with the new +data set.} + +\item{...}{extra arguments, currently none.} +} +\value{ +an object of the \code{\link{red_analysis}} class. +} +\description{ +Predicts reduction analysis scores for a new piece of data with a reduction +algorithm-specific methodology. +} +\details{ +Currently implemented only for PCA and UMAP reduction analysis objects. +The method employs internally the function \code{predict()} from the +packages \code{stats} and \code{umap}. For reduction analysis objects +created with other methods, \code{NULL} is returned with a warning. +Of note, specifically for UMAP reduction analysis objects, the method will +work only for few basic distances implemented by the \code{umap} package by +default (Euclidean, Manhattan and cosine). +} diff --git a/man/prediter.Rd b/man/prediter.Rd new file mode 100644 index 0000000..fa04479 --- /dev/null +++ b/man/prediter.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/iterative_semi_supervised.R +\name{prediter} +\alias{prediter} +\alias{prediter.clust_analysis} +\alias{prediter.min_analysis} +\alias{prediter.umatrix_analysis} +\alias{prediter.combi_analysis} +\title{Iterative k-nearest neighbor label propagation algorithm.} +\usage{ +prediter(x, ...) 
+ +\method{prediter}{clust_analysis}( + x, + newdata = NULL, + select_stat = c("silhouette", "misclassification", "variance", "np"), + max_k = 20, + kNN_data = 5, + kNN_cluster = NULL, + .parallel = FALSE, + ... +) + +\method{prediter}{min_analysis}(x, ...) + +\method{prediter}{umatrix_analysis}(x, ...) + +\method{prediter}{combi_analysis}( + x, + newdata = NULL, + select_stat = c("silhouette", "misclassification", "variance", "np"), + max_k = 20, + kNN_data = 5, + kNN_cluster = NULL, + .parallel = FALSE, + ... +) +} +\arguments{ +\item{x}{a \code{clust_analysis} or \code{combi_analysis} object.} + +\item{...}{extra arguments passed to \code{\link{predict.clust_analysis}} or +\code{\link{predict.combi_analysis}} such as kernel weighting. Note that you +cannot specify the \code{kNN} and \code{type} arguments.} + +\item{newdata}{a data frame or a matrix with the new data. If \code{NULL}, +the training data is used for fitting the clustering structure.} + +\item{select_stat}{a name of the loss function defining the quality measure +of the prediction. For details, see: Details.} + +\item{max_k}{the maximal number of the nearest neighbors to be tested.} + +\item{kNN_data}{number of the nearest neighbors in the dataset, used for +determination of neighborhood preservation statistic. See: \code{\link{np}} +for details.} + +\item{kNN_cluster}{number of the nearest neighbors in the cluster, used for +determination of neighborhood preservation statistic. See: \code{\link{np}} +for details.} + +\item{.parallel}{logical, should the analysis be run in parallel?} +} +\value{ +an object of the \code{\link{tuner}} class with the \code{plot()} and \code{summary()} +methods. +} +\description{ +Prediction of cluster assignment by the k-nearest neighbor label propagation +algorithm with an automated choice of the k value based on a loss function. 
+} +\details{ +The function finds the optimal value of k for the k-nearest neighbor +classifier by iteratively checking quality of the cluster assignment. The +quality check is accomplished by one of the loss functions: +silhouette width (\code{select_stat = 'silhouette'}, default), +percentage of observations with negative silhouette widths +(\code{select_stat = 'misclassification'}), +fraction of explained clustering variance (i.e. ratio of the between-cluster +sum of squares to the total sum of squares, \code{select_stat = 'variance'}), +or neighborhood preservation (\code{select_stat = 'np'}). +The \code{prediter()} function is an S3 generic function. +The function works only for clustering analyses and combined SOM - clustering +analyses with the data provided as data frames but not as distance matrices. +} +\references{ +Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive semi-supervised +clustering algorithm with label propagation. J Softw Eng (2014) 8:14–22. +doi:10.3923/jse.2014.14.22 +} diff --git a/man/print.clust_analysis.Rd b/man/print.clust_analysis.Rd new file mode 100644 index 0000000..6b7b58b --- /dev/null +++ b/man/print.clust_analysis.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/appearance.R +\name{print.clust_analysis} +\alias{print.clust_analysis} +\alias{print.combi_analysis} +\alias{print.cross_dist} +\alias{print.red_analysis} +\alias{print.tuner} +\title{Printing of objects.} +\usage{ +\method{print}{clust_analysis}(x, ...) + +\method{print}{combi_analysis}(x, ...) + +\method{print}{cross_dist}(x, ...) + +\method{print}{red_analysis}(x, ...) + +\method{print}{tuner}(x, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +nothing, called for side effects. +} +\description{ +Prints a \code{clust_analysis}, \code{combi_analysis}, \code{red_analysis}, +\code{cross_dist} or \code{tuner} object.
+} diff --git a/man/print.spectre.Rd b/man/print.spectre.Rd new file mode 100644 index 0000000..ff1fe36 --- /dev/null +++ b/man/print.spectre.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectre_oop.R +\name{print.spectre} +\alias{print.spectre} +\title{Print method for the \code{spectre} class.} +\usage{ +\method{print}{spectre}(x, ...) +} +\arguments{ +\item{x}{an object of class \code{spectre}.} + +\item{...}{extra arguments, currently none.} +} +\value{ +none, called for its side effects. +} +\description{ +Prints a \code{spectre} object. +} diff --git a/man/propagate.Rd b/man/propagate.Rd new file mode 100644 index 0000000..aea1efd --- /dev/null +++ b/man/propagate.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/semi_supervised.R +\name{propagate} +\alias{propagate} +\title{Project the cluster assignment with k-NN label propagation.} +\usage{ +propagate( + object, + newdata = NULL, + variables = NULL, + active_variables = FALSE, + distance_method = NULL, + kNN = 5, + simple_vote = TRUE, + resolve_ties = FALSE, + kernel_fun = function(x) 1/x, + detailed = FALSE +) +} +\arguments{ +\item{object}{a \code{clust_analysis} or a \code{combi_analysis} object.} + +\item{newdata}{a numeric data frame, matrix or a \code{red_analysis} object. +If NULL (default), the bare cluster assignment table is returned.} + +\item{variables}{an optional vector with names of variables to be used for +the cluster assignment prediction. If \code{NULL} (default), all variables will +be used.} + +\item{active_variables}{logical, should the prediction be done with active +variables only? Refers only to objects created with hard threshold +regularized algorithms.} + +\item{distance_method}{a distance metric, by default it is retrieved from +the input \code{clust_analysis} or \code{combi_analysis} object.
For the latter, the +distance used for observation clustering is used in the projection.} + +\item{kNN}{number of the nearest neighbors.} + +\item{simple_vote}{logical, should classical unweighted k-NN classification +be applied? If FALSE, distance-weighted k-NN is used with the provided kernel +function.} + +\item{resolve_ties}{logical, should the ties be resolved at random? Applies +only to the simple unweighted voting algorithm.} + +\item{kernel_fun}{kernel function transforming the distance into weight.} + +\item{detailed}{logical, should a detailed output including the kNN table and +voting scheme be returned? If FALSE, the bare \code{clust_analysis} object with the +predictions is returned.} +} +\value{ +a \code{\link{clust_analysis}} object or, +if \code{detailed} = TRUE, a list with the kNN +table and the voting results. +} +\description{ +Projects the cluster assignment with a k-nearest neighbor +classifier onto a new data set. +} +\details{ +If a \code{red_analysis} object is provided as \code{newdata}, the cluster +assignment is projected onto the component/score table. The \code{newdata} input +has to have the same variables as those used for development of the input +\code{clust_analysis} object. This algorithm is not available for multi-layer +SOM. +} +\references{ +Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive +semi-supervised clustering algorithm with label propagation. +J Softw Eng (2014) 8:14–22. doi:10.3923/jse.2014.14.22 +} diff --git a/man/qe.Rd b/man/qe.Rd new file mode 100644 index 0000000..af31cba --- /dev/null +++ b/man/qe.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/som_stats.R +\name{qe} +\alias{qe} +\alias{qe.clust_analysis} +\alias{qe.combi_analysis} +\alias{pbc} +\alias{pbc.clust_analysis} +\alias{pbc.combi_analysis} +\title{Quantization error and population-based convergence.} +\usage{ +qe(x, ...) + +\method{qe}{clust_analysis}(x, ...) + +\method{qe}{combi_analysis}(x, ...) 
+ +pbc(x, ...) + +\method{pbc}{clust_analysis}(x, ...) + +\method{pbc}{combi_analysis}(x, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments passed to methods.} +} +\value{ +\code{qe()} returns a single numeric value. \code{pbc()} returns a list with +two components, \code{variable_stats} with mean differences, variance ratios, +their lower and upper bounds of 95\% confidence intervals and logical +variables indicating if the convergence was reached. +} +\description{ +The functions compute two numeric statistics helpful in assessing convergence +of self-organizing maps (SOM): quantization error (\code{qe()}) and +population-based convergence (\code{pbc()}). +} +\details{ +Quantization error is computed as a sum of distances of data points to their +winning units (nodes). +The idea behind population-based convergence is to check whether data points +and SOM nodes are drawn from the same population as assessed by comparing +means and variances of the data points and nodes in a variable-wise manner. +The \code{qe()} and \code{pbc()} functions are S3 generics. +} +\references{ +\enumerate{ +\item Breard GT. Evaluating Self-Organizing Map Quality Measures as Convergence +Criteria. Open Access Master’s Theses. Paper 1033. +Available at: https://digitalcommons.uri.edu/theses/1033 +} + +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: Springer Berlin +Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Hamel L, Ott BH. A Population Based Convergence Criterion for +Self-Organizing Maps. 
(2012) +} diff --git a/man/red_analysis.Rd b/man/red_analysis.Rd new file mode 100644 index 0000000..1ecb61f --- /dev/null +++ b/man/red_analysis.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{red_analysis} +\alias{red_analysis} +\title{Reduction analysis class object.} +\usage{ +red_analysis(x) +} +\arguments{ +\item{x}{a named list, see Details.} +} +\value{ +a \code{red_analysis} object with the elements listed in Details. +} +\description{ +Constructs a \code{red_analysis} class object given a list storing +results of a dimensionality reduction analysis. +} +\details{ +A named list with the following elements is required as the \code{x} argument: +\itemize{ +\item \code{red_obj} with the analysis output, +\item \code{red_fun} name of the reduction function, +\item \code{dist_method} name of the distance metric, +\item \code{component_tbl} a data frame with component/score values for the observations, +\item \code{loadings} a data frame with variable loadings, relevant e.g. for PCA or +factor analysis, +} + +\code{data} a quosure calling the original data set. + +If the \code{component_tbl} data frame contains the cluster assignment information +an object of the \code{clust_red} sub-class is created. This sub-class inherits +almost all methods from the superclass \code{red_analysis}. The only difference +concerns the \code{plot()} method, which, by default generates layout (score) +scatter plots with the cluster assignment coded by the point color. 
+} diff --git a/man/reduce_data.Rd b/man/reduce_data.Rd new file mode 100644 index 0000000..fe21765 --- /dev/null +++ b/man/reduce_data.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduction_functions.R +\name{reduce_data} +\alias{reduce_data} +\title{Dimensionality reduction of a data set.} +\usage{ +reduce_data( + data, + distance_method = "euclidean", + kdim = 2, + red_fun = c("pca", "mds", "umap", "fa"), + ... +) +} +\arguments{ +\item{data}{a numeric data frame, a matrix or a distance object +(class \code{dist}).} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}. Explicitly used only by MDS and UMAP.} + +\item{kdim}{dimension number.} + +\item{red_fun}{name of the dimensionality reduction function.} + +\item{...}{extra arguments passed to \code{\link[pcaPP]{PCAproj}} (PCA), +\code{\link[stats]{cmdscale}} (MDS) and \code{\link[umap]{umap}} (UMAP), +like the \code{\link[umap]{umap.defaults}} object for UMAP, +\code{\link[stats]{factanal}} (FA).} +} +\value{ +a \code{\link{red_analysis}} object with, among others, +\code{\link{plot.red_analysis}}, \code{\link{summary.red_analysis}} and +\code{\link{predict.red_analysis}} methods. +} +\description{ +Performs dimensionality reduction of a data frame with principal +component analysis (PCA), multi-dimensional scaling (MDS), Uniform Manifold +Approximation and Projection (UMAP) or factor analysis (FA). +} +\details{ +A wrapper around \code{\link[pcaPP]{PCAproj}} (PCA), +\code{\link[stats]{cmdscale}} (MDS), \code{\link[umap]{umap}} (UMAP) +and \code{\link[stats]{factanal}} (FA). Note: +the distances and other UMAP parameters are specified by a +\code{\link[umap]{umap.defaults}} object. Hence, not all distance measures +returned by \code{\link{get_kernel_info}} are available for UMAP computation. +} +\references{ +McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation and +Projection for Dimension Reduction. 
(2018) +Available at: https://arxiv.org/abs/1802.03426v3 + +Croux C, Filzmoser P, Oliveira MR. Algorithms for Projection-Pursuit robust +principal component analysis. Chemom Intell Lab Syst (2007) 87:218–225. +doi:10.1016/j.chemolab.2007.01.004 + +BARTLETT MS. THE STATISTICAL CONCEPTION OF MENTAL FACTORS. Br J Psychol +Gen Sect (1937) 28:97–104. doi:10.1111/j.2044-8295.1937.tb00863.x +} diff --git a/man/rename.clust_analysis.Rd b/man/rename.clust_analysis.Rd new file mode 100644 index 0000000..1cb6abe --- /dev/null +++ b/man/rename.clust_analysis.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/names.R +\name{rename.clust_analysis} +\alias{rename.clust_analysis} +\alias{rename.combi_analysis} +\title{Set cluster names.} +\usage{ +\method{rename}{clust_analysis}(.data, nm, ...) + +\method{rename}{combi_analysis}(.data, nm, ...) +} +\arguments{ +\item{.data}{a \code{clust_analysis} or \code{combi_analysis} object.} + +\item{nm}{a named character vector with the new names as elements and old +cluster names as names.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a \code{\link{clust_analysis}} or \code{\link{combi_analysis}} object. +} +\description{ +Sets custom cluster names. +} +\details{ +The package's clustering functions of the clustTools package +name clusters with integer numbers by default. This method poses a handy +tool to set custom cluster names with a named character vector. +The cluster order (i.e. vector levels) is defined by the order of +the naming vector's elements. 
+} diff --git a/man/set_rownames.Rd b/man/set_rownames.Rd new file mode 100644 index 0000000..8f0d193 --- /dev/null +++ b/man/set_rownames.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing_functions.R +\name{set_rownames} +\alias{set_rownames} +\title{Set row names.} +\usage{ +set_rownames(data, row_names = as.character(1:nrow(data))) +} +\arguments{ +\item{data}{a data frame or a tibble.} + +\item{row_names}{a character vector of the proper length.} +} +\value{ +a data frame. +} +\description{ +Sets row names in a data frame. +} +\details{ +A tibble is silently converted to a data frame. +} diff --git a/man/sil_extra.Rd b/man/sil_extra.Rd new file mode 100644 index 0000000..ca1f66c --- /dev/null +++ b/man/sil_extra.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{sil_extra} +\alias{sil_extra} +\title{Generate a sil_extra object.} +\usage{ +sil_extra(x, assignment) +} +\arguments{ +\item{x}{an object of the \code{\link[cluster]{silhouette}} class.} + +\item{assignment}{a data frame with the \code{clust_id} and \code{observation} +columns defining the cluster assignment, e.g. obtained by the +\code{\link{extract}} function applied to a \code{clust_analysis} or +\code{combi_analysis} object.} +} +\value{ +an object of the \code{sil_extra} class. Technically, a tibble with +the observation ID, cluster name, neighbor cluster name and silhouette width. +} +\description{ +Extends the \code{\link[cluster]{silhouette}} object by +cluster order and names. +} +\details{ +The \code{sil_extra} class has \code{\link{summary.sil_extra}} +and \code{\link{plot.sil_extra}} +methods compatible with the tidyverse environment. 
+} diff --git a/man/silhouette.clust_analysis.Rd b/man/silhouette.clust_analysis.Rd new file mode 100644 index 0000000..f3ab3f5 --- /dev/null +++ b/man/silhouette.clust_analysis.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/silhouettes.R +\name{silhouette.clust_analysis} +\alias{silhouette.clust_analysis} +\alias{silhouette.combi_analysis} +\title{Silhouette statistic.} +\usage{ +\method{silhouette}{clust_analysis}(x, output = c("extended", "silhouette"), ...) + +\method{silhouette}{combi_analysis}(x, output = c("extended", "silhouette"), ...) +} +\arguments{ +\item{x}{an object of the \code{\link{clust_analysis}} or +\code{\link{combi_analysis}} class.} + +\item{output}{the function output. +For \code{silhouette}, an object of the canonical cluster's class +\code{\link[cluster]{silhouette}} is returned. +For \code{extended}, an object of the class \code{\link{sil_extra}} is returned. +See the Details.} + +\item{...}{extra arguments passed to \code{\link[cluster]{silhouette}}.} +} +\value{ +an object of the class \code{\link[cluster]{silhouette}} or +\code{\link{sil_extra}}. +} +\description{ +Computes silhouette statistics for a \code{clust_analysis} or \code{combi_analysis} +objects. +} +\details{ +The function employs the default method of the +\code{\link[cluster]{silhouette}} generics and is hence agnostic to the +clustering method. +For SOM clustering, i.e. \code{combi_analysis} objects, the calculation is done +for the simple assignment of observations to the clusters +(SOM nodes are ignored). +For the extended output of the function, an object of the class +\code{\link{sil_extra}} is returned, which preserves cluster order +and names and offers tidyverse-friendly \code{\link{plot.sil_extra}} +and \code{\link{summary.sil_extra}} methods. +Observations with cluster assignment failures, e.g. during predictions +via a trained SOM are silently removed prior to the silhouette computation. 
+} +\references{ +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 + +Schubert E, Rousseeuw PJ. Faster k-Medoids Clustering: Improving the PAM, +CLARA, and CLARANS Algorithms. in Lecture Notes in Computer Science +(including subseries Lecture Notes in Artificial Intelligence and Lecture +Notes in Bioinformatics) (Springer), 171–187. +doi:10.1007/978-3-030-32047-8_16 +} diff --git a/man/som_cluster.Rd b/man/som_cluster.Rd new file mode 100644 index 0000000..c1b4cb6 --- /dev/null +++ b/man/som_cluster.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering_functions.R +\name{som_cluster} +\alias{som_cluster} +\title{Self-organizing maps.} +\usage{ +som_cluster( + data, + distance_method = "euclidean", + xdim = 5, + ydim = 4, + topo = c("hexagonal", "rectangular"), + neighbourhood.fct = c("gaussian", "bubble"), + toroidal = FALSE, + seed = 1234, + ... +) +} +\arguments{ +\item{data}{a numeric data frame or matrix or a \code{red_analysis} object or +a list of such objects.} + +\item{distance_method}{a character vector with names of the distance metric. +It has to be of the same length as \code{data}, i.e. single name for a single +object and a vector for a list. See: \code{\link{get_kernel_info}} for +available distances.} + +\item{xdim}{x dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{ydim}{y dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{topo}{SOM grid topology, see: \code{\link[kohonen]{somgrid}} +for details. 
'hexagonal' by default.} + +\item{neighbourhood.fct}{neighborhood function, 'gaussian' by default.} + +\item{toroidal}{logical, should toroidal grid be used?} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to \code{\link[kohonen]{som}} or +\code{\link[kohonen]{supersom}} such as the learning rate, iteration +number, algorithm mode or layer weights.} +} +\value{ +an object of the class \code{\link{clust_analysis}}. In case of +a multi-layer SOM (\code{data} as a list), the distance matrix +returned within the \code{clust_analysis} object is the matrix computed by +\code{\link[kohonen]{object.distances}}, which is a weighted distance matrix +over all layers for observations. +} +\description{ +Performs self-organizing map (SOM) clustering of a numeric data +frame, matrix or the results of a reduction analysis with the single or +multi-layered Kohonen's on-line or batch algorithm. +} +\details{ +Technically, a wrapper around \code{\link[kohonen]{som}} and +\code{\link[kohonen]{supersom}}. +The input can be a single object or a list of numeric data frames, +numeric matrices or \code{red_analysis} objects - combinations are allowed! +The user may specify different distances for each of the elements +of the data list, which allows handling different types of variables +(e.g. a binary data set handled with Tanimoto distance and a numeric data +set handled with Euclidean distance) and specify weights for each layer +determining how they contribute to the node (cluster) assignment. +If any of the data list elements is a \code{red_analysis} object, its +component/score table will be used for clustering. +Note, in order to make use of the full set of distance measures, +the package 'somKernels' needs to be installed and loaded. +} +\references{ +Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: +Springer Berlin Heidelberg (1995). doi:10.1007/978-3-642-97610-0 + +Wehrens R, Kruisselbrink J. 
Flexible self-organizing maps in kohonen +3.0. J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 +} diff --git a/man/som_reduce.Rd b/man/som_reduce.Rd new file mode 100644 index 0000000..3e8137f --- /dev/null +++ b/man/som_reduce.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduction_functions.R +\name{som_reduce} +\alias{som_reduce} +\title{Dimensionality reduction with self-organizing maps.} +\usage{ +som_reduce( + data, + distance_method = "euclidean", + xdim = 5, + ydim = 4, + topo = c("hexagonal", "rectangular"), + neighbourhood.fct = c("gaussian", "bubble"), + toroidal = FALSE, + seed = 1234, + ... +) +} +\arguments{ +\item{data}{a numeric data frame or matrix.} + +\item{distance_method}{name of the distance metric, see: +\code{\link{get_kernel_info}}.} + +\item{xdim}{x dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{ydim}{y dimension of the SOM grid, +see: \code{\link[kohonen]{somgrid}} for details.} + +\item{topo}{SOM grid topology, see: \code{\link[kohonen]{somgrid}} +for details. 'hexagonal' by default.} + +\item{neighbourhood.fct}{neighborhood function, 'gaussian' by default.} + +\item{toroidal}{logical, should toroidal grid be used?} + +\item{seed}{initial setting of the random number generator.} + +\item{...}{extra arguments passed to \code{\link[kohonen]{som}} or +\code{\link[kohonen]{supersom}} such as the learning rate, iteration +number, algorithm mode or layer weights.} +} +\value{ +a \code{\link{red_analysis}} object with, among others, +\code{\link{plot.red_analysis}}, \code{\link{summary.red_analysis}} and +\code{\link{predict.red_analysis}} methods. +} +\description{ +The \code{som_reduce()} function applies self-organizing maps (SOM) as a classical +dimensionality reduction method, i.e. to reduce the number of variables. 
+} +\details{ +In contrast to \code{\link{som_cluster}}, which reduces the observation +number (observation -> nodes), \code{som_reduce()} diminishes the variable number +by representing associated variables as SOM nodes or 'meta-variables'. +The data frame of such meta-variables (to be strict, codebook vectors of +node positions) is stored as the score/component table of the \code{red_analysis} +object returned by the function. +} diff --git a/man/spectralize.Rd b/man/spectralize.Rd new file mode 100644 index 0000000..e25616e --- /dev/null +++ b/man/spectralize.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralization.R +\name{spectralize} +\alias{spectralize} +\title{Laplacian decomposition.} +\usage{ +spectralize( + data, + distance_method = "euclidean", + kNN = 5, + weighted = FALSE, + simil_fun = function(x) 1/(1 + x), + norm_laplacian = TRUE, + return_laplacian = FALSE +) +} +\arguments{ +\item{data}{a numeric data frame or matrix.} + +\item{distance_method}{distance method. Call \code{get_kernel_info} for +available distance metrics.} + +\item{kNN}{numeric, number of the nearest neighbors., has to be lower than +the dimension of \code{dist_mtx}.} + +\item{weighted}{logical, should the affinity matrix be weighted by +similarity? 
Defaults to \code{FALSE}.} + +\item{simil_fun}{a function used to convert pairwise distances +to pairwise similarities.} + +\item{norm_laplacian}{logical, should the Laplacian matrix be normalized +prior to decomposition?} + +\item{return_laplacian}{logical, should the Laplacian matrix be included in +the function output?} +} +\value{ +a list of class \code{\link{spectre}} with the following elements: +\itemize{ +\item \code{degrees}: degrees of the graph nodes stored in the diagonal of the degree +matrix +\item \code{eigen_values}: a numeric vector of eigenvalues sorted from the largest to +the smallest one +\item \code{eigen_vectors}: a numeric matrix whose rows represent the observations in +the initial data set and columns represent the eigenvectors. +The eigenvectors are sorted by their eigenvalues: the largest come first. +} + +Optionally, if \code{return_laplacian == TRUE}, the Laplacian matrix +(with or without normalization) is returned as well. +} +\description{ +Performs a spectral decomposition of pairwise distances between +observations of a data set. +} +\details{ +The code is inspired by two sources: +https://rpubs.com/gargeejagtap/SpectralClustering +http://www.di.fc.ul.pt/~jpn/r/spectralclustering/spectralclustering.html. + +The function combines the following computation steps: +\itemize{ +\item \emph{distance matrix calculation}. The distance matrix is calculated via +\code{\link{calculate_dist}}, you may check available distance types by +calling \code{\link{get_kernel_info}}. +\item \emph{calculation of affinity matrix}. This step is accomplished by +\code{\link{dist2affi}} and results in a numeric representation of the +nearest neighborhood, i.e. affinity matrix A. Such matrix may be unweighted +(nearest neighbors get \code{1}, all other pairs 0) or weighted by similarity +(nearest neighbors: similarity statistic, all other pairs 0). It is important 
It is important +to note, that conversion of the distance matrix to similarity matrix is done +by applying a user-provided function \code{simil_fun}. Its default value +corresponds to a common transformation of Euclidean distance to Euclidean +similarity, which must not be optimal for all distance measures. +\item \emph{calculation of degree matrix and Laplacian}. The degree matrix D is a +diagonal matrix, whose diagonal are sums of columns of the affinity matrix A. +As such, D stores the numbers of neighbors of a given observation or, from +a graph perspective, degree of the graph nodes. The Laplacian matrix U is +computed as a simple difference: \eqn{U = D - A}. It is, optionally, +normalized with the following formula: \eqn{U_{norm} = D^{-1/2} U D^{-1/2}}. +\item \emph{decomposition of the Laplacian}. This step is done via base R's +\code{\link[base]{eigen}} and generates eigenvectors and eigenvalues of the +Laplacian matrix U. The trailing smallest eigenvectors with eigenvalues +not departing substantially from 0 represent the base dimensions of the data +set and may be used for further analysis steps such as clustering. +The \code{plot()} method called for the \code{spectralize()} function output may be +helpful ad finding such trailing eigenvectors. +} +} diff --git a/man/spectre.Rd b/man/spectre.Rd new file mode 100644 index 0000000..32acfad --- /dev/null +++ b/man/spectre.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{spectre} +\alias{spectre} +\title{Create a \code{spectre} object.} +\usage{ +spectre(x) +} +\arguments{ +\item{x}{a list with elements specified in Details.} +} +\value{ +an instance of the \code{spectre} class as described in Details. +} +\description{ +Creates an instance of \code{spectre} class. 
+} +\details{ +Technically, a \code{spectre} object is a list with the following components: +\itemize{ +\item \code{degrees}: a numeric vector with degrees of the graph nodes stored in the +diagonal of the degree matrix +\item \code{eigen_values}: a numeric vector of eigenvalues sorted from the largest to +the smallest one +\item \code{eigen_vectors}: a numeric matrix whose rows represent the observations in +the initial data set and columns represent the eigenvectors. +The eigenvectors are sorted by their eigenvalues: the largest come first. +} +} diff --git a/man/summary.clust_analysis.Rd b/man/summary.clust_analysis.Rd new file mode 100644 index 0000000..fbe3016 --- /dev/null +++ b/man/summary.clust_analysis.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clust_analysis_oop.R, R/combi_analysis_oop.R +\name{summary.clust_analysis} +\alias{summary.clust_analysis} +\alias{summary.combi_analysis} +\title{Quality control of clustering solutions.} +\usage{ +\method{summary}{clust_analysis}(object, ...) + +\method{summary}{combi_analysis}(object, ...) +} +\arguments{ +\item{object}{a \code{clust_analysis} or \code{combi_analysis} object.} + +\item{...}{extra arguments passed to \code{\link{np}}.} +} +\value{ +a data frame with columns characterized in Details. +} +\description{ +Computes basic global statistics of quality of a clustering analysis object. 
+} +\details{ +The statistics retrieved by the \code{summary()} method are: +\itemize{ +\item \emph{silhouette width} (\code{sil_width}) +\item \emph{fraction of potentially misclassified observations} with negative +silhouette widths (\code{frac_misclassified}) +\item \emph{fraction of explained clustering variance} expressed as the ratio of total +between sum of squares to total sum of squares (\code{frac_var}) +\item \emph{fraction of preserved nearest neighbors} (\code{frac_np}) +} + +The statistics are computed with \code{\link{silhouette}}, \code{\link{var}}, +and \code{\link{np}} methods for the entire clustering structure and not +for particular clusters. +} +\references{ +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 + +Venna J, Kaski S. Neighborhood preservation in nonlinear projection methods: +An experimental study. Lect Notes Comput Sci (including Subser Lect Notes +Artif Intell Lect Notes Bioinformatics) (2001) 2130:485–491. +doi:10.1007/3-540-44668-0_68 +} diff --git a/man/summary.cross_dist.Rd b/man/summary.cross_dist.Rd new file mode 100644 index 0000000..287a81b --- /dev/null +++ b/man/summary.cross_dist.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cross_dist_oop.R +\name{summary.cross_dist} +\alias{summary.cross_dist} +\title{Summary of cross-distances between clusters.} +\usage{ +\method{summary}{cross_dist}(object, ...) +} +\arguments{ +\item{object}{a \code{cross_dist} object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a data frame with mean, SD, median, interquartile range, 95\% range +and range of cross-distances between the cluster pairs. +} +\description{ +Computes summary statistics of homologous or heterologous cross-distances +between the clusters. 
+} diff --git a/man/summary.importance.Rd b/man/summary.importance.Rd new file mode 100644 index 0000000..0e358bb --- /dev/null +++ b/man/summary.importance.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importance_oop.R +\name{summary.importance} +\alias{summary.importance} +\title{Importance statistic summary.} +\usage{ +\method{summary}{importance}(object, ...) +} +\arguments{ +\item{object}{an \code{importance} class object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a data frame with importance metrics. +} +\description{ +If the permutation importance analysis for clustering variables was done +in multiple iterations +(e.g. \code{n_iter} set to > 1 in \code{\link{impact.clust_analysis}}), +number of iterations, mean, SD, median, interquartile range +and range of the difference in +clustering variance for each clustering variable is computed. +Otherwise, a plain data frame with importance statistics is returned. +} diff --git a/man/summary.knb.Rd b/man/summary.knb.Rd new file mode 100644 index 0000000..5a42d8f --- /dev/null +++ b/man/summary.knb.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/knb_oop.R +\name{summary.knb} +\alias{summary.knb} +\title{Summary neighborhood preservation statistic for clusters.} +\usage{ +\method{summary}{knb}(object, ...) +} +\arguments{ +\item{object}{an object of the \code{\link{knb}} class.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a data frame with numeric statistics for the whole clustering +structure (\code{clust_id} = 'global') and particular clusters. +} +\description{ +Computes mean, SD, median, interquartile range, 95\% range and range of the +network preservation statistic for the global clustering structure +and particular clusters. 
+Low values of the mean or median neighborhood preservation statistic may +indicate that the given cluster is poorly separated from other clusters. +For objects without cluster assignment (e.g. +neighborhood analyses for reduction analysis methods), only global +neighborhood statistics are returned. +For such objects, low values of the neighborhood preservation statistic +suggest that the reduction analysis poorly projects the data point +neighborhood into the reduced layout. +} +\references{ +Venna J, Kaski S. Neighborhood preservation in nonlinear projection methods: +An experimental study. Lect Notes Comput Sci (including Subser Lect Notes +Artif Intell Lect Notes Bioinformatics) (2001) 2130:485–491. +doi:10.1007/3-540-44668-0_68 + +Breard GT. Evaluating Self-Organizing Map Quality Measures as Convergence +Criteria. Open Access Master’s Theses. Paper 1033. +Available at: https://digitalcommons.uri.edu/theses/1033 +} diff --git a/man/summary.sil_extra.Rd b/man/summary.sil_extra.Rd new file mode 100644 index 0000000..e531ae0 --- /dev/null +++ b/man/summary.sil_extra.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sil_oop.R +\name{summary.sil_extra} +\alias{summary.sil_extra} +\title{Summary silhouette width statistic for clusters.} +\usage{ +\method{summary}{sil_extra}(object, ...) +} +\arguments{ +\item{object}{an object of the \code{\link{sil_extra}} class.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a data frame with numeric statistics for the whole clustering +structure (\code{clust_id} = 'global') and particular clusters (mean, SD, median, +interquartile range, 95\% percentile range, range, number and fraction of +potentially misclassified observations with negative silhouette widths). +} +\description{ +Computes mean, SD, median, interquartile range, 95\% range, range as well as +the number and percentage of observations with negative silhouette width. 
+Such observations are likely in an improper cluster. +} +\references{ +Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and +validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. +doi:10.1016/0377-0427(87)90125-7 +} diff --git a/man/summary.tuner.Rd b/man/summary.tuner.Rd new file mode 100644 index 0000000..2dce67b --- /dev/null +++ b/man/summary.tuner.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tuner_oop.R +\name{summary.tuner} +\alias{summary.tuner} +\title{Summary of quality statistics.} +\usage{ +\method{summary}{tuner}(object, ...) +} +\arguments{ +\item{object}{a \code{tuner} object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a data frame with the following columns: +\itemize{ +\item \code{sil_width}: silhouette width +\item \code{frac_misclassified}: fraction of observations with negative silhouette +widths suggestive of misclassification +\item \code{frac_var}: fraction of explained clustering variance +\item \code{frac_np}: fraction of preserved nearest neighbors +\item columns named after names of the tuning parameters and containing their +values +} +} +\description{ +The \code{summary()} method called for \code{tuner} class objects extracts cluster +assignment prediction statistics for subsequent combinations of the tuning +parameters from the object. +} diff --git a/man/te.Rd b/man/te.Rd new file mode 100644 index 0000000..10ce9a0 --- /dev/null +++ b/man/te.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neighborhood.R +\name{te} +\alias{te} +\alias{te.clust_analysis} +\alias{te.combi_analysis} +\title{Topology error of self-organizing maps.} +\usage{ +te(x, ...) + +\method{te}{clust_analysis}(x, ...) + +\method{te}{combi_analysis}(x, type = c("node", "final"), ...) 
+} +\arguments{ +\item{x}{a \code{clust_analysis} or \code{combi_analysis} object.} + +\item{...}{extra arguments passed to methods.} + +\item{type}{type of reference clusters. For \code{type = 'node'}, topology error +within SOM nodes is computed, for \code{type = 'final'}, topology error in the +final clusters (clusters of SOM nodes) is calculated.} +} +\value{ +An object of the \code{\link{knb}} class with +\code{\link{summary.knb}} and \code{\link{plot.knb}} methods. + +An object of the \code{\link{knb}} class with +\code{\link{summary.knb}} and \code{\link{plot.knb}} methods. +} +\description{ +Calculates the topology error for data points in a self-organizing map (SOM). +} +\details{ +The procedure of topology error computation is as follows: for each +observation, two nearest self-organizing map (SOM) nodes are identified. +If such nodes are neighbors in the initial layout of SOM prior to data +fitting, correct topology (coded with 0) is returned and an error otherwise +(coded as 1). +The function returns NULL with a warning when called for a non-SOM analysis +object. +\code{te()} is an S3 generic function. +} diff --git a/man/tuner.Rd b/man/tuner.Rd new file mode 100644 index 0000000..7485739 --- /dev/null +++ b/man/tuner.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{tuner} +\alias{tuner} +\title{Create a \code{tuner} class object.} +\usage{ +tuner(x) +} +\arguments{ +\item{x}{a list with elements specified in Details.} +} +\value{ +an instance of the \code{tuner} class as described in Details. +} +\description{ +Creates an object of the \code{tuner} class on the top of a list with tuning +of parameters of cluster analysis or prediction. 
+} +\details{ +The input list has to have the following elements: +\itemize{ +\item \code{analysis}: a \code{clust_analysis} or \code{combi_analysis} object created with the +best set of the tuning parameters +\item \code{stats}: a data frame with values of quality stats (silhouette width, +fraction of potentially misclassified observations/negative silhouette width, +fraction of explained clustering variance, and fraction of preserved nearest +neighbors) +\item \code{fun}: name of the tuning function +\item \code{dataset}: a string specifying which data was used during the tuning: the +training data set or cross-validation +\item \code{type}: type of analysis, development or prediction +\item \code{clust_vars}: a vector of names of clustering variables +\item \code{tune_params}: a vector of names of the tuning parameters +\item \code{tune_criteria}: a data frame that specifies which criteria were applied +to select the best combination of the tuning parameters +\item \code{best_tune}: a data frame storing the best values of the tuning parameters +} +} diff --git a/man/var.Rd b/man/var.Rd new file mode 100644 index 0000000..04c9cda --- /dev/null +++ b/man/var.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R +\name{var} +\alias{var} +\alias{var.default} +\title{Object's variance} +\usage{ +var(x, ...) + +\method{var}{default}(x, ...) +} +\arguments{ +\item{x}{an object. For the default method a numeric vector, matrix +or a data frame.} + +\item{...}{extra arguments passed to methods, e.g. \code{\link[stats]{var}}.} +} +\description{ +Computes variance statistic specific for the given object. +} +\details{ +The default \code{var()} method is a wrapper around +\code{\link[stats]{var}}. 
+} diff --git a/man/var.clust_analysis.Rd b/man/var.clust_analysis.Rd new file mode 100644 index 0000000..ee99d19 --- /dev/null +++ b/man/var.clust_analysis.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/variance.R +\name{var.clust_analysis} +\alias{var.clust_analysis} +\alias{var.combi_analysis} +\title{Calculate clustering variance.} +\usage{ +\method{var}{clust_analysis}(x, ...) + +\method{var}{combi_analysis}(x, ...) +} +\arguments{ +\item{x}{an object.} + +\item{...}{extra arguments, currently none.} +} +\value{ +a list with the total, within-cluster, between-cluster sum of +squares and explained clustering variance (named \code{frac_var}). +} +\description{ +Calculates the clustering sum of squares (total, within +clusters, total within clusters and between clusters) as well as the +fraction of 'explained' clustering variance. The latter is the ratio of +the total between-cluster sum of squares to the total sum of squares. +} +\details{ +\code{var()} is an S3 generic function. +\code{var()} overwrites the \code{var()} function provided by the stats package, +but provides a handy default method, so that \code{var()} is expected to behave +the same way as in base R. +For \code{combi_analysis} objects, variance is calculated based on distances +between the observations and the final cluster assignment - +nodes are ignored. +} diff --git a/man/var.red_analysis.Rd b/man/var.red_analysis.Rd new file mode 100644 index 0000000..3ca057f --- /dev/null +++ b/man/var.red_analysis.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/red_analysis_oop.R +\name{var.red_analysis} +\alias{var.red_analysis} +\alias{summary.red_analysis} +\title{Variance and summary for a red_analysis object.} +\usage{ +\method{var}{red_analysis}(x, ...) + +\method{summary}{red_analysis}(object, ...) 
+} +\arguments{ +\item{x}{a \code{red_analysis} object.} + +\item{...}{extra arguments, currently none.} + +\item{object}{a \code{red_analysis} object.} +} +\value{ +\code{var()} returns a data frame with components' +variances, \code{summary()} returns a set of summary statistics specific for +the wrapped dimensionality reduction function. +} +\description{ +Variance associated with the +components and statistic summary for \code{red_analysis} class objects. +} diff --git a/man/vote_kernel.Rd b/man/vote_kernel.Rd new file mode 100644 index 0000000..2498bd3 --- /dev/null +++ b/man/vote_kernel.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{vote_kernel} +\alias{vote_kernel} +\title{Find the most frequently occurring element with distance weighting.} +\usage{ +vote_kernel(vector, dist_vec, kernel_fun = function(x) 1/x) +} +\arguments{ +\item{vector}{a vector.} + +\item{dist_vec}{a numeric vector with the distance values.} + +\item{kernel_fun}{a kernel function.} +} +\description{ +Finds the element of a vector with the highest number of +occurrences. The voting is distance weighted by the given kernel function. +} +\value{ +the most frequent element. +} diff --git a/man/vote_simple.Rd b/man/vote_simple.Rd new file mode 100644 index 0000000..a4e9a28 --- /dev/null +++ b/man/vote_simple.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{vote_simple} +\alias{vote_simple} +\title{Find the most frequently occurring element of a vector.} +\usage{ +vote_simple(vector, resolve_ties = FALSE) +} +\arguments{ +\item{vector}{a vector.} + +\item{resolve_ties}{logical, should the ties be resolved at random?} +} +\value{ +the most frequent element. +} +\description{ +Finds the element of a vector with the highest number of +occurrences. 
+} +\details{ +Ties may be resolved at random (resolve_ties = TRUE), otherwise, +if a tie exists, the alphabetically first element is returned. +}