diff --git a/man/calculate_dist.Rd b/man/calculate_dist.Rd deleted file mode 100644 index 7e6384a..0000000 --- a/man/calculate_dist.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{calculate_dist} -\alias{calculate_dist} -\alias{get_kernel_info} -\title{Calculate distance.} -\usage{ -calculate_dist(data, method) - -get_kernel_info() -} -\arguments{ -\item{data}{a data frame or matrix. Row names are preserved as -observation IDs.} - -\item{method}{the name of dissimilarity measure, see: -\code{\link{get_kernel_info}} for available distances.} -} -\value{ -`calculate_dist()`: a matrix with the distance statistics; -`get_kernel_info()`: a vector with names of available distance measures. -} -\description{ -The `calculate_dist()` function calculates distances -between observations (rows) of a data -frame or matrix. See \code{\link{get_kernel_info}} for a vector of available -distance measures. -} -\details{ -Provides a handy wrapper for \code{\link[philentropy]{distance}}. -The smc (simple matching coefficient) distance is calculated with the -\code{\link[nomclust]{sm}} function. -} -\references{ -Drost H-G. Philentropy: Information Theory and Distance Quantification -with R. J Open Source Softw (2018) 3:765. doi:10.21105/joss.00765 - -Boriah S, Chandola V, Kumar V. Similarity measures for categorical data: -A comparative evaluation. in Society for Industrial and -Applied Mathematics - 8th SIAM International Conference on Data Mining -2008, Proceedings in Applied Mathematics 130, 243–254. -doi:10.1137/1.9781611972788.22 - -Sulc Z, Cibulkova J, Rezankova H. nomclust: Hierarchical Cluster Analysis -of Nominal Data. 
(2021) -Available at: https://cran.r-project.org/package=nomclust -} diff --git a/man/center_data.Rd b/man/center_data.Rd deleted file mode 100644 index b9c0913..0000000 --- a/man/center_data.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/preprocessing_functions.R -\name{center_data} -\alias{center_data} -\alias{min_max} -\title{Normalization of a data frame.} -\usage{ -center_data(data, type = c("mean", "median"), complete_cases = FALSE) - -min_max(data, complete_cases = FALSE) -} -\arguments{ -\item{data}{a data frame or a tibble. All variables need to be numeric.} - -\item{type}{type of the centering, mean (default) or median.} - -\item{complete_cases}{logical, should the observations with the complete -variable record only be included as an output?} -} -\value{ -a data frame or a tibble. -} -\description{ -Normalization with median or mean centering of a data frame -or tibble (`center_data()`) or simple min/max normalization (`min_max()`). -Preserves the row names. -} -\details{ -A wrapper around \code{\link[base]{scale}}. Mean scaling is equal -to canonical Z-score normalization. -} diff --git a/man/check_numeric.Rd b/man/check_numeric.Rd deleted file mode 100644 index bde3181..0000000 --- a/man/check_numeric.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{check_numeric} -\alias{check_numeric} -\title{Check for a numeric data frame or a matrix.} -\usage{ -check_numeric(object) -} -\arguments{ -\item{object}{an object.} -} -\value{ -none. Throws exceptions if the object is not a numeric data frame or -a matrix. -} -\description{ -Checks if an object is a numeric data frame or a matrix. 
-} diff --git a/man/clust_analysis.Rd b/man/clust_analysis.Rd deleted file mode 100644 index ba5d517..0000000 --- a/man/clust_analysis.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{clust_analysis} -\alias{clust_analysis} -\title{Construct a clust_analysis class object.} -\usage{ -clust_analysis(x) -} -\arguments{ -\item{x}{a named list, see details.} -} -\value{ -a `clust_analysis` object. -} -\description{ -Constructs a `clust_analysis` class object given a list with -results of a clustering analysis. -} -\details{ -A named list with the following elements is required: -data (a quosure calling the original data set), -dist_mtx (a matrix with the distances between the observations), -dist_method (name of the distance statistic), -clust_obj (the output object of the clustering analysis), -clust_fun (the name of the clustering function), -clust_assignment (a tibble with the cluster assignment of the -observations with the observation and clust_id variables), -dots (additional arguments passed to the clustering function). -} diff --git a/man/combi_analysis.Rd b/man/combi_analysis.Rd deleted file mode 100644 index 5e77c34..0000000 --- a/man/combi_analysis.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{combi_analysis} -\alias{combi_analysis} -\title{Construct a combi_analysis class object.} -\usage{ -combi_analysis(x) -} -\arguments{ -\item{x}{a named list, see details.} -} -\value{ -a `combi_analysis` object. -} -\description{ -Constructs a `combi_analysis` class object given a list with -results of reduction analysis or self-organizing map and clustering analysis. 
-} -\details{ -A named list with the following elements is required: -clust_analyses (`red_analysis` or `clust_analysis` objects), -clust_assignment (a tibble with the cluster assignment of the -observations with the observation and clust_id variables). -} diff --git a/man/combi_cluster.Rd b/man/combi_cluster.Rd deleted file mode 100644 index eee96d0..0000000 --- a/man/combi_cluster.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/combi_clustering.R -\name{combi_cluster} -\alias{combi_cluster} -\title{Cluster self-organizing map nodes.} -\usage{ -combi_cluster( - data, - distance_som = "euclidean", - xdim = 5, - ydim = 4, - topo = "hexagonal", - neighbourhood.fct = "gaussian", - toroidal = FALSE, - rlen = 500, - node_clust_fun = hcluster, - distance_nodes = "euclidean", - seed = 1234, - ... -) -} -\arguments{ -\item{data}{a numeric data frame, matrix or a red_analysis object. -If a red_analysis object is provided, its component/score table will be -clustered.} - -\item{distance_som}{metric of distance between the observations, used for SOM -development. See: \code{\link{get_kernel_info}}.} - -\item{xdim}{x dimension of the SOM grid, -see: \code{\link[kohonen]{somgrid}} for details.} - -\item{ydim}{y dimension of the SOM grid, -see: \code{\link[kohonen]{somgrid}} for details.} - -\item{topo}{SOM grid topology, see: \code{\link[kohonen]{somgrid}} -for details. 'hexagonal' for default.} - -\item{neighbourhood.fct}{neighborhood function, 'gaussian' for default.} - -\item{toroidal}{logical, should toroidal grid be used?} - -\item{rlen}{number of the SOM algorithm iterations.} - -\item{node_clust_fun}{a function provided by the clustTools package used to -cluster the SOM nodes. Alternatively, a custom function may be provided here, -which returns a clust_analysis class object.} - -\item{distance_nodes}{metric of distance between the nodes, used for SOM -development. 
See: \code{\link{get_kernel_info}}.} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to node_clust_fun, such as k number of -clusters.} -} -\value{ -an object of the class \code{\link{combi_analysis}}. -} -\description{ -Performs clustering of the self-organizing map (SOM) nodes with -one of the clustering functions provided by the clustTools package. -} -\references{ -Vesanto J, Alhoniemi E. Clustering of the self-organizing map. -IEEE Trans Neural Networks (2000) 11:586–600. doi:10.1109/72.846731 -} diff --git a/man/components.clust_analysis.Rd b/man/components.clust_analysis.Rd deleted file mode 100644 index 03d351e..0000000 --- a/man/components.clust_analysis.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduction.R -\name{components.clust_analysis} -\alias{components.clust_analysis} -\alias{components.combi_analysis} -\title{Dimensionality reduction analysis of the analysis data or distance matrix.} -\usage{ -\method{components}{clust_analysis}( - object, - kdim = NULL, - red_fun = c("pca", "mds", "umap"), - with = c("distance", "data"), - ... -) - -\method{components}{combi_analysis}( - object, - kdim = NULL, - red_fun = c("pca", "mds", "umap"), - with = c("distance", "data"), - ... -) -} -\arguments{ -\item{object}{an object.} - -\item{kdim}{number of dimensions. If NULL, kdim is set to the number of -clusters.} - -\item{red_fun}{reduction analysis function: 'pca' (PCA), 'mds' (MDS) or -'umap' (UMAP).} - -\item{with}{type of the input data for the reduction analysis: -the clustering data ('data') or the matrix of distances ('distance').} - -\item{...}{extra arguments passed to \code{\link{reduce_data}}.} -} -\value{ -a `red_analysis` object with the component/score table containing -the cluster assignment information ('clust_id' variable). 
-} -\description{ -Performs principal component analysis (PCA), multi-dimensional -scaling (MDS) or uniform manifold approximation and projection (UMAP) of the -analysis data set used for clustering or distance matrix. -} -\details{ -See \code{\link{reduce_data}} for the implementation details. -The distance method, relevant for MDS and UMAP, is taken over from the -clust_object. Hence, some distances may crash the analysis with UMAP, see: -\code{\link[umap]{umap.defaults}} for the compatible distances. -For `combi_analysis` objects, the analysis is done for the global clustering, -i.e. assignment of observations to the clusters and not to the SOM nodes. -} diff --git a/man/cross_dist.Rd b/man/cross_dist.Rd deleted file mode 100644 index dc5bb1c..0000000 --- a/man/cross_dist.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{cross_dist} -\alias{cross_dist} -\title{Construct a cross_dist object.} -\usage{ -cross_dist( - x, - type = c("homologous", "heterologous"), - method = "euclidean", - x_levels = NULL, - y_levels = NULL, - ... -) -} -\arguments{ -\item{x}{a named list of cross-distance matrices.} - -\item{type}{type of the cross-distances: -`homologous` for comparison of within the same clustering structure or -`heterologous` for comparison of two clustering structures.} - -\item{method}{name of the distance metric.} - -\item{x_levels}{order of the clusters of the -first cluster/combi analysis object.} - -\item{y_levels}{order of the clusters of the -second cluster/combi analysis object.} - -\item{...}{extra arguments, currently none defined.} -} -\value{ -an object of class `cross_dist` being a list of -cross-distance matrices. -Information on the comparison type and distance -metric are stored as the `type` and `dist_method` attributes. -Information on the cluster order is stored as the -`x_levels` and `y_levels` attributes. 
-} -\description{ -The `cross_dist` class objects are created on top of a named list -of matrices of cross-distances between clusters. -The list elements have to be named after the compared clusters following -the 'clust1 vs clust2' scheme. -} diff --git a/man/cross_distance.Rd b/man/cross_distance.Rd deleted file mode 100644 index 21f70d9..0000000 --- a/man/cross_distance.Rd +++ /dev/null @@ -1,62 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cross_distance.R -\name{cross_distance} -\alias{cross_distance} -\alias{cross_distance.data.frame} -\alias{cross_distance.clust_analysis} -\alias{cross_distance.combi_analysis} -\title{Compute cross-distances between two data frames} -\usage{ -cross_distance(x, ...) - -\method{cross_distance}{data.frame}(x, y = NULL, method = "euclidean", ...) - -\method{cross_distance}{clust_analysis}(x, y = NULL, method = NULL, ...) - -\method{cross_distance}{combi_analysis}(x, y = NULL, method = NULL, ...) -} -\arguments{ -\item{x}{a data frame, `clust_analysis` or `combi_analysis` object.} - -\item{...}{extra arguments passed to methods.} - -\item{y}{an object like `x` or NULL (default).} - -\item{method}{distance metric name as specified by -\code{\link{get_kernel_info}}. -For `clust_analysis` or `combi_analysis` instances. -If `method` is set to NULL, -the metric name is extracted from the object (distance between observations -for `clust_analysis` and `combi_analysis`, not between the SOM nodes).} -} -\value{ -For data frames: a matrix with pairwise distances, -observations of the `x` data frame -are present in rows, observations of the `y` data frame are presented -in columns. -For `clust_analysis` and `combi_analysis` results: -a list of cross-distance matrices of class \code{\link{cross_dist}} -with defined \code{\link{summary.cross_dist}} and -\code{\link{plot.cross_dist}} methods. -} -\description{ -Computes cross-distances between two data frames with the same variable sets. 
-} -\details{ -Distances (for available distances, see: \code{\link{get_kernel_info}}) are -computed in a pair-wise manner employing \code{\link[philentropy]{distance}}. -Preserves row names. -If a single data frame is provided, pairwise distances -between the observations are computed with \code{\link{calculate_dist}}. -If a single `clust_analysis` or `combi_analysis` object is provided, -cross-distances between the clusters within the object are computed. -`cross_distance()` is a S3 generic function. -} -\references{ -Drost H-G. Philentropy: Information Theory and Distance Quantification -with R. J Open Source Softw (2018) 3:765. doi:10.21105/joss.00765 - -Sulc Z, Cibulkova J, Rezankova H. nomclust: Hierarchical Cluster -Analysis of Nominal Data. (2021) -Available at: https://cran.r-project.org/package=nomclust -} diff --git a/man/cv.Rd b/man/cv.Rd deleted file mode 100644 index dd24698..0000000 --- a/man/cv.Rd +++ /dev/null @@ -1,91 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cross_validation.R -\name{cv} -\alias{cv} -\alias{cv.clust_analysis} -\alias{cv.combi_analysis} -\title{Cross-validate the clustering analysis object.} -\usage{ -cv(x, ...) - -\method{cv}{clust_analysis}( - x, - nfolds = 5, - kNN = 5, - simple_vote = TRUE, - resolve_ties = FALSE, - kernel_fun = function(x) 1/x, - seed = 1234, - .parallel = FALSE, - ... -) - -\method{cv}{combi_analysis}( - x, - nfolds = 5, - kNN = 5, - simple_vote = TRUE, - resolve_ties = FALSE, - kernel_fun = function(x) 1/x, - seed = 1234, - .parallel = FALSE, - ... -) -} -\arguments{ -\item{x}{an object.} - -\item{...}{extra arguments, currently none.} - -\item{nfolds}{number of CV folds.} - -\item{kNN}{number of the nearest neighbors.} - -\item{simple_vote}{logical, should classical unweighted k-NN classification -be applied? If FALSE, distance-weighted k-NN is used with the provided kernel -function.} - -\item{resolve_ties}{logical, should the ties be resolved at random? 
Applies -only to the simple unweighted voting algorithm.} - -\item{kernel_fun}{kernel function transforming the distance into weight.} - -\item{seed}{initial setting of the random number generator.} - -\item{.parallel}{logical, should the CV be run in parallel?} -} -\value{ -a list containing the global \code{\link{clust_analysis}} object, -projection (prediction) results and prediction summary -for each fold and a prediction -summary for the whole CV. -} -\description{ -Checks the stability of a clustering solution by -cross-validation (CV) and the classification error in CV folds -as a measure of the cluster stability. -} -\details{ -By principle similar to cross-validation of any machine learning -multi-class classifier. The training portion of a CV split is used to develop -a cluster structure and the projection on the test portion is accomplished -by k-nearest neighbor (kNN) label propagation algorithm. For its -implementation details, see: \code{\link{propagate}}. -For `combi_analysis` objects, assignment of the observations in the CV folds -is done for the 'top' assignment of the observations to the clusters; -nodes are ignored! -The folds are generated with \code{\link[caret]{createFolds}}. -`cv()` is a S3 generic function. -} -\references{ -Lange T, Roth V, Braun ML, Buhmann JM. Stability-based validation of -clustering solutions. Neural Comput (2004) 16:1299–1323. -doi:10.1162/089976604773717621 - -Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive semi-supervised -clustering algorithm with label propagation. J Softw Eng (2014) 8:14–22. -doi:10.3923/jse.2014.14.22 - -Kuhn M. Building predictive models in R using the caret package. -J Stat Softw (2008) 28:1–26. 
doi:10.18637/jss.v028.i05 -} diff --git a/man/cv_cluster.Rd b/man/cv_cluster.Rd deleted file mode 100644 index 8d86b75..0000000 --- a/man/cv_cluster.Rd +++ /dev/null @@ -1,77 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cross_validation.R -\name{cv_cluster} -\alias{cv_cluster} -\title{Cross-validate a clustering algorithm.} -\usage{ -cv_cluster( - data, - nfolds = 5, - kNN = 5, - simple_vote = TRUE, - resolve_ties = FALSE, - kernel_fun = function(x) 1/x, - clustering_fun = clustTools::kcluster, - seed = 1234, - .parallel = FALSE, - ... -) -} -\arguments{ -\item{data}{a numeric data frame, matrix or a `red_analysis` object. If a -`red_analysis` object is provided as the data argument, the observation -component/score table is subjected to clustering.} - -\item{nfolds}{number of CV folds.} - -\item{kNN}{number of the nearest neighbors.} - -\item{simple_vote}{logical, should classical unweighted k-NN classification -be applied? If FALSE, distance-weighted k-NN is used with the provided kernel -function.} - -\item{resolve_ties}{logical, should the ties be resolved at random? Applies -only to the simple unweighted voting algorithm.} - -\item{kernel_fun}{kernel function transforming the distance into weight.} - -\item{clustering_fun}{clustering function. Should return a -`clust_analysis` object.} - -\item{seed}{initial setting of the random number generator.} - -\item{.parallel}{logical, should the CV be run in parallel?} - -\item{...}{extra arguments passed to the clustering_fun.} -} -\value{ -a list containing the global \code{\link{clust_analysis}} -object, projection (prediction) results and -prediction summary for each fold and a prediction -summary for the whole CV. -} -\description{ -Checks the stability of a clustering solution by -cross-validation (CV) and the classification error as a measure of -the cluster stability. 
-} -\details{ -By principle, similar to cross-validation of any machine learning -multi-level classifier. The training portion of a CV split is used to develop -a cluster structure and the projection on the test portion is accomplished -by k-nearest neighbor (kNN) label propagation algorithm. For its -implementation details, see: \code{\link{propagate}}. -The folds are generated with \code{\link[caret]{createFolds}}. -} -\references{ -Lange T, Roth V, Braun ML, Buhmann JM. Stability-based validation of -clustering solutions. Neural Comput (2004) 16:1299–1323. -doi:10.1162/089976604773717621 - -Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive semi-supervised -clustering algorithm with label propagation. J Softw Eng (2014) 8:14–22. -doi:10.3923/jse.2014.14.22 - -Kuhn M. Building predictive models in R using the caret package. -J Stat Softw (2008) 28:1–26. doi:10.18637/jss.v028.i05 -} diff --git a/man/dbscan_cluster.Rd b/man/dbscan_cluster.Rd deleted file mode 100644 index a9684be..0000000 --- a/man/dbscan_cluster.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering_functions.R -\name{dbscan_cluster} -\alias{dbscan_cluster} -\title{Density clustering with DBSCAN.} -\usage{ -dbscan_cluster( - data, - distance_method = "euclidean", - eps, - minPts = 5, - seed = 1234, - ... -) -} -\arguments{ -\item{data}{a numeric data frame or matrix or a red_analysis object.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}.} - -\item{eps}{size (radius) of the epsilon neighborhood.} - -\item{minPts}{number of minimum points required in the eps neighborhood for -core points (including the point itself).} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to \code{\link[dbscan]{dbscan}}.} -} -\value{ -an object of the class \code{\link{clust_analysis}}. 
-} -\description{ -Performs DBSCAN clustering analysis of a numeric data frame, -matrix or the results of a reduction analysis. -} -\details{ -Technically, a wrapper around \code{\link[dbscan]{dbscan}}. If a -red_analysis object is provided as the data argument, the observation -component/score table is subjected to clustering. -} -\references{ -Hahsler M, Piekenbrock M, Doran D. Dbscan: Fast density-based clustering -with R. J Stat Softw (2019) 91:1–30. doi:10.18637/jss.v091.i01 -} diff --git a/man/dist.Rd b/man/dist.Rd deleted file mode 100644 index 45ca275..0000000 --- a/man/dist.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generics.R -\name{dist} -\alias{dist} -\alias{dist.default} -\title{Distance between observations.} -\usage{ -dist(x, ...) - -\method{dist}{default}(x, ...) -} -\arguments{ -\item{x}{an object. For the default method a numeric matrix, data frame -or `dist` object.} - -\item{...}{arguments for methods, e.g. passed to \code{\link[stats]{dist}}.} -} -\description{ -Computes the distance between observations in a matrix, data frame -or other compatible objects. -} -\details{ -The default `dist()` method is a wrapper around -\code{\link[stats]{dist}}. -} diff --git a/man/extract.Rd b/man/extract.Rd deleted file mode 100644 index 29ee749..0000000 --- a/man/extract.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/extraction.R -\name{extract} -\alias{extract} -\alias{extract.clust_analysis} -\alias{model.frame.clust_analysis} -\alias{dist.clust_analysis} -\alias{extract.combi_analysis} -\alias{model.frame.combi_analysis} -\alias{dist.combi_analysis} -\alias{extract.red_analysis} -\alias{model.frame.red_analysis} -\title{Extract features of a clust_analysis object.} -\usage{ -extract(x, ...) - -\method{extract}{clust_analysis}( - x, - type = c("distance", "assignment", "clust_object", "data", "object"), - ... 
-) - -\method{model.frame}{clust_analysis}(formula, ...) - -\method{dist}{clust_analysis}(x, ...) - -\method{extract}{combi_analysis}( - x, - type = c("distance", "assignment", "clust_object", "data", "object"), - ... -) - -\method{model.frame}{combi_analysis}(formula, ...) - -\method{dist}{combi_analysis}(x, ...) - -\method{extract}{red_analysis}( - x, - type = c("component_tbl", "scores", "loadings", "data", "sdev", "object"), - ... -) - -\method{model.frame}{red_analysis}(formula, ...) -} -\arguments{ -\item{x}{an object.} - -\item{...}{extra arguments, currently none.} - -\item{type}{the feature name: -`distance` extracts the matrix with distances between the observations, -`data` the data set used for the analysis, -`assignment` assignment of the observations to the clusters, -`clust_object` or `object` returns the wrapped clustering object. -`component_tbl` or `scores` return the component or score tables for the -observations, -`loadings` retrieves the table of variable loadings (PCA only), -`sdev` returns standard deviations, associated with the -components (PCA only).} - -\item{formula}{an object.} -} -\value{ -the requested feature/property. -} -\description{ -A general extractor method for accessing properties and features -of a `clust_analysis`, `combi_analysis` and `red_analysis` object, -and specific methods for accessing the modeling data frame -and distance matrix. -} -\details{ -`extract()` is a S3 generic function. -} diff --git a/man/get_clust_tendency.Rd b/man/get_clust_tendency.Rd deleted file mode 100644 index a7dd86d..0000000 --- a/man/get_clust_tendency.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering_functions.R -\name{get_clust_tendency} -\alias{get_clust_tendency} -\title{Check clustering tendency of a data set.} -\usage{ -get_clust_tendency(data, n, seed = 1234, ...) -} -\arguments{ -\item{data}{a data frame, tibble or a matrix. 
Numeric variables only.} - -\item{n}{the number of points selected from sample space which is also the -number of points selected from the given sample (data).} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to -\code{\link[factoextra]{get_clust_tendency}}.} -} -\value{ -The values of the Hopkins statistic, p value and a heat map plot. -} -\description{ -Check clustering tendency of a data set as compared with a -random data set using Hopkins statistic. -} -\details{ -The p value for the Hopkins statistic is calculated based on the -beta distribution of its values. Technically, the function is an enriched -wrapper around \code{\link[factoextra]{get_clust_tendency}}. -} -\references{ -Hopkins B, Skellam JG. A New Method for determining the Type of -Distribution of Plant Individuals. -Ann Bot (1954) 18:213–227. doi:10.1093/OXFORDJOURNALS.AOB.A083391 -} diff --git a/man/get_data_dim.Rd b/man/get_data_dim.Rd deleted file mode 100644 index 84598ff..0000000 --- a/man/get_data_dim.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_data_dim} -\alias{get_data_dim} -\title{Get dimensions of a data frame or matrix.} -\usage{ -get_data_dim(data) -} -\arguments{ -\item{data}{a data frame or matrix.} -} -\value{ -a list with the requested statistics. -} -\description{ -Gets the number of observations and variables of -the given object. 
-} diff --git a/man/get_sum_sq.Rd b/man/get_sum_sq.Rd deleted file mode 100644 index a7e2b9b..0000000 --- a/man/get_sum_sq.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_sum_sq} -\alias{get_sum_sq} -\title{Calculate clustering sum of squares.} -\usage{ -get_sum_sq(dist_mtx, assignment) -} -\arguments{ -\item{dist_mtx}{a numeric matrix with the distances.} - -\item{assignment}{a data frame with the variable 'clust_id' specifying the -assignment of the observations to the clusters.} -} -\value{ -a list with the values of within-cluster ss for the particular -clusters, total within-cluster ss, total ss, total between-cluster ss as well -as the ratio of between-cluster ss to total ss, interpreted as the fraction -of 'explained' clustering variance. -} -\description{ -Calculates total, within cluster and between cluster -sum of squares (ss). -} -\details{ -The calculation method is independent of the clustering method. -} diff --git a/man/hcluster.Rd b/man/hcluster.Rd deleted file mode 100644 index 04e78bd..0000000 --- a/man/hcluster.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering_functions.R -\name{hcluster} -\alias{hcluster} -\title{Hierarchical clustering.} -\usage{ -hcluster( - data, - distance_method = "euclidean", - k = 2, - hc_method = "ward.D2", - seed = 1234, - ... -) -} -\arguments{ -\item{data}{a numeric data frame or matrix or a red_analysis object.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}.} - -\item{k}{number of clusters.} - -\item{hc_method}{the hierarchical clustering algorithm, see: -\code{\link[stats]{hclust}} for details.} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to \code{\link[stats]{hclust}}.} -} -\value{ -an object of the class \code{\link{clust_analysis}}. 
-} -\description{ -Performs hierarchical clustering analysis of a numeric data -frame, matrix or the results of a reduction analysis. -} -\details{ -Technically, a wrapper around \code{\link[stats]{hclust}}. If a -red_analysis object is provided as the data argument, the observation -component/score table is subjected to clustering. -} -\references{ -Murtagh F, Contreras P. Algorithms for hierarchical clustering: -An overview. Wiley Interdiscip Rev Data Min Knowl Discov -(2012) 2:86–97. doi:10.1002/widm.53 -} diff --git a/man/impact.Rd b/man/impact.Rd deleted file mode 100644 index ad62a85..0000000 --- a/man/impact.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feature_importance.R -\name{impact} -\alias{impact} -\alias{impact.clust_analysis} -\alias{impact.combi_analysis} -\title{Permutation importance of clustering features.} -\usage{ -impact(x, ...) - -\method{impact}{clust_analysis}(x, n_iter = 1, seed = 1234, .parallel = FALSE, ...) - -\method{impact}{combi_analysis}(x, n_iter = 1, seed = 1234, .parallel = FALSE, ...) -} -\arguments{ -\item{x}{a `clust_analysis` object.} - -\item{...}{extra arguments, currently none.} - -\item{n_iter}{number of iterations, 1 by default. -If the argument is larger than 1, the function is run multiple times, -which may help at testing variable importance in a more objective way -for different permutations.} - -\item{seed}{initial setting of the random number generator.} - -\item{.parallel}{logical, should the CV be run in parallel? Experimental.} -} -\value{ -a data frame of class \code{\link{importance}} with the defined -\code{\link{plot.importance}} and \code{\link{summary.importance}} methods. 
-} -\description{ -Determines importance of specific clustering variables by -comparing the fraction of 'explained' clustering variance of the input -clustering object and the object generated with the variable -re-shuffled randomly - so called 'permutation' importance. -} -\details{ -The permutation importance algorithm is 'blind' or agnostic to the -clustering procedure. -`impact()` is a S3 generic function. -} -\references{ -Breiman L. Random forests. Mach Learn (2001) 45:5–32. -doi:10.1023/A:1010933404324 -} diff --git a/man/importance.Rd b/man/importance.Rd deleted file mode 100644 index 4158b71..0000000 --- a/man/importance.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{importance} -\alias{importance} -\title{Construct an importance object.} -\usage{ -importance(x) -} -\arguments{ -\item{x}{a data frame with the columns 'total_wss', 'total_ss', 'between_ss', -'frac_var', 'variable' and 'frac_diff'.} -} -\value{ -a tibble of the `importance` class. -} -\description{ -Constructs an object of class `importance` on the top of -a tibble with the importance testing results. -} diff --git a/man/importance_cluster.Rd b/man/importance_cluster.Rd deleted file mode 100644 index 8ff9104..0000000 --- a/man/importance_cluster.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feature_importance.R -\name{importance_cluster} -\alias{importance_cluster} -\title{Determine clustering feature importance.} -\usage{ -importance_cluster( - data, - clustering_fun = kcluster, - seed = 1234, - .parallel = FALSE, - ... -) -} -\arguments{ -\item{data}{a numeric data frame, matrix or a red_analysis object. If a -red_analysis object is provided as the data argument, the observation -component/score table is subjected to clustering.} - -\item{clustering_fun}{clustering function. 
Should return a -`clust_analysis` object.} - -\item{seed}{initial setting of the random number generator.} - -\item{.parallel}{logical, should the CV be run in parallel?} - -\item{...}{extra arguments passed to the clustering_fun.} -} -\value{ -a data frame with the values of sum of squares and the clustering -variances. -} -\description{ -Determines importance of specific clustering variables by -comparing the fraction of 'explained' clustering variance of the input -clustering object and the object generated with the variable -re-shuffled randomly - i.e. so called 'permutation' importance. -} -\references{ -Breiman L. Random forests. Mach Learn (2001) 45:5–32. -doi:10.1023/A:1010933404324 -} diff --git a/man/is_clust_analysis.Rd b/man/is_clust_analysis.Rd deleted file mode 100644 index cdcbd03..0000000 --- a/man/is_clust_analysis.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class_testing.R -\name{is_clust_analysis} -\alias{is_clust_analysis} -\alias{is_combi_analysis} -\alias{is_red_analysis} -\alias{is_importance} -\alias{is_cross_dist} -\alias{is_sil_extra} -\title{Test class inheritance.} -\usage{ -is_clust_analysis(x) - -is_combi_analysis(x) - -is_red_analysis(x) - -is_importance(x) - -is_cross_dist(x) - -is_sil_extra(x) -} -\arguments{ -\item{x}{an object.} -} -\value{ -a logical value. -} -\description{ -Tests if the object is an instance of the `red_analysis`, `clust_analysis`, -`combi_analysis`, `importance`, `cross_dist` or `sil_extra` class. -} diff --git a/man/kcluster.Rd b/man/kcluster.Rd deleted file mode 100644 index 7f1ae0d..0000000 --- a/man/kcluster.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering_functions.R -\name{kcluster} -\alias{kcluster} -\title{K-means or medoid clustering.} -\usage{ -kcluster( - data, - distance_method = "euclidean", - clust_fun = c("kmeans", "pam"), - k = 2, - seed = 1234, - ... 
-) -} -\arguments{ -\item{data}{a numeric data frame or matrix or a red_analysis object.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}.} - -\item{clust_fun}{the name of the clustering function, currently implemented -are 'kmeans' and 'pam'.} - -\item{k}{number of clusters.} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to \code{\link[stats]{kmeans}} or -\code{\link[cluster]{pam}}.} -} -\value{ -an object of the class \code{\link{clust_analysis}}. -} -\description{ -Performs k-means and PAM (partition around medoids) clustering -analysis of a numeric data frame, matrix or the results of -a reduction analysis. -} -\details{ -Technically, a wrapper around \code{\link[stats]{kmeans}} and -\code{\link[cluster]{pam}}. If a red_analysis object is provided as the -data argument, the observation component/score table is subjected to -clustering. -} -\references{ -Hartigan JA, Wong MA. Algorithm AS 136: A K-Means Clustering Algorithm. -Appl Stat (1979) 28:100. doi:10.2307/2346830 - -Schubert E, Rousseeuw PJ. Faster k-Medoids Clustering: Improving the PAM, -CLARA, and CLARANS Algorithms. in Lecture Notes in Computer Science -(including subseries Lecture Notes in Artificial Intelligence and -Lecture Notes in Bioinformatics) (Springer), 171–187. -doi:10.1007/978-3-030-32047-8_16 -} diff --git a/man/mds.Rd b/man/mds.Rd deleted file mode 100644 index e3e2136..0000000 --- a/man/mds.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduction_functions.R -\name{mds} -\alias{mds} -\alias{pca} -\alias{umap} -\alias{fa} -\title{Perform dimensionality reduction.} -\usage{ -mds(data, distance_method = "euclidean", kdim = 2, ...) - -pca(data, kdim = 2, ...) - -umap(data, distance_method, kdim, ...) - -fa(data, kdim = 2, ...)
-} -\arguments{ -\item{data}{a data frame.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}.} - -\item{kdim}{dimension number.} - -\item{...}{extra arguments passed to -\code{\link[stats]{cmdscale}}, \code{\link[pcaPP]{PCAproj}}, -\code{\link[umap]{umap}} or \code{\link[stats]{factanal}}.} -} -\value{ -an object of the class \code{\link{red_analysis}}. -} -\description{ -Performs -multi-dimensional scaling (`mds()` via \code{\link[stats]{cmdscale}}), -principal component analysis (`pca()` via \code{\link[pcaPP]{PCAproj}}), -UMAP (`umap()` via \code{\link[umap]{umap}}), -or factor analysis (`fa()` via \code{\link[stats]{factanal}}). -} -\details{ -UMAP parameters such as dimension number or distance are provided -as a \code{\link[umap]{umap.defaults}} object. -} -\references{ -McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation and -Projection for Dimension Reduction. (2018) -Available at: https://arxiv.org/abs/1802.03426v3 - -Croux C, Filzmoser P, Oliveira MR. Algorithms for Projection-Pursuit robust -principal component analysis. Chemom Intell Lab Syst (2007) 87:218–225. -doi:10.1016/j.chemolab.2007.01.004 - -BARTLETT MS. THE STATISTICAL CONCEPTION OF MENTAL FACTORS. Br J Psychol -Gen Sect (1937) 28:97–104. doi:10.1111/j.2044-8295.1937.tb00863.x -} diff --git a/man/ngroups.Rd b/man/ngroups.Rd deleted file mode 100644 index abb7cf1..0000000 --- a/man/ngroups.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/numbers.R -\name{ngroups} -\alias{ngroups} -\alias{ngroups.clust_analysis} -\alias{ngroups.combi_analysis} -\title{Numbers of observations in the clusters.} -\usage{ -ngroups(x, ...) - -\method{ngroups}{clust_analysis}(x, ...) - -\method{ngroups}{combi_analysis}(x, ...)
-} -\arguments{ -\item{x}{an object.} - -\item{...}{extra arguments passed to methods, currently none.} -} -\description{ -Compute numbers of observations in the clusters or, for `combi_analysis` -objects, numbers of observations in the SOM nodes and clusters. -} -\details{ -`ngroups()` is a S3 generic function. -} diff --git a/man/nobs.clust_analysis.Rd b/man/nobs.clust_analysis.Rd deleted file mode 100644 index 000ba1d..0000000 --- a/man/nobs.clust_analysis.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/numbers.R -\name{nobs.clust_analysis} -\alias{nobs.clust_analysis} -\alias{nobs.red_analysis} -\alias{nobs.combi_analysis} -\title{Number of observations and variables for dimensionality reduction and clustering.} -\usage{ -\method{nobs}{clust_analysis}(object, ...) - -\method{nobs}{red_analysis}(object, ...) - -\method{nobs}{combi_analysis}(object, ...) -} -\arguments{ -\item{object}{an object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a list with the numbers of observations and variables. -} -\description{ -Computes numbers of observations and variables used in the analyses. -} diff --git a/man/plot.clust_analysis.Rd b/man/plot.clust_analysis.Rd deleted file mode 100644 index ea31639..0000000 --- a/man/plot.clust_analysis.Rd +++ /dev/null @@ -1,78 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clust_analysis_oop.R -\name{plot.clust_analysis} -\alias{plot.clust_analysis} -\title{Plot selected features of a clust_analysis object.} -\usage{ -\method{plot}{clust_analysis}( - x, - type = c("diagnostic", "components", "heat_map", "training", "data"), - cust_theme = ggplot2::theme_classic(), - jitter_width = 0, - jitter_height = 0, - point_alpha = 1, - ... 
-) -} -\arguments{ -\item{x}{a `clust_analysis` object.} - -\item{type}{the type of plots: -'diagnostic' returns a series of diagnostic plots, -for non-SOM clustering those include a dendrogram (hierarchical clustering), -WSS and silhouette curve (see: \code{\link{plot_nbclust}}) or the complete -output of \code{\link[kohonen]{plot.kohonen}}; -'components' plots the results of reduction analysis done with the clustering -data or the distance matrix (see: \code{\link{components.clust_analysis}}); -'heat_map' plots the distances between observations as a heat map, 'training' -plots the mean distance to the SOM winning unit as a function of the -iteration number; 'data' works only if reduction analysis results were used -for clustering and plots the first two components/dimensions.} - -\item{cust_theme}{a ggplot theme.} - -\item{jitter_width}{horizontal jittering of the points in the plots.} - -\item{jitter_height}{vertical jittering of the points in the plots.} - -\item{point_alpha}{scatter plot's point alpha.} - -\item{...}{extra arguments passed to \code{\link{components.clust_analysis}}.} -} -\value{ -a ggplot object or a list of ggplot objects, as specified by the -'type' argument -} -\description{ -The plotting method for the `clust_analysis` class. Enables -plotting of the standard diagnostic plots used for the optimal cluster number -determination (dendrogram, WSS- and silhouette curve), results of the -reduction analysis, heat map of the distances between the observations as -well as the self-organizing map training process. It is also possible to plot -the first two variables of the clustering data frame, an option which is -attractive, if the clustering of reduction analysis was performed. -} -\references{ -Kassambara A, Mundt F. factoextra: Extract and Visualize the Results -of Multivariate Data Analyses. (2020) Available -at: https://cran.r-project.org/web/packages/factoextra/index.html - -Galili T. 
dendextend: an R package for visualizing, adjusting and -comparing trees of hierarchical clustering. -Bioinformatics (2015) 31:3718–20. doi:10.1093/bioinformatics/btv428 - -McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation -and Projection for Dimension Reduction. (2018) Available -at: https://arxiv.org/abs/1802.03426v3 - -Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: -Earth and Environmental Science Determination of Optimal Epsilon (Eps) -Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in -Sumatra Related content EPS conference comes to London-EPS rewards -quasiparticle research-EP. IOP Conf Ser Earth Environ Sci (2016) 31: -doi:10.1088/1755-1315/31/1/012012 - -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. -doi:10.1016/0377-0427(87)90125-7 -} diff --git a/man/plot.combi_analysis.Rd b/man/plot.combi_analysis.Rd deleted file mode 100644 index ccc16f2..0000000 --- a/man/plot.combi_analysis.Rd +++ /dev/null @@ -1,80 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/combi_analysis_oop.R -\name{plot.combi_analysis} -\alias{plot.combi_analysis} -\title{Plot selected features of a combi_analysis object.} -\usage{ -\method{plot}{combi_analysis}( - x, - type = c("diagnostic", "components", "heat_map", "training", "data"), - cust_theme = ggplot2::theme_classic(), - jitter_width = 0, - jitter_height = 0, - point_alpha = 1, - ... 
-) -} -\arguments{ -\item{x}{a `combi_analysis` object.} - -\item{type}{the type of plots: -'diagnostic' returns a series of diagnostic plots for the SOM construction -and clustering of the nodes; -'components' plots the results of reduction analysis done with the clustering -data or the distance matrix (see: \code{\link{components.clust_analysis}}); -'heat_map' plots the distances between observations and nodes as a heat maps, -'training' plots the mean distance to the SOM winning unit as a function -of the iteration number; 'data' works only if reduction analysis results -were used for clustering and plots the first two components/dimensions.} - -\item{cust_theme}{a ggplot theme.} - -\item{jitter_width}{horizontal jittering of the points in the plots.} - -\item{jitter_height}{vertical jittering of the points in the plots.} - -\item{point_alpha}{scatter plot's point alpha.} - -\item{...}{extra arguments passed to \code{\link{components.clust_analysis}}.} -} -\value{ -a ggplot object or a list of ggplot objects, as specified by the -'type' argument -} -\description{ -The plotting method for the `combi_analysis` class. Enables -plotting of the standard diagnostic plots used for the optimal cluster number -determination for the node clustering (dendrogram, WSS- and -silhouette curve), results of the reduction analysis, -heat map of the distances between the observations and SOM nodes as -well as the self-organizing map training process. It is also possible to plot -the first two variables of the clustering data frame, an option which is -attractive, if the clustering of reduction analysis was performed. -} -\references{ -Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. -J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 - -Kassambara A, Mundt F. factoextra: Extract and Visualize the Results -of Multivariate Data Analyses. (2020) Available -at: https://cran.r-project.org/web/packages/factoextra/index.html - -Galili T. 
dendextend: an R package for visualizing, adjusting and -comparing trees of hierarchical clustering. -Bioinformatics (2015) 31:3718–20. doi:10.1093/bioinformatics/btv428 - -McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation -and Projection for Dimension Reduction. (2018) Available -at: https://arxiv.org/abs/1802.03426v3 - -Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: -Earth and Environmental Science Determination of Optimal Epsilon (Eps) -Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in -Sumatra Related content EPS conference comes to London-EPS rewards -quasiparticle research-EP. IOP Conf Ser Earth Environ Sci (2016) 31: -doi:10.1088/1755-1315/31/1/012012 - -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. -doi:10.1016/0377-0427(87)90125-7 -} diff --git a/man/plot.cross_dist.Rd b/man/plot.cross_dist.Rd deleted file mode 100644 index f84e4ee..0000000 --- a/man/plot.cross_dist.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cross_dist_oop.R -\name{plot.cross_dist} -\alias{plot.cross_dist} -\title{Plots of cross-distances.} -\usage{ -\method{plot}{cross_dist}( - x, - type = c("heat_map", "mean", "histogram"), - reorder = FALSE, - upper = TRUE, - signif_digits = 2, - txt_size = 2.75, - labeller = NULL, - cust_theme = ggplot2::theme_classic(), - ... 
-) -} -\arguments{ -\item{x}{a `cross_dist` class object.} - -\item{type}{type of the plot: -`heat_map` (default) generates a heat map of -homologous or heterologous cross-distances for observation pairs with -mean distances and 95\% ranges of distances, -`mean` plots mean distances with 95\% ranges as a heat map, and -`histogram` generates a faceted panel of cross-distance histograms -(for heterologous distances, x object clusters are represented by horizontal -facets, y object clusters are represented by vertical facets).} - -\item{reorder}{logical: should distances in the heat maps be ordered -by mean distance? Defaults to FALSE.} - -\item{upper}{should the upper half of the distance heat map be plotted? -Defaults to TRUE.} - -\item{signif_digits}{significant digits for mean distances and distance -ranges presented in the heat map.} - -\item{txt_size}{size of the mean distance text.} - -\item{labeller}{a \code{\link[ggplot2]{labeller}} object to provide -customized labels of the facets of the histogram panel.} - -\item{cust_theme}{a custom ggplot theme.} - -\item{...}{extra arguments, such as color or number of bins, passed to -\code{\link[ggplot2]{geom_histogram}}.} -} -\value{ -a `ggplot` graphic, whose elements like themes or fill scales can be easily -modified by the user. -} -\description{ -Visualizes pairwise cross-distances as heat maps for observation pairs, -heat maps of average cross-distances between the clusters or histograms.
-} diff --git a/man/plot.importance.Rd b/man/plot.importance.Rd deleted file mode 100644 index a270fb8..0000000 --- a/man/plot.importance.Rd +++ /dev/null @@ -1,79 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/importance_oop.R -\name{plot.importance} -\alias{plot.importance} -\title{Plot feature importance as a scatter or bar plot.} -\usage{ -\method{plot}{importance}( - x, - type = c("scatter", "bar"), - fill_color = "cornsilk3", - point_color = fill_color, - point_size = 2, - point_alpha = 0.5, - point_wjitter = 0, - point_hjitter = 0.1, - box_alpha = 0.25, - label = TRUE, - txt_size = 2.75, - signif_digits = 2, - plot_title = NULL, - plot_subtitle = NULL, - plot_tag = NULL, - cust_theme = ggplot2::theme_classic(), - ... -) -} -\arguments{ -\item{x}{an `importance` object.} - -\item{type}{type of the plot: scatter or bar. Defaults to scatter. -This parameter is silently ignored, if evaluation of the importance was done -in multiple iterations -(e.g. `n_iter` set to > 1 in \code{\link{impact.clust_analysis}}). 
-In such cases, a box plot of importance metrics obtained in the algorithm -iterations is generated.} - -\item{fill_color}{fill color for the bars or boxes.} - -\item{point_color}{color of the points, refers only to scatter and box plots.} - -\item{point_size}{size of the points, refers only to scatter and box plots.} - -\item{point_alpha}{alpha of the points, refers only to box plots.} - -\item{point_wjitter}{width of the data point jittering, refers only to -box plots.} - -\item{point_hjitter}{height of the data point jittering, refers only to -box plots.} - -\item{box_alpha}{alpha of the boxes, refers only to box plots.} - -\item{label}{logical, should the points be labeled with the importance -stat value?} - -\item{txt_size}{label text size.} - -\item{signif_digits}{significant digits for rounding of the statistic value.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{plot_tag}{plot tag.} - -\item{cust_theme}{a ggplot theme.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a ggplot bar, scatter or box plot. -} -\description{ -Generates a bar, scatter or box plot with the importance -statistic for the clustering variables. -The importance statistic is the difference in the -clustering variance fraction between the original clustering structure and -the clustering objects with the given variable reshuffled randomly. -} diff --git a/man/plot.red_analysis.Rd b/man/plot.red_analysis.Rd deleted file mode 100644 index 9c04199..0000000 --- a/man/plot.red_analysis.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/red_analysis_oop.R -\name{plot.red_analysis} -\alias{plot.red_analysis} -\title{Plot features of a red_analysis object.} -\usage{ -\method{plot}{red_analysis}( - x, - type = c("component_tbl", "scores", "loadings", "scree"), - label_points = TRUE, - cust_theme = ggplot2::theme_classic(), - segment_color = "steelblue", - ...
-) -} -\arguments{ -\item{x}{a `red_analysis` object, created with \code{\link{reduce_data}}.} - -\item{type}{plot type: -'component_tbl' or 'scores' present the scores for particular observations in -a scatter plot. -'loadings' plots the variable PCA loadings as a scatter plot. -'scree' plots the percentages of the components' variances as a line plot.} - -\item{label_points}{logical, should the variable names be displayed in the -plot? Valid only for the PCA loadings plot.} - -\item{cust_theme}{a ggplot plot theme.} - -\item{segment_color}{color of the lines presented in the PCA loading plot.} - -\item{...}{extra arguments passed to \code{\link{plot_point}}.} -} -\value{ -a ggplot object. -} -\description{ -Plots the component table, loadings table - in both cases the -first two components/dimensions in form of scatter plots - or generates -a scree plot of the variance percentages associated with -the components/dimensions. -} -\details{ -The loadings table plot is available only for the PCA `red_analysis` -objects. -} diff --git a/man/plot.sil_extra.Rd b/man/plot.sil_extra.Rd deleted file mode 100644 index b73d3c3..0000000 --- a/man/plot.sil_extra.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sil_oop.R -\name{plot.sil_extra} -\alias{plot.sil_extra} -\title{Plots of silhouette statistics.} -\usage{ -\method{plot}{sil_extra}( - x, - fill_by = c("cluster", "neighbor", "value", "sign"), - show_stats = TRUE, - signif_digits = 2, - cust_theme = ggplot2::theme_classic(), - bar_color = "black", - ... -) -} -\arguments{ -\item{x}{an object of the \code{\link{sil_extra}} class.} - -\item{fill_by}{defines the color coding of the bar fill color. -For `cluster`, the bars are colored after cluster assignment of the -observations (default). -For `neighbor`, the bar color codes for the nearest neighbor cluster. -For `value`, the bar color codes for the silhouette width.
-For `sign`, the bar color represents the sign of the silhouette width.} - -\item{show_stats}{logical, should the number of observations in the cluster, -percentage of negative silhouette widths and average silhouette statistic -be shown in the plot? Defaults to TRUE.} - -\item{signif_digits}{significant digits used for rounding of the statistics -presented in the plot.} - -\item{cust_theme}{custom ggplot theme.} - -\item{bar_color}{color of the bar line.} - -\item{...}{extra arguments passed to \code{\link[ggplot2]{geom_bar}}.} -} -\description{ -Generates a classical bar plot of silhouette width distribution in clusters. -} -\references{ -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. -doi:10.1016/0377-0427(87)90125-7 -} diff --git a/man/plot_clust_hm.Rd b/man/plot_clust_hm.Rd deleted file mode 100644 index c048491..0000000 --- a/man/plot_clust_hm.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/extra_graphs.R -\name{plot_clust_hm} -\alias{plot_clust_hm} -\title{Plot levels of clustering features in a heat map.} -\usage{ -plot_clust_hm( - x_object, - y_object = NULL, - plot_title = NULL, - plot_subtitle = NULL, - x_lab = "Sample", - fill_lab = "Feature level", - cust_theme = ggplot2::theme_classic(), - discrete_fill = FALSE -) -} -\arguments{ -\item{x_object}{a `clust_analysis` or `combi_analysis` object, specifies -clustering of the observations.} - -\item{y_object}{a `clust_analysis` or `combi_analysis` object, specifies -clustering of the features. Optional.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{x_lab}{x axis title.} - -\item{fill_lab}{fill scale title.} - -\item{cust_theme}{a ggplot theme.} - -\item{discrete_fill}{logical, force a discrete fill scale?} -} -\value{ -a ggplot graph. 
-} -\description{ -Generates a heat map of the clustering features, cluster -assignment is indicated by the plot faceting. -} diff --git a/man/plot_dendro.Rd b/man/plot_dendro.Rd deleted file mode 100644 index 03c56cb..0000000 --- a/man/plot_dendro.Rd +++ /dev/null @@ -1,65 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_dendro} -\alias{plot_dendro} -\title{Plot a dendrogram.} -\usage{ -plot_dendro( - clust_str, - k, - labels = TRUE, - cluster_colors = NULL, - cluster_labels = paste0("Cluster #", 1:k), - cluster_leg_title = "Cluster", - plot_title = NULL, - plot_subtitle = NULL, - plot_tag = NULL, - y_lab = NULL, - cust_theme = ggplot2::theme_classic(), - ... -) -} -\arguments{ -\item{clust_str}{an object of the 'hclust' class.} - -\item{k}{an integer, the cluster number.} - -\item{labels}{logical, should observation labels be presented in the x axis?} - -\item{cluster_colors}{colors of the cluster branches, a vector of the length -k + 1. The last color codes for the connector branches.} - -\item{cluster_labels}{cluster names, a text vector of the length k.} - -\item{cluster_leg_title}{cluster legend title.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{plot_tag}{plot tag.} - -\item{y_lab}{y axis title.} - -\item{cust_theme}{custom plot theme, a ggplot2 theme object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -A ggplot object. -} -\description{ -Plots a dendrogram given a clustering object generated by -\code{\link[stats]{hclust}}. -} -\details{ -The dendrogram structure is generated with the -\code{\link[stats]{as.dendrogram}} function and graphical layout provided -by \code{\link[dendextend]{color_branches}} and -\code{\link[dendextend]{set}}. -} -\references{ -Galili T. dendextend: an R package for visualizing, adjusting and -comparing trees of hierarchical clustering. Bioinformatics (2015) 31:3718–20.
-doi:10.1093/bioinformatics/btv428 -} diff --git a/man/plot_knn_distance.Rd b/man/plot_knn_distance.Rd deleted file mode 100644 index 3bc3d89..0000000 --- a/man/plot_knn_distance.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_knn_distance} -\alias{plot_knn_distance} -\title{Plot the mean distance to k-nearest neighbors.} -\usage{ -plot_knn_distance( - diss_obj, - k, - eps = NULL, - plot_title = NULL, - plot_subtitle = NULL, - plot_tag = NULL, - cust_theme = ggplot2::theme_classic() -) -} -\arguments{ -\item{diss_obj}{a dissimilarity object (e.g. 'dist' class).} - -\item{k}{the k number of the nearest neighbors.} - -\item{eps}{the distance to be presented in the plot as a horizontal dashed -line. If NULL, the line is hidden.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{plot_tag}{plot tag.} - -\item{cust_theme}{custom plot theme, a ggplot2 theme object.} -} -\value{ -A ggplot object. -} -\description{ -Plots the sorted (ascending) distances to k-nearest neighbors -(kNN) for each observation in the provided dissimilarity object. -} -\details{ -Internally, the mean kNN distances are calculated with the -\code{\link[dbscan]{kNNdist}} function. -} -\references{ -Hahsler M, Piekenbrock M, Doran D. Dbscan: Fast density-based clustering -with R. J Stat Softw (2019) 91:1–30. doi:10.18637/jss.v091.i01 - -Belyadi H, Haghighat A, Nguyen H, Guerin A-J. IOP Conference Series: -Earth and Environmental Science Determination of Optimal Epsilon (Eps) -Value on DBSCAN Algorithm to Clustering Data on Peatland Hotspots in -Sumatra Related content EPS conference comes to London-EPS rewards -quasiparticle research-EP. 
IOP Conf Ser Earth Environ Sci (2016) 31: -doi:10.1088/1755-1315/31/1/012012 -} diff --git a/man/plot_nbclust.Rd b/man/plot_nbclust.Rd deleted file mode 100644 index c0bec6a..0000000 --- a/man/plot_nbclust.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_nbclust} -\alias{plot_nbclust} -\title{Plot WSS curve and silhouette statistic values as a function of cluster -number.} -\usage{ -plot_nbclust( - data, - k, - FUNcluster = NULL, - method = c("wss", "silhouette", "gap_stat"), - plot_title = NULL, - plot_subtitle = NULL, - plot_tag = NULL, - cust_theme = ggplot2::theme_classic(), - ... -) -} -\arguments{ -\item{data}{a numeric matrix with the distances or a data frame.} - -\item{k}{an integer, the cluster number.} - -\item{FUNcluster}{a clustering function. See: -\code{\link[factoextra]{fviz_nbclust}} for details.} - -\item{method}{a statistic to be plotted. See: -\code{\link[factoextra]{fviz_nbclust}} for details.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{plot_tag}{plot tag.} - -\item{cust_theme}{custom plot theme, a ggplot2 theme object.} - -\item{...}{extra arguments passed to \code{\link[factoextra]{fviz_nbclust}}.} -} -\value{ -a ggplot object. -} -\description{ -Plots the values of the total within-cluster sum-of-squares and -silhouette statistic as a function of the cluster number. -} -\details{ -Takes a distance matrix (e.g. the \code{\link{get_kernel_info}} -output) and a clustering function, for the details, see: -\code{\link[factoextra]{fviz_nbclust}}. -} -\references{ -Kassambara A, Mundt F. factoextra: Extract and Visualize the Results -of Multivariate Data Analyses. (2020) Available at: -https://cran.r-project.org/web/packages/factoextra/index.html - -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. 
-doi:10.1016/0377-0427(87)90125-7 -} diff --git a/man/plot_point.Rd b/man/plot_point.Rd deleted file mode 100644 index 0f2ceed..0000000 --- a/man/plot_point.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_point} -\alias{plot_point} -\title{Generate a custom scatter ggplot.} -\usage{ -plot_point( - data, - x_var, - y_var, - fill_var = NULL, - label_var = NULL, - plot_title = NULL, - plot_subtitle = NULL, - plot_tag = NULL, - x_lab = x_var, - y_lab = y_var, - fill_lab = NULL, - cust_theme = ggplot2::theme_classic(), - point_color = "steelblue", - point_alpha = 1, - show_segments = FALSE, - segment_color = "steelblue", - segment_alpha = 1, - label_color = point_color, - txt_color = "black", - txt_size = 2.5, - txt_type = c("label", "text"), - jitter_width = 0, - jitter_height = 0 -) -} -\arguments{ -\item{data}{a data frame.} - -\item{x_var}{the name of the variable to be presented in the x axis.} - -\item{y_var}{the name of the variable to be presented in the y axis.} - -\item{fill_var}{optional, the name of the variable coded by the point fill. -If NULL, the point fill is specified by the point_color argument.} - -\item{label_var}{optional, the name of the variable to be presented in the -point labels. 
If NULL, no point labels are displayed.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{plot_tag}{plot tag.} - -\item{x_lab}{x axis title.} - -\item{y_lab}{y axis title.} - -\item{fill_lab}{fill legend title.} - -\item{cust_theme}{custom plot theme, a ggplot2 theme object.} - -\item{point_color}{point fill color.} - -\item{point_alpha}{point alpha.} - -\item{show_segments}{logical, should lines connecting the (0,0) point with -the plot point be displayed?} - -\item{segment_color}{color of the connecting lines.} - -\item{segment_alpha}{alpha of the connecting lines.} - -\item{label_color}{color of the text labels.} - -\item{txt_color}{color of the text presented in the labels.} - -\item{txt_size}{size of the text presented in the labels.} - -\item{txt_type}{type of the displayed text: either as geom_text or geom_label} - -\item{jitter_width}{horizontal jittering of the points.} - -\item{jitter_height}{vertical jittering of the points.} -} -\value{ -a ggplot object. -} -\description{ -Generates a simple scatter ggplot. -} diff --git a/man/plot_som.Rd b/man/plot_som.Rd deleted file mode 100644 index bc7c8e2..0000000 --- a/man/plot_som.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_som} -\alias{plot_som} -\title{Plot diagnostic plots for the self-organizing map.} -\usage{ -plot_som(kohonen_object) -} -\arguments{ -\item{kohonen_object}{a 'kohonen' class object. -See: \code{\link[kohonen]{som}} for details.} -} -\value{ -a list of non-editable ggplot objects. -} -\description{ -Generates a set of diagnostic plots for the 'kohonen' class -object as specified by \code{\link[kohonen]{plot.kohonen}}. -} -\details{ -The plots are wrapped as a list of (non-editable) ggplot objects. -} -\references{ -Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. -J Stat Softw (2018) 87:1–18. 
doi:10.18637/jss.v087.i07 -} diff --git a/man/plot_train_som.Rd b/man/plot_train_som.Rd deleted file mode 100644 index d8028e9..0000000 --- a/man/plot_train_som.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{plot_train_som} -\alias{plot_train_som} -\title{Visualize the SOM training process.} -\usage{ -plot_train_som( - kohonen_object, - plot_title = NULL, - plot_subtitle = NULL, - cust_theme = ggplot2::theme_classic(), - ... -) -} -\arguments{ -\item{kohonen_object}{a 'kohonen' class object.} - -\item{plot_title}{plot title.} - -\item{plot_subtitle}{plot subtitle.} - -\item{cust_theme}{custom plot theme, a ggplot2 theme object.} - -\item{...}{extra arguments, currently none specified.} -} -\value{ -a ggplot object. -} -\description{ -Plots the mean distance to the neuron/winning unit as a -function of the iteration number. -} -\references{ -Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen 3.0. -J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 -} diff --git a/man/predict.clust_analysis.Rd b/man/predict.clust_analysis.Rd deleted file mode 100644 index 6ad7caf..0000000 --- a/man/predict.clust_analysis.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/semi_supervised.R -\name{predict.clust_analysis} -\alias{predict.clust_analysis} -\alias{predict.combi_analysis} -\title{Semi-supervised clustering.} -\usage{ -\method{predict}{clust_analysis}(object, newdata = NULL, type = c("class", "propagation"), ...) - -\method{predict}{combi_analysis}(object, newdata = NULL, type = c("class", "propagation"), ...) -} -\arguments{ -\item{object}{an object.} - -\item{newdata}{a numeric data frame, matrix or a red_analysis object. 
-If NULL (default), the bare cluster assignment table is returned.} - -\item{type}{type of the projection: simple observation matching -('class', default) or kNN label propagation ('propagation').} - -\item{...}{extra arguments passed to \code{\link{propagate}}.} -} -\value{ -a \code{\link{clust_analysis}} object. -} -\description{ -Projects the cluster assignment onto new data using simple -observation matching or a k-nearest neighbor (kNN) label propagation -algorithm. -} -\details{ -For the implementation details, see: \code{\link{propagate}}. -The default distance metric is extracted from the `clust_analysis` object. -For `combi_analysis` objects, the default distance metric is the distance -between observations (not nodes!). -The cluster projection is done on the top level, i.e. takes into account the -final assignment of the observations to the clusters and ignoring -the SOM nodes. -} -\references{ -Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive -semi-supervised clustering algorithm with label propagation. -J Softw Eng (2014) 8:14–22. doi:10.3923/jse.2014.14.22 -} diff --git a/man/print.clust_analysis.Rd b/man/print.clust_analysis.Rd deleted file mode 100644 index b349597..0000000 --- a/man/print.clust_analysis.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/appearance.R -\name{print.clust_analysis} -\alias{print.clust_analysis} -\alias{print.combi_analysis} -\alias{print.cross_dist} -\alias{print.red_analysis} -\title{Printing of a clust_analysis object.} -\usage{ -\method{print}{clust_analysis}(x, ...) - -\method{print}{combi_analysis}(x, ...) - -\method{print}{cross_dist}(x, ...) - -\method{print}{red_analysis}(x, ...) -} -\arguments{ -\item{x}{an object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -nothing, called for side effects. -} -\description{ -Prints a `clust_analysis`, `combi_analysis`, `red_analysis` -or `cross_dist` object. 
-} diff --git a/man/propagate.Rd b/man/propagate.Rd deleted file mode 100644 index db8e71e..0000000 --- a/man/propagate.Rd +++ /dev/null @@ -1,62 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/semi_supervised.R -\name{propagate} -\alias{propagate} -\title{Project the cluster assignment with k-NN label propagation.} -\usage{ -propagate( - object, - newdata = NULL, - distance_method = NULL, - kNN = 5, - simple_vote = TRUE, - resolve_ties = FALSE, - kernel_fun = function(x) 1/x, - detailed = FALSE -) -} -\arguments{ -\item{object}{a `clust_analysis` or a `combi_analysis` object} - -\item{newdata}{a numeric data frame, matrix or a `red_analysis` object. -If NULL (default), the bare cluster assignment table is returned.} - -\item{distance_method}{a distance metric, by default it is retrieved from -the input `clust_analysis` or `combi_analysis` object. For the latter, the -distance used for observation clustering is used in the projection.} - -\item{kNN}{number of the nearest neighbors.} - -\item{simple_vote}{logical, should classical unweighted k-NN classification -be applied? If FALSE, distance-weighted k-NN is used with the provided kernel -function.} - -\item{resolve_ties}{logical, should the ties be resolved at random? Applies -only to the simple unweighted voting algorithm.} - -\item{kernel_fun}{kernel function transforming the distance into weight.} - -\item{detailed}{logical, should a detailed output including the kNN table and -voting scheme be returned. If FALSE, the bare `clust_analysis` object with the -predictions is returned.} -} -\value{ -a \code{\link{clust_analysis}} object or, -if `detailed` = TRUE, a list with the kNN -table and the voting results. -} -\description{ -Projects the cluster assignment with a k-nearest neighbor -classifier onto a new data set. -} -\details{ -If a `red_analysis` object is provided as new data, the cluster -assignment is projected onto the component/score table.
The newdata input has -to have the same variables as those used for development of the input -`clust_analysis` object. -} -\references{ -Leng M, Wang J, Cheng J, Zhou H, Chen X. Adaptive -semi-supervised clustering algorithm with label propagation. -J Softw Eng (2014) 8:14–22. doi:10.3923/jse.2014.14.22 -} diff --git a/man/red_analysis.Rd b/man/red_analysis.Rd deleted file mode 100644 index 7076f15..0000000 --- a/man/red_analysis.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{red_analysis} -\alias{red_analysis} -\title{Construct a red_analysis class object.} -\usage{ -red_analysis(x) -} -\arguments{ -\item{x}{a named list, see details.} -} -\value{ -a `red_analysis` object. -} -\description{ -Constructs a red_analysis class object given a list storing -results of a dimensionality reduction analysis. -} -\details{ -A named list with the following elements is required: -red_obj (analysis output), -red_fun (name of the reduction function), -component_tbl (component values for the observations), -loadings (variable loadings, relevant only for PCA), -data (a quosure calling the original data set). -} diff --git a/man/reduce_data.Rd b/man/reduce_data.Rd deleted file mode 100644 index ce43753..0000000 --- a/man/reduce_data.Rd +++ /dev/null @@ -1,59 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduction_functions.R -\name{reduce_data} -\alias{reduce_data} -\title{Dimensionality reduction of a data set.} -\usage{ -reduce_data( - data, - distance_method = "euclidean", - kdim = 2, - red_fun = c("pca", "mds", "umap", "fa"), - ... -) -} -\arguments{ -\item{data}{a numeric data frame or a matrix.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}. Valid only for MDS and UMAP.
-For UMAP, the distance is specified by a -\code{\link[umap]{umap.defaults}} object.} - -\item{kdim}{dimension number.} - -\item{red_fun}{name of the dimensionality reduction function.} - -\item{...}{extra arguments passed to \code{\link[pcaPP]{PCAproj}} (PCA), -\code{\link[stats]{cmdscale}} (MDS) and \code{\link[umap]{umap}} (UMAP), -like the \code{\link[umap]{umap.defaults}} object for UMAP, -\code{\link[stats]{factanal}} (FA).} -} -\value{ -a \code{\link{red_analysis}} object. -} -\description{ -Performs dimensionality reduction of a data frame with principal -component analysis (PCA), multi-dimensional scaling (MDS), Uniform Manifold -Approximation and Projection (UMAP) or factor analysis (FA). -} -\details{ -A wrapper around \code{\link[pcaPP]{PCAproj}} (PCA), -\code{\link[stats]{cmdscale}} (MDS), \code{\link[umap]{umap}} (UMAP) -and \code{\link[stats]{factanal}} (FA). Note: -the distances and other UMAP parameters are specified by a -\code{\link[umap]{umap.defaults}} object. Hence, not all distance measures -returned by \code{\link{get_kernel_info}} are available for UMAP computation. -} -\references{ -McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation and -Projection for Dimension Reduction. (2018) -Available at: https://arxiv.org/abs/1802.03426v3 - -Croux C, Filzmoser P, Oliveira MR. Algorithms for Projection-Pursuit robust -principal component analysis. Chemom Intell Lab Syst (2007) 87:218–225. -doi:10.1016/j.chemolab.2007.01.004 - -BARTLETT MS. THE STATISTICAL CONCEPTION OF MENTAL FACTORS. Br J Psychol -Gen Sect (1937) 28:97–104. 
doi:10.1111/j.2044-8295.1937.tb00863.x -} diff --git a/man/rename.clust_analysis.Rd b/man/rename.clust_analysis.Rd deleted file mode 100644 index d7870a4..0000000 --- a/man/rename.clust_analysis.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/names.R -\name{rename.clust_analysis} -\alias{rename.clust_analysis} -\alias{rename.combi_analysis} -\title{Set cluster names.} -\usage{ -\method{rename}{clust_analysis}(.data, nm, ...) - -\method{rename}{combi_analysis}(.data, nm, ...) -} -\arguments{ -\item{.data}{a `clust_analysis` or `combi_analysis` object.} - -\item{nm}{a named character vector with the new names as elements and old -cluster names as names.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a \code{\link{clust_analysis}} or \code{\link{combi_analysis}} object. -} -\description{ -Sets custom cluster names. -} -\details{ -The clustering functions of the clustTools package -name clusters with integer numbers by default. This method provides a handy -tool to set custom cluster names with a named character vector. -The cluster order (i.e. vector levels) is defined by the order of -the naming vector's elements. -} diff --git a/man/set_rownames.Rd b/man/set_rownames.Rd deleted file mode 100644 index 8f0d193..0000000 --- a/man/set_rownames.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/preprocessing_functions.R -\name{set_rownames} -\alias{set_rownames} -\title{Set row names.} -\usage{ -set_rownames(data, row_names = as.character(1:nrow(data))) -} -\arguments{ -\item{data}{a data frame or a tibble.} - -\item{row_names}{a character vector of the proper length.} -} -\value{ -a data frame. -} -\description{ -Sets row names in a data frame. -} -\details{ -A tibble is silently converted to a data frame.
-} diff --git a/man/sil_extra.Rd b/man/sil_extra.Rd deleted file mode 100644 index 9b72356..0000000 --- a/man/sil_extra.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constructors.R -\name{sil_extra} -\alias{sil_extra} -\title{Generate a sil_extra object.} -\usage{ -sil_extra(x, assignment) -} -\arguments{ -\item{x}{an object of the \code{\link[cluster]{silhouette}} class.} - -\item{assignment}{a data frame with the `clust_id` and `observation` -columns defining the cluster assignment, e.g. obtained by the -\code{\link{extract}} function applied to a `clust_analysis` or -`combi_analysis` object.} -} -\value{ -an object of the `sil_extra` class. Technically, a tibble with -the observation ID, cluster name, neighbor cluster name and silhouette width. -} -\description{ -Extends the \code{\link[cluster]{silhouette}} object by -cluster order and names. -} -\details{ -The `sil_extra` class has \code{\link{summary.sil_extra}} -and \code{\link{plot.sil_extra}} -methods compatible with the tidyverse environment. -} diff --git a/man/silhouette.clust_analysis.Rd b/man/silhouette.clust_analysis.Rd deleted file mode 100644 index b213f42..0000000 --- a/man/silhouette.clust_analysis.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/silhouettes.R -\name{silhouette.clust_analysis} -\alias{silhouette.clust_analysis} -\alias{silhouette.combi_analysis} -\title{Silhouette statistic.} -\usage{ -\method{silhouette}{clust_analysis}(x, output = c("extended", "silhouette"), ...) - -\method{silhouette}{combi_analysis}(x, output = c("extended", "silhouette"), ...) -} -\arguments{ -\item{x}{an object of the \code{\link{clust_analysis}} or -\code{\link{combi_analysis}} class.} - -\item{output}{the function output. -For `silhouette`, an object of the canonical cluster's class -\code{\link[cluster]{silhouette}} is returned.
-For `extended`, an object of the class \code{\link{sil_extra}} is returned. -See the Details.} - -\item{...}{extra arguments passed to \code{\link[cluster]{silhouette}}.} -} -\value{ -an object of the class \code{\link[cluster]{silhouette}} or -\code{\link{sil_extra}}. -} -\description{ -Computes silhouette statistics for `clust_analysis` or `combi_analysis` -objects. -} -\details{ -The function employs the default method of the -\code{\link[cluster]{silhouette}} generic and is hence agnostic to the -clustering method. -For SOM clustering, i.e. `combi_analysis` objects, the calculation is done -for the simple assignment of observations to the clusters -(SOM nodes are ignored). -For the extended output of the function, an object of the class -\code{\link{sil_extra}} is returned, which preserves cluster order -and names and offers tidyverse-friendly \code{\link{plot.sil_extra}} -and \code{\link{summary.sil_extra}} methods. -} -\references{ -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. -doi:10.1016/0377-0427(87)90125-7 - -Schubert E, Rousseeuw PJ. Faster k-Medoids Clustering: Improving the PAM, -CLARA, and CLARANS Algorithms. in Lecture Notes in Computer Science -(including subseries Lecture Notes in Artificial Intelligence and Lecture -Notes in Bioinformatics) (Springer), 171–187. -doi:10.1007/978-3-030-32047-8_16 -} diff --git a/man/som_cluster.Rd b/man/som_cluster.Rd deleted file mode 100644 index 4ba2c4e..0000000 --- a/man/som_cluster.Rd +++ /dev/null @@ -1,62 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering_functions.R -\name{som_cluster} -\alias{som_cluster} -\title{Self-organizing maps.} -\usage{ -som_cluster( - data, - distance_method = "euclidean", - xdim = 5, - ydim = 4, - topo = c("hexagonal", "rectangular"), - neighbourhood.fct = c("gaussian", "bubble"), - toroidal = FALSE, - seed = 1234, - ...
-) -} -\arguments{ -\item{data}{a numeric data frame or matrix or a red_analysis object.} - -\item{distance_method}{name of the distance metric, see: -\code{\link{get_kernel_info}}.} - -\item{xdim}{x dimension of the SOM grid, -see: \code{\link[kohonen]{somgrid}} for details.} - -\item{ydim}{y dimension of the SOM grid, -see: \code{\link[kohonen]{somgrid}} for details.} - -\item{topo}{SOM grid topology, see: \code{\link[kohonen]{somgrid}} -for details. 'hexagonal' by default.} - -\item{neighbourhood.fct}{neighborhood function, 'gaussian' by default.} - -\item{toroidal}{logical, should toroidal grid be used?} - -\item{seed}{initial setting of the random number generator.} - -\item{...}{extra arguments passed to \code{\link[kohonen]{som}}.} -} -\value{ -an object of the class \code{\link{clust_analysis}}. -} -\description{ -Performs self-organizing map (SOM) clustering of a numeric data -frame, matrix or the results of a reduction analysis. -} -\details{ -Technically, a wrapper around \code{\link[kohonen]{som}}. If a -red_analysis object is provided as the data argument, the observation -component/score table is subjected to clustering. Note, in order to make use -of the full set of distance measures, the package 'somKernels' needs to be -installed and loaded. -} -\references{ -Kohonen T. Self-Organizing Maps. Berlin, Heidelberg: -Springer Berlin Heidelberg (1995). doi:10.1007/978-3-642-97610-0 - -Wehrens R, Kruisselbrink J. Flexible self-organizing maps in kohonen -3.0. J Stat Softw (2018) 87:1–18. doi:10.18637/jss.v087.i07 -} diff --git a/man/summary.cross_dist.Rd b/man/summary.cross_dist.Rd deleted file mode 100644 index 2627589..0000000 --- a/man/summary.cross_dist.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cross_dist_oop.R -\name{summary.cross_dist} -\alias{summary.cross_dist} -\title{Summary of cross-distances between clusters.} -\usage{ -\method{summary}{cross_dist}(object, ...)
-} -\arguments{ -\item{object}{a `cross_dist` object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a data frame with mean, SD, median, interquartile range, 95\% range -and range of cross-distances between the cluster pairs. -} -\description{ -Computes summary statistics of homologous or heterologous cross-distances -between the clusters. -} diff --git a/man/summary.importance.Rd b/man/summary.importance.Rd deleted file mode 100644 index 4d8f6f3..0000000 --- a/man/summary.importance.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/importance_oop.R -\name{summary.importance} -\alias{summary.importance} -\title{Importance statistic summary.} -\usage{ -\method{summary}{importance}(object, ...) -} -\arguments{ -\item{object}{an `importance` class object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a data frame with importance metrics. -} -\description{ -If the permutation importance analysis for clustering variables was done -in multiple iterations -(e.g. `n_iter` set to > 1 in \code{\link{impact.clust_analysis}}), -the number of iterations, mean, SD, median, interquartile range -and range of the difference in -clustering variance for each clustering variable are computed. -Otherwise, a plain data frame with importance statistics is returned. -} diff --git a/man/summary.sil_extra.Rd b/man/summary.sil_extra.Rd deleted file mode 100644 index ff33b06..0000000 --- a/man/summary.sil_extra.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sil_oop.R -\name{summary.sil_extra} -\alias{summary.sil_extra} -\title{Summary silhouette width statistic for clusters.} -\usage{ -\method{summary}{sil_extra}(object, ...)
-} -\arguments{ -\item{object}{an object of the \code{\link{sil_extra}} class.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a data frame with numeric statistics for the whole clustering -structure (`clust_id` = 'global') and particular clusters. -} -\description{ -Computes mean, SD, median, interquartile range, 95\% range, range as well as -the number and percentage of observations with negative silhouette width. -Such observations are likely in an improper cluster. -} -\references{ -Rousseeuw PJ. Silhouettes: A graphical aid to the interpretation and -validation of cluster analysis. J Comput Appl Math (1987) 20:53–65. -doi:10.1016/0377-0427(87)90125-7 -} diff --git a/man/var.Rd b/man/var.Rd deleted file mode 100644 index 66f472f..0000000 --- a/man/var.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generics.R -\name{var} -\alias{var} -\alias{var.default} -\title{Object's variance} -\usage{ -var(x, ...) - -\method{var}{default}(x, ...) -} -\arguments{ -\item{x}{an object. For the default method a numeric vector, matrix -or a data frame.} - -\item{...}{extra arguments passed to methods, e.g. \code{\link[stats]{var}}.} -} -\description{ -Computes a variance statistic specific to the given object. -} -\details{ -The default `var()` method is a wrapper around -\code{\link[stats]{var}}. -} diff --git a/man/var.clust_analysis.Rd b/man/var.clust_analysis.Rd deleted file mode 100644 index 7ea4051..0000000 --- a/man/var.clust_analysis.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/variance.R -\name{var.clust_analysis} -\alias{var.clust_analysis} -\alias{var.combi_analysis} -\title{Calculate clustering variance.} -\usage{ -\method{var}{clust_analysis}(x, ...) - -\method{var}{combi_analysis}(x, ...)
-} -\arguments{ -\item{x}{an object.} - -\item{...}{extra arguments, currently none.} -} -\value{ -a list with the total, within-cluster, between-cluster sum of -squares and explained clustering variance (named `frac_var`). -} -\description{ -Calculates the clustering sum of squares (total, within -clusters, total within clusters and between clusters) as well as the -fraction of 'explained' clustering variance. The latter is the ratio of -the total between-cluster sum of squares to the total sum of squares. -} -\details{ -`var()` is an S3 generic function. -`var()` overwrites the `var()` function provided by the stats package, -but provides a handy default method, so that `var()` is expected to behave -the same way as in base R. -} diff --git a/man/var.red_analysis.Rd b/man/var.red_analysis.Rd deleted file mode 100644 index 3932db1..0000000 --- a/man/var.red_analysis.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/red_analysis_oop.R -\name{var.red_analysis} -\alias{var.red_analysis} -\alias{summary.red_analysis} -\title{Variance and summary for a red_analysis object.} -\usage{ -\method{var}{red_analysis}(x, ...) - -\method{summary}{red_analysis}(object, ...) -} -\arguments{ -\item{x}{a `red_analysis` object.} - -\item{...}{extra arguments, currently none.} - -\item{object}{a `red_analysis` object.} -} -\value{ -`var()` returns a data frame with components' -variances, `summary()` returns a set of summary statistics specific to -the wrapped dimensionality reduction function. -} -\description{ -Variance associated with the -components and statistic summary for `red_analysis` class objects.
-} diff --git a/man/vote_kernel.Rd b/man/vote_kernel.Rd deleted file mode 100644 index 2498bd3..0000000 --- a/man/vote_kernel.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{vote_kernel} -\alias{vote_kernel} -\title{Find the most frequently occurring element with distance weighting.} -\usage{ -vote_kernel(vector, dist_vec, kernel_fun = function(x) 1/x) -} -\arguments{ -\item{vector}{a vector.} - -\item{dist_vec}{a numeric vector with the distance values.} - -\item{kernel_fun}{a kernel function.} -} -\description{ -Finds the element of a vector with the highest number of -occurrences. The voting is distance weighted by the given kernel function. -} -\value{ -the most frequent element. -} diff --git a/man/vote_simple.Rd b/man/vote_simple.Rd deleted file mode 100644 index a4e9a28..0000000 --- a/man/vote_simple.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{vote_simple} -\alias{vote_simple} -\title{Find the most frequently occurring element of a vector.} -\usage{ -vote_simple(vector, resolve_ties = FALSE) -} -\arguments{ -\item{vector}{a vector.} - -\item{resolve_ties}{logical, should the ties be resolved at random?} -} -\value{ -the most frequent element. -} -\description{ -Finds the element of a vector with the highest number of -occurrences. -} -\details{ -Ties may be resolved at random (resolve_ties = TRUE), otherwise, -if a tie exists, the alphabetically first element is returned. -}