From aaf09418568ead622cabbc9ca3cac1b34fd3d8ae Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:41:20 +0300 Subject: [PATCH] plotRDA (#87) * Initialise plotRDA * up * up * up * up * Match dot and ellipse colours * Add arguments for ellipse, vector and text aesthetics * plotRDA -> Add option to turn off text repel and catch all ggrepel args * up * Catch all params for plotReducedDim * up * Update plotRDA vignettes * Make alias plotCCA for plotRDA * Use retrieveCellInfo in plotRDA * Remove remnants of conflicts * Implement method of plotRDA for rda/cca object and extend vignettes * Streamline plotRDA code * Catch all args in setGeneric and setMethod of plotRDA * Minor fixes * Update plotRDA docs and check input params * Fix build for plotRDA * Update docs on plotRDA * Import SingleCellExperiment in DESCRIPTION * Fix DESCRIPTION * Update plotRDA examples * Fix plotRDA docs * Debug and add signif/variance args to plotRDA * Implement add.ellipse arg in plotRDA * Update plotRDA vignettes * Bump version and add news * Initiate tests for plotRDA and correct version * Implement unit tests for plotRDA * up --------- Co-authored-by: Giulio Benedetti --- .Rbuildignore | 2 + .gitignore | 3 + DESCRIPTION | 11 +- NAMESPACE | 9 + NEWS | 3 +- R/plotCCA.R | 612 ++++++++++++++++++++++++++++++++++ man/plotCCA.Rd | 188 +++++++++++ tests/testthat/test-plotCCA.R | 82 +++++ 8 files changed, 906 insertions(+), 4 deletions(-) create mode 100644 R/plotCCA.R create mode 100644 man/plotCCA.Rd create mode 100644 tests/testthat/test-plotCCA.R diff --git a/.Rbuildignore b/.Rbuildignore index c46ab48e..0f56b95e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,5 @@ .RData .pdf inst/extras/ +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index b424f209..2bab5502 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ inst/doc docs .Rhistory *.RData +.DS_Store +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index fdacfcc3..f35c3dae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: miaViz Title: Microbiome Analysis Plotting and Visualization -Version: 1.9.2 +Version: 1.9.3 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", @@ -11,7 +11,10 @@ Authors@R: person(given = "Leo", family = "Lahti", role = c("aut"), email = "leo.lahti@iki.fi", comment = c(ORCID = "0000-0001-5537-637X")), - person(given = "Basil", family = "Courbayre", role = c("ctb")) + person(given = "Basil", family = "Courbayre", role = c("ctb")), + person(given = "Giulio", family = "Benedetti", role = c("ctb"), + email = "giulio.benedetti@utu.fi", + comment = c(ORCID = "0000-0002-8732-7692")) ) Description: The miaViz package implements functions to visualize TreeSummarizedExperiment objects especially in the context of microbiome @@ -46,7 +49,9 @@ Imports: tidyr, dplyr, ape, - DirichletMultinomial + DirichletMultinomial, + ggrepel, + SingleCellExperiment Suggests: knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index c4a449db..387a8e02 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,13 @@ # Generated by roxygen2: do not edit by hand export(plotAbundanceDensity) +export(plotCCA) export(plotColGraph) export(plotColTile) export(plotDMNFit) export(plotPrevalence) export(plotPrevalentAbundance) +export(plotRDA) export(plotRowGraph) export(plotRowTile) export(plotSeries) @@ -16,12 +18,14 @@ exportMethods(colTreeData) exportMethods(combineTreeData) exportMethods(plotAbundance) exportMethods(plotAbundanceDensity) +exportMethods(plotCCA) exportMethods(plotColGraph) exportMethods(plotColTile) exportMethods(plotColTree) exportMethods(plotDMNFit) exportMethods(plotPrevalence) exportMethods(plotPrevalentAbundance) +exportMethods(plotRDA) exportMethods(plotRowGraph) exportMethods(plotRowTile) exportMethods(plotRowTree) @@ -45,6 +49,8 @@ importFrom(DelayedArray,rowMeans) importFrom(DelayedArray,rowSums) importFrom(DirichletMultinomial,mixture) importFrom(S4Vectors,metadata) +importFrom(SingleCellExperiment,reducedDim) +importFrom(SingleCellExperiment,reducedDimNames) importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,colData) importFrom(SummarizedExperiment,rowData) @@ -90,6 +96,8 @@ importFrom(ggplot2,scale_y_discrete) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) importFrom(ggplot2,theme_classic) +importFrom(ggrepel,geom_label_repel) +importFrom(ggrepel,geom_text_repel) importFrom(ggtree,geom_cladelab) importFrom(ggtree,geom_highlight) importFrom(ggtree,geom_nodelab) @@ -105,6 +113,7 @@ importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,sym) importFrom(scater,plotExpression) +importFrom(scater,plotReducedDim) importFrom(scater,retrieveCellInfo) importFrom(scater,retrieveFeatureInfo) importFrom(stats,sd) diff --git a/NEWS b/NEWS index 45477bdf..3eaae99c 100644 --- a/NEWS +++ b/NEWS @@ -19,4 +19,5 @@ Changes in version 1.7.x + Fixed plotGraph* that was broked due changes in dependencies Changes in version 1.9.x -+ Updated plotDMN to work with newest mia version \ No newline at end of file ++ Updated plotDMN to work with newest mia version ++ Added plotCCA and plotRDA functions \ No newline at end of file diff --git a/R/plotCCA.R b/R/plotCCA.R new file mode 100644 index 00000000..699cc22d --- /dev/null +++ b/R/plotCCA.R @@ -0,0 +1,612 @@ +#' Plot RDA or CCA object +#' +#' \code{plotRDA} and \code{plotCCA} create an RDA/CCA plot starting from the +#' output of \code{\link[mia:runCCA]{CCA and RDA}} functions, two common methods +#' for supervised ordination of microbiome data. +#' +#' @param object a +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} +#' or a matrix of weights. The latter is returned as output from \code{\link[mia:runCCA]{calculateRDA}}. +#' +#' @param dimred A string or integer scalar indicating the reduced dimension to +#' plot. This is the output of \code{\link[mia:runCCA]{runRDA}} and resides in +#' \code{reducedDim(tse, dimred)}. +#' +#' @param add.ellipse One of \code{c(TRUE, FALSE, "fill", "colour", "color")}, +#' indicating whether ellipses should be present, absent, filled or colored. +#' (default: \code{ellipse.fill = TRUE}) +#' +#' @param ellipse.alpha Number between 0 and 1 to adjust the opacity of ellipses. +#' (default: \code{ellipse.alpha = 0.2}) +#' +#' @param ellipse.linewidth Number specifying the size of ellipses. +#' (default: \code{ellipse.linewidth = 0.1}) +#' +#' @param ellipse.linetype Discrete number specifying the style of ellipses. +#' (default: \code{ellipse.linetype = 1}) +#' +#' @param add.vectors TRUE or FALSE, should vectors appear in the plot. +#' (default: \code{add.vectors = TRUE}) +#' +#' @param vec.size Number specifying the size of vectors. +#' (default: \code{vec.size = 0.5}) +#' +#' @param vec.colour String specifying the colour of vectors. +#' (default: \code{vec.color = "black"}) +#' +#' @param vec.color Alias for `vec.colour`. +#' +#' @param vec.linetype Discrete number specifying the style of vector lines. +#' (default: \code{vec.linetype = 1}) +#' +#' @param arrow.size Number specifying the size of arrows. +#' (defaults: \code{arrow.size = 0.25}) +#' +#' @param label.size Number specifying the size of text and labels. +#' (default: \code{label.size = 4}) +#' +#' @param label.colour String specifying the colour of text and labels. +#' (default: \code{label.color = "black"}) +#' +#' @param label.color Alias for `label.colour`. +#' +#' @param sep.group String specifying the separator used in the labels. +#' (default: \code{sep.group = "\U2012"}) +#' +#' @param repl.underscore String used to replace underscores in the labels. +#' (default: \code{repl.underscore = " "}) +#' +#' @param vec.text TRUE or FALSE, should text instead of labels be used to label vectors. +#' (default: \code{vec.text = TRUE}) +#' +#' @param repel.labels TRUE or FALSE, should labels be repelled. +#' (default: \code{repel.labels = TRUE}) +#' +#' @param parse.labels TRUE or FALSE, should labels be parsed. +#' (default: \code{parse.labels = TRUE}) +#' +#' @param add.significance TRUE or FALSE, should explained variance and p-value +#' appear in the labels. (default: \code{add.significance = TRUE}) +#' +#' @param add.expl.var TRUE or FALSE, should explained variance appear on the +#' coordinate axes. (default: \code{add.expl.var = TRUE}) +#' +#' @param ... additional parameters for plotting, inherited from +#' \code{\link[scater:plotReducedDim]{plotReducedDim}}, +#' \code{\link[ggplot2:geom_label]{geom_label}} and +#' \code{\link[ggrepel:geom_label_repel]{geom_label_repel}}. +#' +#' @details +#' \code{plotRDA} and \code{plotCCA} create an RDA/CCA plot starting from the +#' output of \code{\link[mia:runCCA]{CCA and RDA}} functions, two common methods +#' for supervised ordination of microbiome data. Either a +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} +#' or a matrix object is supported as input. When the input is a +#' TreeSummarizedExperiment, this should contain the output of runRDA +#' in the reducedDim slot and the argument \code{dimred} needs to be defined. +#' When the input is a matrix, this should be returned as output from +#' calculateRDA. However, the first method is recommended because it provides +#' the option to adjust aesthetics to the colData variables through the +#' arguments inherited from \code{\link[scater:plotReducedDim]{plotReducedDim}}. +#' +#' @return +#' A \code{ggplot2} object +#' +#' @name plotCCA +#' +#' @examples +#' # Load dataset +#' library(miaViz) +#' data("enterotype", package = "mia") +#' tse <- enterotype +#' +#' # Run RDA and store results into TreeSE +#' tse <- runRDA(tse, +#' formula = assay ~ ClinicalStatus + Gender + Age, +#' FUN = vegan::vegdist, +#' distance = "bray", +#' na.action = na.exclude) +#' +#' # Create RDA plot coloured by variable +#' plotRDA(tse, "RDA", +#' colour_by = "ClinicalStatus") +#' +#' # Create RDA plot with empty ellipses +#' plotRDA(tse, "RDA", +#' colour_by = "ClinicalStatus", +#' add.ellipse = "colour") +#' +#' # Create RDA plot with text encased in labels +#' plotRDA(tse, "RDA", +#' colour_by = "ClinicalStatus", +#' vec.text = FALSE) +#' +#' # Create RDA plot without repelling text +#' plotRDA(tse, "RDA", +#' colour_by = "ClinicalStatus", +#' repel.labels = FALSE) +#' +#' # Create RDA plot without vectors +#' plotRDA(tse, "RDA", +#' colour_by = "ClinicalStatus", +#' add.vectors = FALSE) +#' +#' # Calculate RDA as a separate object +#' rda_mat <- calculateRDA(tse, +#' formula = assay ~ ClinicalStatus + Gender + Age, +#' FUN = vegan::vegdist, +#' distance = "bray", +#' na.action = na.exclude) +#' +#' # Create RDA plot from RDA matrix +#' plotRDA(rda_mat) +NULL + +#' @rdname plotCCA +#' @aliases plotRDA +#' @export +setGeneric("plotCCA", signature = c("object"), + function(object, ...) standardGeneric("plotCCA")) + +#' @rdname plotCCA +#' @aliases plotRDA +#' @export +setMethod("plotCCA", signature = c(object = "SingleCellExperiment"), + function(object, dimred, ...){ + # Reproduce plotRDA function + return(plotRDA(object, dimred, ...)) + } +) + +#' @rdname plotCCA +#' @aliases plotRDA +#' @export +setMethod("plotCCA", signature = c(object = "matrix"), + function(object, ...){ + # Reproduce plotRDA function + return(plotRDA(object, ...)) + } +) + +#' @rdname plotCCA +#' @aliases plotCCA +#' @export +setGeneric("plotRDA", signature = c("object"), + function(object, ...) standardGeneric("plotRDA")) + +#' @rdname plotCCA +#' @aliases plotCCA +#' @export +setMethod("plotRDA", signature = c(object = "SingleCellExperiment"), + function(object, dimred, + add.ellipse = TRUE, ellipse.alpha = 0.2, ellipse.linewidth = 0.1, ellipse.linetype = 1, + vec.size = 0.5, vec.color = vec.colour, vec.colour = "black", vec.linetype = 1, + arrow.size = 0.25, label.color = label.colour, label.colour = "black", label.size = 4, + vec.text = TRUE, repel.labels = TRUE, sep.group = "\U2012", repl.underscore = " ", + add.significance = TRUE, add.expl.var = TRUE, add.vectors = TRUE, parse.labels = TRUE, ...){ + ###################### Input check ######################## + if( !(add.ellipse %in% c(TRUE, FALSE, "fill", "color", "colour")) ){ + stop("'add.ellipse' must be one of c(TRUE, FALSE, 'fill', 'color', 'colour').", call. = FALSE) + } + if ( !.is_a_bool(add.vectors) ){ + stop("'add.vectors must be TRUE or FALSE.", call. = FALSE) + } + if ( !add.vectors ){ + warning("'add.vectors' is FALSE, so other arguments for vectors and labels will be disregarded.", call. = FALSE) + } + if( !.is_a_bool(vec.text) ){ + stop("'vec.text' must be TRUE or FALSE.", call. = FALSE) + } + if( !.is_a_bool(repel.labels) ){ + stop("'repel.labels' must be TRUE or FALSE.", call. = FALSE) + } + if( !.is_a_bool(parse.labels) ){ + stop("'parse.labels' must be TRUE or FALSE.", call. = FALSE) + } + if( !.is_a_bool(add.significance) ){ + stop("'add.significance' must be TRUE or FALSE.", call. = FALSE) + } + if( parse.labels && !add.significance ){ + parse.labels <- FALSE + warning("'parse.labels' was turned off because 'add.significance' is FALSE.", call. = FALSE) + } + if( !.is_a_bool(add.expl.var) ){ + stop("'add.expl.var' must be TRUE or FALSE.", call. = FALSE) + } + if( !.are_whole_numbers(ellipse.linetype) ){ + stop("'ellipse.linetype' must be a whole number.", call. = FALSE) + } + if( !.are_whole_numbers(ellipse.linetype) ){ + stop("'vec.linetype' must be a whole number.", call. = FALSE) + } + if( ellipse.alpha < 0 || ellipse.alpha > 1 ){ + stop("'ellipse.alpha' must be a number between 0 and 1.", call. = FALSE) + } + if ( !is.numeric(ellipse.linewidth) && ellipse.linewidth > 0 ) { + stop("'ellipse.linewidth' must be a positive number.", call. = FALSE) + } + if ( !is.numeric(vec.size) && vec.size > 0 ) { + stop("'vec.size' must be a positive number.", call. = FALSE) + } + if ( !is.numeric(arrow.size) && arrow.size > 0 ) { + stop("'arrow.size' must be a positive number.", call. = FALSE) + } + if ( !is.numeric(label.size) && label.size > 0 ) { + stop("'label.size' must be a positive number.", call. = FALSE) + } + if ( !.is_non_empty_string(vec.color) ) { + stop("'vec.color' must be a non-empty string specifying a colour", call. = FALSE) + } + if ( !.is_non_empty_string(label.color) ) { + stop("'label.color' must be a non-empty string specifying a colour", call. = FALSE) + } + if ( !.is_a_string(sep.group) ) { + stop("'sep.group' must be a string specifying a separator.", call. = FALSE) + } + if ( !.is_a_string(repl.underscore) ) { + stop("'repl.underscore' must be a string.", call. = FALSE) + } + ###################### Input check end #################### + # Get data for plotting + plot_data <- .incorporate_rda_vis( + object, dimred, sep.group = sep.group, repl.underscore = repl.underscore, + add.significance = add.significance, add.expl.var = add.expl.var, + add.ellipse = add.ellipse, add.vectors = add.vectors, ... + ) + # Create a plot + plot <- .rda_plotter( + plot_data, ellipse.alpha = ellipse.alpha, ellipse.linewidth = ellipse.linewidth, + ellipse.linetype = ellipse.linetype, vec.size = vec.size, vec.color = vec.color, + vec.colour = vec.colour, vec.linetype = vec.linetype, arrow.size = arrow.size, + label.color = label.color, label.colour = label.colour, label.size = label.size, + vec.text = vec.text, add.ellipse = add.ellipse, repel.labels = repel.labels, + parse.labels = parse.labels, ... + ) + return(plot) + } +) + +#' @rdname plotCCA +#' @aliases plotCCA +#' @export +setMethod("plotRDA", signature = c(object = "matrix"), + function(object, ...){ + # Construct TreeSE from rda/cca object + object <- .rda2tse(object) + # Run plotRDA method for TreeSE + return(plotRDA(object, "RDA", ...)) + } +) + + +################## HELP FUNCTIONS ########################## + +# Construct TreeSE from rda/cca object to pass it to downstream functions +.rda2tse <- function(object) { + # Convert rda/cca object to TreeSE + object <- TreeSummarizedExperiment( + assays = matrix(ncol = nrow(object), dimnames = list(NULL, rownames(object))), + reducedDims = list(RDA = object) + ) + return(object) +} + +# Get data for plotting +#' @importFrom scater plotReducedDim retrieveCellInfo +#' @importFrom SingleCellExperiment reducedDim reducedDimNames +.incorporate_rda_vis <- function( + tse, dimred, ncomponents = 2, colour_by = color_by, color_by = NULL, + shape_by = NULL, size_by = NULL, order_by = NULL, text_by = NULL, + other_fields = list(), swap_rownames = NULL, point.padding = NA, + add.ellipse = TRUE, add.vectors = TRUE, add.significance = TRUE, + add.expl.var = TRUE, vec_lab = NULL, bins = NULL, sep.group = "\U2012", + repl.underscore = " ", ...){ + + # Check dimred + if( !dimred %in% reducedDimNames(tse) ){ + stop("'dimred' must specify reducedDim.", call. = FALSE) + } + # Get reducedDim + reduced_dim <- reducedDim(tse, dimred) + # Check that there are at least 2 coordinates + if( ncol(reduced_dim) < 2 ){ + stop("reducedDim specified by 'dimred' must have at least 2 columns.", call. = FALSE) + } + # Check that each by-argument matches the name of a column in colData + by_args <- c( + colour_by = colour_by, + shape_by = shape_by, + size_by = size_by, + order_by = order_by, + text_by = text_by + ) + correct_args <- sapply(by_args, function(x) x %in% names(colData(tse))) + sapply(names(by_args[!correct_args]), + function(x) { + stop(paste0("'", x, "' must match the name of a column in colData."), + call. = FALSE) + }) + # Only 2 dimensions are supported currently + ncomponents <- 2 + # Make list of arguments for plotReducedDim + plotReducedDim_args <- c(by_args, list( + object = tse, dimred = dimred, ncomponents = ncomponents, + text_size = 5, text_colour = "black", text_color = "black", + label_format = c("%s %i", " (%i%%)"), other_fields = other_fields, + swap_rownames = swap_rownames, point.padding = point.padding, force = 1, + rasterise = FALSE, scattermore = FALSE, summary_fun = "sum", hex = FALSE + )) + # Get scatter plot with plotReducedDim --> keep theme similar between ordination methods + plot <- do.call("plotReducedDim", plotReducedDim_args) + + # Get data for ellipse + ellipse_data <- NULL + if( add.ellipse != FALSE && !is.null(colour_by) ){ + ellipse_data <- reduced_dim + ellipse_data <- as.data.frame(ellipse_data) + ellipse_data[[colour_by]] <- retrieveCellInfo(tse, colour_by)[["value"]] + attributes(ellipse_data)$colour_by <- colour_by + } + + # Get data for vectors + vector_data <- NULL + if( add.vectors ){ + # Check if data is available + ind <- names(attributes(reduced_dim)) %in% c("rda", "cca") + # If it can be found + if( any(ind) ){ + # Get biplot from cca object + rda <- attributes(reduced_dim)[ind][[1]] + vector_data <- rda$CCA$biplot + vector_data <- as.data.frame(vector_data) + vector_data[["group"]] <- rownames(vector_data) + } else{ + # If it cannot be found, give warning + warning(paste("RDA/CCA object was not found. Please compute RDA/CCA", + "by using runCCA or calculateCCA."), + call. = FALSE) + } + } + + # Get variable names from sample metadata + coldata <- colData(tse) + variable_names <- colnames(coldata) + # Check if variable names can be found metadata + all_var_found <- FALSE + if( !is.null(vector_data) && length(variable_names) > 0 ){ + all_var_found <- all(colSums( + sapply(rownames(vector_data), function(x) + sapply(variable_names, function(y) grepl(y, x)) )) == 1) + } + + # Get vector labels + if( !is.null(vector_data) ){ + # Get those names that are present in data + + # If user has not provided vector labels + if( is.null(vec_lab) ){ + vector_label <- rownames(vector_data) + # Make labels more tidy + if( all_var_found ){ + vector_label <- .tidy_vector_labels( + vector_label, coldata, + sep.group = sep.group, + repl.underscore = repl.underscore + ) + } + # Add to df + vector_data$vector_label <- vector_label + } else{ + # Check that user-provided labels are correct length + if( length(vec_lab) != nrow(vector_data) ){ + stop("Number of labels in 'vec_lab' do not match with number of vectors.", + call. = FALSE) + } + # If they are, add labels to data + vector_data$vector_label <- vec_lab + } + } + + # Get significance data + signif_data <- NULL + if( add.significance && !is.null(vector_data) && all_var_found ){ + # Check if data is available + ind <- names(attributes(reduced_dim)) %in% c("significance") + # If it can be found + if( any(ind) ){ + # Get biplot from cca object + signif_data <- attributes(reduced_dim)[ind][[1]] + signif_data <- signif_data[["permanova"]] + signif_data <- as.data.frame(signif_data) + # Add significance to vector labels + # Get vector labels + vector_label <- vector_data[["vector_label"]] + # Add significance to vector labels + vector_label <- .add_signif_to_vector_labels(vector_label, variable_names, signif_data, ...) + vector_data[["vector_label"]] <- vector_label + } else{ + # If it cannot be found, give warning + warning(paste("Significance data was not found. please compute", + "CCA/RDA by using runCCA or calculateCCA."), + call. = FALSE) + } + } + + # Create labels for axis + xlab <- paste0(dimred, " 1") + ylab <- paste0(dimred, " 2") + if( add.expl.var ){ + # Check if data is available + ind <- names(attributes(reduced_dim)) %in% c("rda", "cca") + # If it can be found + if( any(ind) ){ + # Add explained variance + rda <- attributes(reduced_dim)[ind][[1]] + xlab <- paste0( + xlab, " (", + format(round( summary(rda)$concont$importance[2, 1]*100, 1 ), nsmall = 1 ), "%)") + ylab <- paste0( + ylab, " (", + format(round( summary(rda)$concont$importance[2, 2]*100, 1 ), nsmall = 1 ), "%)") + } else{ + # If it cannot be found, give warning + warning(paste("RDA/CCA object was not found. Please compute", + "RDA/CCA by using runCCA or calculateCCA."), + call. = FALSE) + } + } + + # Create a list to return + result <- list( + plot = plot, + ellipse_data = ellipse_data, + vector_data = vector_data, + xlab = xlab, + ylab = ylab + ) + return(result) +} + +# Make vector labels more tidy, i.e, separate variable and group names. +# Replace also underscores with space +.tidy_vector_labels <- function( + vector_label, coldata, sep.group = "\U2012", repl.underscore = " "){ + # Get variable names from sample metadata + var_names <- colnames(coldata) + # Loop through vector labels + vector_label <- sapply(vector_label, FUN = function(name){ + # Get the real variable name from sample metadata + var_name <- var_names[ sapply(var_names, function(x) grepl(x, name)) ] + # If the vector label includes also group name + if( !name %in% var_names ){ + # Get the group name + group_name <- unique( coldata[[var_name]] )[ + which( paste0(var_name, unique( coldata[[var_name]] )) == name ) ] + # Modify vector so that group is separated from variable name + new_name <- paste0(var_name, " ", sep.group, " ", group_name) + } else{ + new_name <- name + } + # Replace underscores with space + new_name <- gsub("_", repl.underscore, new_name) + return(new_name) + }) + return(vector_label) +} + +# This function adds significance info to vector labels +.add_signif_to_vector_labels <- function(vector_label, var_names, signif_data, repl.underscore = " ", ...){ + # Replace underscores from significance data and variable names to match labels + rownames(signif_data) <- sapply(rownames(signif_data), function(x) gsub("_", repl.underscore, x)) + var_names <- sapply(var_names, function(x) gsub("_", repl.underscore, x)) + # Loop through vector labels + vector_label <- sapply(vector_label, FUN = function(name){ + # Get the real variable name from sample metadata + var_name <- var_names[ sapply(var_names, function(x) grepl(x, name)) ] + # Add percentage how much this variable explains, and p-value + new_name <- expr( + paste(!!name, " (", + !!format( + round( signif_data[var_name, "Explained variance"]*100, 1), + nsmall = 1), "%, P = ", + !!gsub("0\\.","\\.", format( + round( signif_data[var_name, "Pr(>F)"], 3), + nsmall = 3)), ")")) + + return(new_name) + }) + return(vector_label) +} + +# Plot based on the data +#' @importFrom ggrepel geom_text_repel geom_label_repel +.rda_plotter <- function( + plot_data, ellipse.alpha = 0.2, ellipse.linewidth = 0.1, ellipse.linetype = 1, + vec.size = 0.5, vec.color = vec.colour, vec.colour = "black", + vec.linetype = 1, arrow.size = 0.25, min.segment.length = 5, + label.color = label.colour, label.colour = "black", label.size = 4, + parse.labels = TRUE, vec.text = TRUE, repel.labels = TRUE, add.ellipse = TRUE, + position = NULL, nudge_x = NULL, nudge_y = NULL, direction = "both", + max.overlaps = 10, check_overlap = FALSE, ...){ + + # Get the scatter plot + plot <- plot_data[["plot"]] + # Add ellipse + if( !is.null(plot_data$ellipse_data) ){ + # Get data and variabe names + data <- plot_data$ellipse_data + xvar <- colnames(data)[[1]] + yvar <- colnames(data)[[2]] + colour_var <- attributes(plot_data$ellipse_data)$colour_by + # Add ellipses to plot (fill or colour the edge) + if( add.ellipse %in% c(TRUE, "fill") ){ + plot <- plot + + stat_ellipse(data = data, + aes(x = .data[[xvar]], y = .data[[yvar]], + color = .data[[colour_var]], fill = after_scale(color)), + geom = "polygon", alpha = ellipse.alpha, + size = ellipse.linewidth, linetype = ellipse.linetype) + } else if ( add.ellipse %in% c("color", "colour") ){ + plot <- plot + + stat_ellipse(data = data, + aes(x = .data[[xvar]], y = .data[[yvar]], color = .data[[colour_var]]), + geom = "polygon", alpha = 0, linetype = ellipse.linetype) + } + + } + # Add vectors + if( !is.null(plot_data$vector_data) ){ + # Get data and variabe names + data <- plot_data$vector_data + xvar <- colnames(data)[[1]] + yvar <- colnames(data)[[2]] + # Add vectors + plot <- plot + + geom_segment(data = data, + aes(x = 0, y = 0, xend = .data[[xvar]], yend = .data[[yvar]], + group = .data[["group"]]), + arrow = arrow(length = unit(arrow.size, "cm")), + color = vec.color, linetype = vec.linetype, size = vec.size) + # Add vector labels (text or label) + # Make list of arguments for geom_text/geom_label + label_args <- list( + data = data, + mapping = aes(x = .data[[xvar]], y = .data[[yvar]]), + label = data[["vector_label"]], parse = parse.labels, + color = label.color, size = label.size, stat = "identity", + nudge_x = nudge_x, nudge_y = nudge_y, show.legend = NA, + na.rm = FALSE, inherit.aes = TRUE + ) + # Repel text + if( repel.labels ){ + # Add arguments for geom_text_repel/geom_label_repel to list + label_args <- c( + label_args, min.segment.length = min.segment.length, + box.padding = 0.25, point.padding = 1e-06, force = 1, force_pull = 1, + max.time = 0.5, max.iter = 10000, max.overlaps = max.overlaps, + direction = direction, seed = NA, verbose = FALSE + ) + # repelled text + if( vec.text ){ + plot <- plot + do.call("geom_text_repel", label_args) + # repelled labels + } else{ + plot <- plot + do.call("geom_label_repel", label_args) + } + # Do not repel text + } else{ + # not repelled text + if( vec.text ){ + label_args <- c(label_args, check_overlap = check_overlap) + plot <- plot + do.call("geom_text", label_args) + # not repelled labels + } else{ + plot <- plot + do.call("geom_label", label_args) + } + } + + } + # Add axis labels + plot <- plot + xlab(plot_data$xlab) + ylab(plot_data$ylab) + return(plot) +} \ No newline at end of file diff --git a/man/plotCCA.Rd b/man/plotCCA.Rd new file mode 100644 index 00000000..714ca498 --- /dev/null +++ b/man/plotCCA.Rd @@ -0,0 +1,188 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotCCA.R +\name{plotCCA} +\alias{plotCCA} +\alias{plotRDA} +\alias{plotCCA,SingleCellExperiment-method} +\alias{plotCCA,matrix-method} +\alias{plotRDA,SingleCellExperiment-method} +\alias{plotRDA,matrix-method} +\title{Plot RDA or CCA object} +\usage{ +plotCCA(object, ...) + +\S4method{plotCCA}{SingleCellExperiment}(object, dimred, ...) + +\S4method{plotCCA}{matrix}(object, ...) + +plotRDA(object, ...) + +\S4method{plotRDA}{SingleCellExperiment}( + object, + dimred, + add.ellipse = TRUE, + ellipse.alpha = 0.2, + ellipse.linewidth = 0.1, + ellipse.linetype = 1, + vec.size = 0.5, + vec.color = vec.colour, + vec.colour = "black", + vec.linetype = 1, + arrow.size = 0.25, + label.color = label.colour, + label.colour = "black", + label.size = 4, + vec.text = TRUE, + repel.labels = TRUE, + sep.group = "‒", + repl.underscore = " ", + add.significance = TRUE, + add.expl.var = TRUE, + add.vectors = TRUE, + parse.labels = TRUE, + ... +) + +\S4method{plotRDA}{matrix}(object, ...) +} +\arguments{ +\item{object}{a +\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} +or a matrix of weights. The latter is returned as output from \code{\link[mia:runCCA]{calculateRDA}}.} + +\item{...}{additional parameters for plotting, inherited from +\code{\link[scater:plotReducedDim]{plotReducedDim}}, +\code{\link[ggplot2:geom_label]{geom_label}} and +\code{\link[ggrepel:geom_label_repel]{geom_label_repel}}.} + +\item{dimred}{A string or integer scalar indicating the reduced dimension to +plot. This is the output of \code{\link[mia:runCCA]{runRDA}} and resides in +\code{reducedDim(tse, dimred)}.} + +\item{add.ellipse}{One of \code{c(TRUE, FALSE, "fill", "colour", "color")}, +indicating whether ellipses should be present, absent, filled or colored. +(default: \code{ellipse.fill = TRUE})} + +\item{ellipse.alpha}{Number between 0 and 1 to adjust the opacity of ellipses. +(default: \code{ellipse.alpha = 0.2})} + +\item{ellipse.linewidth}{Number specifying the size of ellipses. +(default: \code{ellipse.linewidth = 0.1})} + +\item{ellipse.linetype}{Discrete number specifying the style of ellipses. +(default: \code{ellipse.linetype = 1})} + +\item{vec.size}{Number specifying the size of vectors. +(default: \code{vec.size = 0.5})} + +\item{vec.color}{Alias for \code{vec.colour}.} + +\item{vec.colour}{String specifying the colour of vectors. +(default: \code{vec.color = "black"})} + +\item{vec.linetype}{Discrete number specifying the style of vector lines. +(default: \code{vec.linetype = 1})} + +\item{arrow.size}{Number specifying the size of arrows. +(defaults: \code{arrow.size = 0.25})} + +\item{label.color}{Alias for \code{label.colour}.} + +\item{label.colour}{String specifying the colour of text and labels. +(default: \code{label.color = "black"})} + +\item{label.size}{Number specifying the size of text and labels. +(default: \code{label.size = 4})} + +\item{vec.text}{TRUE or FALSE, should text instead of labels be used to label vectors. +(default: \code{vec.text = TRUE})} + +\item{repel.labels}{TRUE or FALSE, should labels be repelled. +(default: \code{repel.labels = TRUE})} + +\item{sep.group}{String specifying the separator used in the labels. +(default: \code{sep.group = "\U2012"})} + +\item{repl.underscore}{String used to replace underscores in the labels. +(default: \code{repl.underscore = " "})} + +\item{add.significance}{TRUE or FALSE, should explained variance and p-value +appear in the labels. (default: \code{add.significance = TRUE})} + +\item{add.expl.var}{TRUE or FALSE, should explained variance appear on the +coordinate axes. (default: \code{add.expl.var = TRUE})} + +\item{add.vectors}{TRUE or FALSE, should vectors appear in the plot. +(default: \code{add.vectors = TRUE})} + +\item{parse.labels}{TRUE or FALSE, should labels be parsed. +(default: \code{parse.labels = TRUE})} +} +\value{ +A \code{ggplot2} object +} +\description{ +\code{plotRDA} and \code{plotCCA} create an RDA/CCA plot starting from the +output of \code{\link[mia:runCCA]{CCA and RDA}} functions, two common methods +for supervised ordination of microbiome data. +} +\details{ +\code{plotRDA} and \code{plotCCA} create an RDA/CCA plot starting from the +output of \code{\link[mia:runCCA]{CCA and RDA}} functions, two common methods +for supervised ordination of microbiome data. Either a +\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}} +or a matrix object is supported as input. When the input is a +TreeSummarizedExperiment, this should contain the output of runRDA +in the reducedDim slot and the argument \code{dimred} needs to be defined. +When the input is a matrix, this should be returned as output from +calculateRDA. However, the first method is recommended because it provides +the option to adjust aesthetics to the colData variables through the +arguments inherited from \code{\link[scater:plotReducedDim]{plotReducedDim}}. +} +\examples{ +# Load dataset +library(miaViz) +data("enterotype", package = "mia") +tse <- enterotype + +# Run RDA and store results into TreeSE +tse <- runRDA(tse, + formula = assay ~ ClinicalStatus + Gender + Age, + FUN = vegan::vegdist, + distance = "bray", + na.action = na.exclude) + +# Create RDA plot coloured by variable +plotRDA(tse, "RDA", + colour_by = "ClinicalStatus") + +# Create RDA plot with empty ellipses +plotRDA(tse, "RDA", + colour_by = "ClinicalStatus", + add.ellipse = "colour") + +# Create RDA plot with text encased in labels +plotRDA(tse, "RDA", + colour_by = "ClinicalStatus", + vec.text = FALSE) + +# Create RDA plot without repelling text +plotRDA(tse, "RDA", + colour_by = "ClinicalStatus", + repel.labels = FALSE) + +# Create RDA plot without vectors +plotRDA(tse, "RDA", + colour_by = "ClinicalStatus", + add.vectors = FALSE) + +# Calculate RDA as a separate object +rda_mat <- calculateRDA(tse, + formula = assay ~ ClinicalStatus + Gender + Age, + FUN = vegan::vegdist, + distance = "bray", + na.action = na.exclude) + +# Create RDA plot from RDA matrix +plotRDA(rda_mat) +} diff --git a/tests/testthat/test-plotCCA.R b/tests/testthat/test-plotCCA.R new file mode 100644 index 00000000..25e770e3 --- /dev/null +++ b/tests/testthat/test-plotCCA.R @@ -0,0 +1,82 @@ +test_that("plot RDA/CCA", { + data("Tengeler2020", package = "mia") + tse <- Tengeler2020 + + ### 1). TEST error messages ### + + # Object without reducedDim + expect_error(plotRDA(tse), 'argument "dimred" is missing, with no default') + expect_error(plotRDA(tse, "RDA"), "'dimred' must specify reducedDim.") + + # Run/calculate RDA + tse <- runRDA(tse, assay ~ patient_status + cohort) + rda <- calculateRDA(tse, assay ~ patient_status + cohort) + + # Minimal functionality + expect_no_error(plotRDA(tse, "RDA")) + + # Wrong-entry scenarios + expect_error(plotRDA(tse, "RDA", colour_by = "wrong colname"), + "'colour_by' must match the name of a column in colData.") + expect_error(plotRDA(tse, "RDA", colour_by = "cohort", shape_by = "wrong colname"), + "'shape_by' must match the name of a column in colData.") + expect_error(plotRDA(tse, "RDA", add.ellipse = "invalid value"), + "'add.ellipse' must be one of c(TRUE, FALSE, 'fill', 'color', 'colour').", + fixed = TRUE) + expect_error(plotRDA(tse, "RDA", add.significance = "invalid value"), + "'add.significance' must be TRUE or FALSE.") + expect_error(plotRDA(tse, "RDA", add.expl.var = "invalid value"), + "'add.expl.var' must be TRUE or FALSE.") + expect_error(plotRDA(tse, "RDA", repel.labels)) + expect_warning(plotRDA(tse, "RDA", add.significance = FALSE, parse.labels = TRUE), + "'parse.labels' was turned off because 'add.significance' is FALSE.") + expect_warning(plotRDA(tse, "RDA", add.vectors = FALSE), + "'add.vectors' is FALSE, so other arguments for vectors and labels will be disregarded.") + ## add more tests here + + ### 2). TEST plot layers ### + + el_true <- plotRDA(tse, "RDA", colour_by = "patient_status") + el_false <- plotRDA(tse, "RDA", colour_by = "patient_status", add.ellipse = FALSE) + el_col <- plotRDA(tse, "RDA", colour_by = "patient_status", add.ellipse = "colour") + el_fill <- plotRDA(tse, "RDA", colour_by = "patient_status", add.ellipse = "fill") + expect_warning( + vec_false <- plotRDA(tse, "RDA", colour_by = "patient_status", add.vectors = FALSE) + ) + # Filled ellipse has one more layer than no ellipse plot + expect_equal(length(ggplot_build(el_true)[["data"]]), 4) + expect_equal(length(ggplot_build(el_false)[["data"]]), 3) + # No-vector plot has only 2 layers, (points and ellipse) + expect_equal(length(ggplot_build(vec_false)[["data"]]), 2) + # Coloured ellipse but not filled ellipse plot has all 0 alpha values + expect_true(all(ggplot_build(el_col)[["data"]][[2]][["alpha"]] == 0)) + expect_false(all(ggplot_build(el_fill)[["data"]][[2]][["alpha"]] == 0)) + + # Check ggplot aesthetics + p_aes <- plotRDA(tse, "RDA", colour_by = "patient_status", ellipse.alpha = 0.5, + ellipse.linewidth = 0.2, ellipse.linetype = 3, vec.size = 0.6, + vec.colour = "red", vec.linetype = 2, arrow.size = 0.15, + label.colour = "blue", label.size = 5) + # Build plot and get data + p_aes_build <- ggplot_build(p_aes)[["data"]] + # Ellipse aesthetics are correctly defined in ggplot + expect_true(all(p_aes_build[[2]][["alpha"]] == 0.5)) + expect_true(all(ggplot_build(p_aes)[[2]][["linewidth"]] == 0.2)) + expect_true(all(ggplot_build(p_aes)[[2]][["linetype"]] == 3)) + # Vector aesthetics are correctly defined in ggplot + expect_true(all(p_aes_build[[3]][["colour"]] == "red")) + expect_true(all(p_aes_build[[3]][["linewidth"]] == 0.6)) + expect_true(all(p_aes_build[[3]][["linetype"]] == 2)) + # Label aesthetics are correctly defined in ggplot + expect_true(all(p_aes_build[[4]][["colour"]] == "blue")) + expect_true(all(p_aes_build[[4]][["size"]] == 5)) + # Where is arrow size stored? + # expect_true(arrow_size == 0.15) + + # Vector or label text + p_vec <- plotRDA(tse, "RDA", colour_by = "patient_status", vec.text = TRUE) + p_lab <- plotRDA(tse, "RDA", colour_by = "patient_status", vec.text = FALSE) + # Column "fill" is present in p_vec and missing in p_lab, so length differs by 1 + expect_length(ggplot_build(p_vec)[["data"]][[4]], 29) + expect_length(ggplot_build(p_lab)[["data"]][[4]], 28) +})