Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Short term change #94

Merged
merged 18 commits into from
Jan 27, 2025
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -23,7 +23,9 @@ biocViews: Microbiome, Software, Sequencing
License: Artistic-2.0 | file LICENSE
Depends:
R (>= 4.4.0),
mia
ggplot2,
mia,
SummarizedExperiment
Imports:
dplyr,
methods,
@@ -35,7 +37,6 @@ Imports:
Suggests:
BiocStyle,
devtools,
ggplot2,
knitr,
lubridate,
miaViz,
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ import(mia)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,group_by)
importFrom(dplyr,lag)
importFrom(dplyr,mutate)
10 changes: 10 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
@@ -30,3 +30,13 @@ setGeneric("getBimodality", signature = "x", function(x, ...)
#' @export
setGeneric("addBimodality", signature = "x", function(x, ...)
standardGeneric("addBimodality"))

#' @rdname addShortTermChange
#' @export
setGeneric("addShortTermChange", signature = "x", function(x, ...)
standardGeneric("addShortTermChange"))

#' @rdname addShortTermChange
#' @export
setGeneric("getShortTermChange", signature = "x", function(x, ...)
standardGeneric("getShortTermChange"))
147 changes: 147 additions & 0 deletions R/getShortTermChange.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#' @name
#' getShortTermChange
#'
#' @export
#'
#' @title
#' Short term changes in abundance
#'
#' @description
#' Calculates short term changes in abundance of taxa using temporal
#' abundance data.
#'
#' @details
#' This approach is used by Wisnoski NI and colleagues
#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on
#' the following calculation log(present abundance/past abundance).
#' Also a compositional version using relative abundance similar to
#' Brian WJi, Sheth R et al
#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used.
#' This approach is useful for identifying short term growth behaviors of taxa.
#'
#' @references
#'
#' Ji, B.W., et al. (2020) Macroecological dynamics of gut microbiota.
#' Nat Microbiol 5, 768–775 . doi: https://doi.org/10.1038/s41564-020-0685-1
#'
#' @return
#' \code{getShortTermChange} returns \code{DataFrame} object containing
#' the short term change in abundance over time for a microbe.
#' \code{addShortTermChange}, on the other hand, returns a
#' \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}
#' object with these results in its \code{metadata}.
#'
#' @inheritParams addBaselineDivergence
#'
#' @param name \code{Character scalar}. Specifies a name for storing
#' short term results. (Default: \code{"short_term_change"})
#'
#' @param ... additional arguments.
#
#' @examples
#' library(miaTime)
#'
#' # Load time series data
#' data(minimalgut)
#' tse <- minimalgut
#'
#' # Get relative abundances
#' tse <- transformAssay(tse, method = "relabundance")
#' # Calculate short term changes
#' df <- getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr", group = "StudyIdentifier")
#'
#' # Select certain bacteria
#' select <- df[["FeatureID"]] %in% c(
#' "Ruminococcus_bromii", "Coprococcus_catus", "Akkermansia_muciniphila")
#' df <- df[ select, ]
#'
#' # Plot results
#' library(ggplot2)
#' p <- ggplot(df, aes(x = Time.hr, y = growth_rate, colour = FeatureID)) +
#' geom_smooth() +
#' facet_grid(. ~ StudyIdentifier, scales = "free") +
#' scale_y_log10()
#' p
#'
#' @seealso
#' \code{\link[mia:getStepwiseDivergence]{mia::getStepwiseDivergence()}}
NULL

#' @rdname getShortTermChange
#' @export
setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"),
function(x, name = "short_term_change", ...){
x <- .check_and_get_altExp(x, ...)
args <- c(list(x = x), list(...)[!names(list(...)) %in% c("altexp")])
# Calculate short term change
res <- do.call(getShortTermChange, args)
# Add to metadata
x <- .add_values_to_metadata(x, name, res, ...)
return(x)
}
)

#' @rdname getShortTermChange
#' @export
setMethod("getShortTermChange", signature = c(x = "SummarizedExperiment"),
function(
x, time.col, assay.type = "counts", time.interval = 1L, group = NULL,
...){
############################## Input check #############################
x <- .check_and_get_altExp(x, ...)
temp <- .check_input(
time.col, list("character scalar"), colnames(colData(x)))
if( !is.numeric(x[[time.col]]) ){
stop("'time.col' must specify numeric column from colData(x)",
call. = FALSE)
}
#
.check_assay_present(assay.type, x)
#
temp <- .check_input(
group, list(NULL, "character scalar"), colnames(colData(x)))
########################### Input check end ############################
res <- .calculate_growth_metrics(
x, assay.type, time.col, group, time.interval, ...)
return(res)
}
)

################################ HELP FUNCTIONS ################################

# wrapper to calculate growth matrix
#' @importFrom dplyr arrange group_by summarise mutate select
.calculate_growth_metrics <- function(
x, assay.type, time.col, group, time.interval, ...) {
# Get data in long format
df <- meltSE(x, assay.type, add.col = c(time.col, group))
# If there are replicated samples, give warning that average is calculated
if( anyDuplicated(df[, c("FeatureID", group, time.col)]) ){
warning("The dataset contains replicated samples. The average is ",
"calculated for each time point.", call. = FALSE)
}
# Sort data based on time
df <- df |> arrange( !!sym(time.col) )
# Group based on features and time points. If group was specified, take that
# also into account
if( !is.null(group) ){
df <- df |> group_by(!!sym(group), FeatureID, !!sym(time.col))
} else{
df <- df |> group_by(FeatureID, !!sym(time.col))
}
df <- df |>
# Summarize duplicated samples by taking an average
summarise(value = mean(.data[[assay.type]], na.rm = TRUE)) |>
# For each feature in a sample group, calculate growth metrics
mutate(
time_lag = !!sym(time.col) -
lag(!!sym(time.col), n = time.interval),
growth_diff = value - lag(value, n = time.interval),
growth_rate = (value - lag(value, n = time.interval)) /
lag(value, n = time.interval),
var_abund = (value - lag(value, n = time.interval)) / time_lag
) |>
# Remove value column that includes average abundances
select(-value)
return(df)
}
1 change: 1 addition & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -176,3 +176,4 @@
.check_assay_present <- mia:::.check_assay_present
.add_values_to_colData <- mia:::.add_values_to_colData
.check_and_get_altExp <- mia:::.check_and_get_altExp
.add_values_to_metadata <- mia:::.add_values_to_metadata
69 changes: 69 additions & 0 deletions man/addShortTermChange.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions tests/testthat/test-getShortTermChange.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
test_that("getShortTermChange", {
library(SummarizedExperiment)
# Load dataset
data(minimalgut)
tse <- minimalgut
# Check if the function handles empty input
empty_se <- SummarizedExperiment()
expect_error(getShortTermChange(empty_se),
"No data available in `x`")

# Should still return a dataframe
short_time_labels <- c("74.5h", "173h", "438h", "434h", "390h")
# Subset samples by Time_label and StudyIdentifier
tse_filtered <- tse[, !(tse$Time_label %in% short_time_labels)]
tse_filtered <- tse_filtered[, (tse_filtered$StudyIdentifier == "Bioreactor A")]

expect_true(all(!(tse_filtered$Time_label %in% short_time_labels)))

result <- getShortTermChange(tse_filtered, time.col = "Time.hr")
# Expected output is a dataframe
expect_true(is.data.frame(result))
expect_true("growth_diff" %in% colnames(result))
# Test some expected properties (e.g., that growth_diff isn't all NAs)
expect_false(all(is.na(result$growth_diff)))
})