From a0d3ffc7bf189282603d8abb7f4f3c2a1c5847a4 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Wed, 11 Dec 2024 23:00:22 +0000 Subject: [PATCH 1/6] Split prior infections and growth calculation into separate function --- R/create.R | 26 ++++----------------- R/utilities.R | 39 ++++++++++++++++++++++++++++++++ man/calculate_prior_estimates.Rd | 22 ++++++++++++++++++ 3 files changed, 65 insertions(+), 22 deletions(-) create mode 100644 man/calculate_prior_estimates.Rd diff --git a/R/create.R b/R/create.R index ade71b2e4..fcfe538e1 100644 --- a/R/create.R +++ b/R/create.R @@ -501,28 +501,10 @@ create_stan_data <- function(data, seeding_time, delay = stan_data$seeding_time, horizon = stan_data$horizon ) ) - # initial estimate of growth - first_week <- data.table::data.table( - confirm = cases[seq_len(min(7, length(cases)))], - t = seq_len(min(7, length(cases))) - )[!is.na(confirm)] - stan_data$prior_infections <- log(mean(first_week$confirm, na.rm = TRUE)) - stan_data$prior_infections <- ifelse( - is.na(stan_data$prior_infections) || is.null(stan_data$prior_infections), - 0, stan_data$prior_infections - ) - if (stan_data$seeding_time > 1 && nrow(first_week) > 1) { - safe_lm <- purrr::safely(stats::lm) - stan_data$prior_growth <- safe_lm(log(confirm) ~ t, - data = first_week - )[[1]] - stan_data$prior_growth <- ifelse(is.null(stan_data$prior_growth), 0, - stan_data$prior_growth$coefficients[2] - ) - } else { - stan_data$prior_growth <- 0 - } - + # Calculate prior infections and growth + prior_estimates <- calculate_prior_estimates(cases, seeding_time) + stan_data$prior_infections <- prior_estimates$prior_infections + stan_data$prior_growth <- prior_estimates$prior_growth # backcalculation settings stan_data <- c(stan_data, create_backcalc_data(backcalc)) # gaussian process data diff --git a/R/utilities.R b/R/utilities.R index 3a73bacec..7a4e93b4e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -417,6 +417,45 @@ lapply_func <- function(..., backend = "rstan", future.opts = list()) { } } +#' Calculate prior infections and growth +#' +#' @description Calculates the prior infections and growth rate based on the +#' first week's data. +#' +#' @param cases Numeric vector; the case counts from the input data. +#' @inheritParams create_stan_data +#' @return A list containing `prior_infections` and `prior_growth`. +#' @keywords internal +calculate_prior_estimates <- function(cases, seeding_time) { + first_week <- data.table::data.table( + confirm = cases[seq_len(min(7, length(cases)))], + t = seq_len(min(7, length(cases))) + )[!is.na(confirm)] + + # Calculate prior infections + prior_infections <- log(mean(first_week$confirm, na.rm = TRUE)) + prior_infections <- ifelse( + is.na(prior_infections) || is.null(prior_infections), + 0, prior_infections + ) + + # Calculate prior growth + if (seeding_time > 1 && nrow(first_week) > 1) { + safe_lm <- purrr::safely(stats::lm) + prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] + prior_growth <- ifelse( + is.null(prior_growth), 0, prior_growth$coefficients[2] + ) + } else { + prior_growth <- 0 + } + + return(list( + prior_infections = prior_infections, + prior_growth = prior_growth + )) +} + #' @importFrom stats glm median na.omit pexp pgamma plnorm quasipoisson rexp #' @importFrom stats rlnorm rnorm rpois runif sd var rgamma pnorm globalVariables( diff --git a/man/calculate_prior_estimates.Rd b/man/calculate_prior_estimates.Rd new file mode 100644 index 000000000..edbcdda43 --- /dev/null +++ b/man/calculate_prior_estimates.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{calculate_prior_estimates} +\alias{calculate_prior_estimates} +\title{Calculate prior infections and growth} +\usage{ +calculate_prior_estimates(cases, seeding_time) +} +\arguments{ +\item{cases}{Numeric vector; the case counts from the input data.} + +\item{seeding_time}{Integer; seeding time, usually obtained using +\code{\link[=get_seeding_time]{get_seeding_time()}}.} +} +\value{ +A list containing \code{prior_infections} and \code{prior_growth}. +} +\description{ +Calculates the prior infections and growth rate based on the +first week's data. +} +\keyword{internal} From e3980e5018fc1f6fb048204f1d449d9d95ed05d2 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Wed, 11 Dec 2024 23:00:33 +0000 Subject: [PATCH 2/6] Add tests --- tests/testthat/test-utilities.R | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/testthat/test-utilities.R diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R new file mode 100644 index 000000000..d29e21e2d --- /dev/null +++ b/tests/testthat/test-utilities.R @@ -0,0 +1,49 @@ +test_that("calculate_prior_estimates works", { + cases <- EpiNow2::example_confirmed[1:30] + prior_estimates <- calculate_prior_estimates(cases$confirm, 7) + # Check dimensions + expect_identical( + names(prior_estimates), + c("prior_infections", "prior_growth") + ) + expect_identical(length(prior_estimates), 2L) + # Check values + expect_identical( + round(prior_estimates$prior_infections, 2), + 4.53 + ) + expect_identical( + round(prior_estimates$prior_growth, 2), + 0.35 + ) +}) + +test_that("calculate_prior_estimates handles NA values correctly", { + cases <- c(10, 20, NA, 40, 50, NA, 70) + prior_estimates <- calculate_prior_estimates(cases, 7) + expect_equal( + prior_estimates$prior_infections, + log(mean(c(10, 20, 40, 50, 70), na.rm = TRUE)) + ) + expect_true(!is.na(prior_estimates$prior_growth)) +}) + +test_that("calculate_prior_estimates handles exponential growth", { + cases <- 2^(c(0:6)) # Exponential growth + prior_estimates <- calculate_prior_estimates(cases, 7) + expect_equal(prior_estimates$prior_infections, log(mean(cases[1:7]))) + expect_true(prior_estimates$prior_growth > 0) # Growth should be positive +}) + +test_that("calculate_prior_estimates handles exponential decline", { + cases <- rev(2^(c(0:6))) # Exponential decline + prior_estimates <- calculate_prior_estimates(cases, 7) + expect_equal(prior_estimates$prior_infections, log(mean(cases[1:7]))) + expect_true(prior_estimates$prior_growth < 0) # Growth should be negative +}) + +test_that("calculate_prior_estimates correctly handles seeding time less than 2", { + cases <- c(5, 10, 20) # Less than 7 days of data + prior_estimates <- calculate_prior_estimates(cases, 1) + expect_equal(prior_estimates$prior_growth, 0) # Growth should be 0 if seeding time is <= 1 +}) \ No newline at end of file From 340e570597fc1943b2073bb8726a6b189999e93c Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Thu, 12 Dec 2024 10:10:21 +0000 Subject: [PATCH 3/6] Move function to create.R --- R/create.R | 48 ++++++++++++++++++++++++++++++++++++++++++++---- R/utilities.R | 39 --------------------------------------- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/R/create.R b/R/create.R index fcfe538e1..789bbb444 100644 --- a/R/create.R +++ b/R/create.R @@ -444,6 +444,45 @@ create_obs_model <- function(obs = obs_opts(), dates) { return(data) } + +#' Calculate prior infections and fit early growth +#' +#' @description Calculates the prior infections and growth rate based on the +#' first week's data. +#' +#' @param cases Numeric vector; the case counts from the input data. +#' @inheritParams create_stan_data +#' @return A list containing `prior_infections` and `prior_growth`. +#' @keywords internal +estimate_early_dynamics <- function(cases, seeding_time) { + first_week <- data.table::data.table( + confirm = cases[seq_len(min(7, length(cases)))], + t = seq_len(min(7, length(cases))) + )[!is.na(confirm)] + + # Calculate prior infections + prior_infections <- log(mean(first_week$confirm, na.rm = TRUE)) + prior_infections <- ifelse( + is.na(prior_infections) || is.null(prior_infections), + 0, prior_infections + ) + + # Calculate prior growth + if (seeding_time > 1 && nrow(first_week) > 1) { + safe_lm <- purrr::safely(stats::lm) + prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] + prior_growth <- ifelse( + is.null(prior_growth), 0, prior_growth$coefficients[2] + ) + } else { + prior_growth <- 0 + } + return(list( + prior_infections = prior_infections, + prior_growth = prior_growth + )) +} + #' Create Stan Data Required for estimate_infections #' #' @description`r lifecycle::badge("stable")` @@ -501,10 +540,11 @@ create_stan_data <- function(data, seeding_time, delay = stan_data$seeding_time, horizon = stan_data$horizon ) ) - # Calculate prior infections and growth - prior_estimates <- calculate_prior_estimates(cases, seeding_time) - stan_data$prior_infections <- prior_estimates$prior_infections - stan_data$prior_growth <- prior_estimates$prior_growth + # calculate prior infections and fit early growth + stan_data <- c( + stan_data, + estimate_early_dynamics(cases, seeding_time) + ) # backcalculation settings stan_data <- c(stan_data, create_backcalc_data(backcalc)) # gaussian process data diff --git a/R/utilities.R b/R/utilities.R index 7a4e93b4e..3a73bacec 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -417,45 +417,6 @@ lapply_func <- function(..., backend = "rstan", future.opts = list()) { } } -#' Calculate prior infections and growth -#' -#' @description Calculates the prior infections and growth rate based on the -#' first week's data. -#' -#' @param cases Numeric vector; the case counts from the input data. -#' @inheritParams create_stan_data -#' @return A list containing `prior_infections` and `prior_growth`. -#' @keywords internal -calculate_prior_estimates <- function(cases, seeding_time) { - first_week <- data.table::data.table( - confirm = cases[seq_len(min(7, length(cases)))], - t = seq_len(min(7, length(cases))) - )[!is.na(confirm)] - - # Calculate prior infections - prior_infections <- log(mean(first_week$confirm, na.rm = TRUE)) - prior_infections <- ifelse( - is.na(prior_infections) || is.null(prior_infections), - 0, prior_infections - ) - - # Calculate prior growth - if (seeding_time > 1 && nrow(first_week) > 1) { - safe_lm <- purrr::safely(stats::lm) - prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] - prior_growth <- ifelse( - is.null(prior_growth), 0, prior_growth$coefficients[2] - ) - } else { - prior_growth <- 0 - } - - return(list( - prior_infections = prior_infections, - prior_growth = prior_growth - )) -} - #' @importFrom stats glm median na.omit pexp pgamma plnorm quasipoisson rexp #' @importFrom stats rlnorm rnorm rpois runif sd var rgamma pnorm globalVariables( From 90cc4a3bdc9c7ad7bea711654c72acbd3787e23b Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Thu, 12 Dec 2024 10:10:30 +0000 Subject: [PATCH 4/6] Rename function --- ...e_prior_estimates.Rd => estimate_early_dynamics.Rd} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename man/{calculate_prior_estimates.Rd => estimate_early_dynamics.Rd} (68%) diff --git a/man/calculate_prior_estimates.Rd b/man/estimate_early_dynamics.Rd similarity index 68% rename from man/calculate_prior_estimates.Rd rename to man/estimate_early_dynamics.Rd index edbcdda43..399ff76b4 100644 --- a/man/calculate_prior_estimates.Rd +++ b/man/estimate_early_dynamics.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{calculate_prior_estimates} -\alias{calculate_prior_estimates} -\title{Calculate prior infections and growth} +% Please edit documentation in R/create.R +\name{estimate_early_dynamics} +\alias{estimate_early_dynamics} +\title{Calculate prior infections and fit early growth} \usage{ -calculate_prior_estimates(cases, seeding_time) +estimate_early_dynamics(cases, seeding_time) } \arguments{ \item{cases}{Numeric vector; the case counts from the input data.} From 44ddef83c3dea62e8882e21c2b721a5d9d3d28e3 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Thu, 12 Dec 2024 10:55:08 +0000 Subject: [PATCH 5/6] Rename test script and function --- ...ities.R => test-estimate-early-dynamics.R} | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) rename tests/testthat/{test-utilities.R => test-estimate-early-dynamics.R} (64%) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-estimate-early-dynamics.R similarity index 64% rename from tests/testthat/test-utilities.R rename to tests/testthat/test-estimate-early-dynamics.R index d29e21e2d..b7c0d50bd 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-estimate-early-dynamics.R @@ -1,6 +1,6 @@ -test_that("calculate_prior_estimates works", { +test_that("estimate_early_dynamics works", { cases <- EpiNow2::example_confirmed[1:30] - prior_estimates <- calculate_prior_estimates(cases$confirm, 7) + prior_estimates <- estimate_early_dynamics(cases$confirm, 7) # Check dimensions expect_identical( names(prior_estimates), @@ -18,9 +18,9 @@ test_that("calculate_prior_estimates works", { ) }) -test_that("calculate_prior_estimates handles NA values correctly", { +test_that("estimate_early_dynamics handles NA values correctly", { cases <- c(10, 20, NA, 40, 50, NA, 70) - prior_estimates <- calculate_prior_estimates(cases, 7) + prior_estimates <- estimate_early_dynamics(cases, 7) expect_equal( prior_estimates$prior_infections, log(mean(c(10, 20, 40, 50, 70), na.rm = TRUE)) @@ -28,22 +28,22 @@ test_that("calculate_prior_estimates handles NA values correctly", { expect_true(!is.na(prior_estimates$prior_growth)) }) -test_that("calculate_prior_estimates handles exponential growth", { +test_that("estimate_early_dynamics handles exponential growth", { cases <- 2^(c(0:6)) # Exponential growth - prior_estimates <- calculate_prior_estimates(cases, 7) + prior_estimates <- estimate_early_dynamics(cases, 7) expect_equal(prior_estimates$prior_infections, log(mean(cases[1:7]))) expect_true(prior_estimates$prior_growth > 0) # Growth should be positive }) -test_that("calculate_prior_estimates handles exponential decline", { +test_that("estimate_early_dynamics handles exponential decline", { cases <- rev(2^(c(0:6))) # Exponential decline - prior_estimates <- calculate_prior_estimates(cases, 7) + prior_estimates <- estimate_early_dynamics(cases, 7) expect_equal(prior_estimates$prior_infections, log(mean(cases[1:7]))) expect_true(prior_estimates$prior_growth < 0) # Growth should be negative }) -test_that("calculate_prior_estimates correctly handles seeding time less than 2", { +test_that("estimate_early_dynamics correctly handles seeding time less than 2", { cases <- c(5, 10, 20) # Less than 7 days of data - prior_estimates <- calculate_prior_estimates(cases, 1) + prior_estimates <- estimate_early_dynamics(cases, 1) expect_equal(prior_estimates$prior_growth, 0) # Growth should be 0 if seeding time is <= 1 }) \ No newline at end of file From 00a296a3bc3b10d7cc4ad14fe61c1c388e4fcb3e Mon Sep 17 00:00:00 2001 From: James Azam Date: Thu, 12 Dec 2024 12:57:44 +0000 Subject: [PATCH 6/6] Add NEWS item --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 29b8a2ebf..a2df2e442 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,7 @@ ## Package changes - The internal functions `create_clean_reported_cases()` has been broken up into several functions, with relevant ones `filter_leading_zeros()`, `add_breakpoints()` and `apply_zero_threshold()` exposed to the user. By @sbfnk in #884 and reviewed by @seabbs and @jamesmbaazam. +- The step of estimating early infections and growth in the internal function `create_stan_data()` has been separated into a new internal function `estimate_early_dynamics()`. By @jamesmbaazam in #888 and reviewed by @sbfnk. ## Documentation