From 375b11f510ac8492ef9ab578be57801ad81d9d65 Mon Sep 17 00:00:00 2001 From: Rosemary Li <42564519+yli110-stat697@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:07:39 -0500 Subject: [PATCH] # 16 calculate_study_day (#20) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * draft * add reference df so that DM can be joined on the fly * add roxygen doc * doc update * rename ds_in as sdtm_in * replace ds_dm with dm_domain * add default value for refdt * add matching check * refdt and tgdt have to be ISO format, otherwise NA and warning * utils::hasName * modify assert message * update message for refdt and tgdt convertion error * set .call to FALSE for warning * add unit test * update document * style update * add asserthat in DESCRIPTION * Automatic renv profile update. * Automatic renv profile update. * use data frame instead of data.frame Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * data frame instead of data.frame Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * update wording for test Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * update wording for test Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * use base pipe Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * use base pipe Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * use base pipe Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * update man file * update df name for lint compliance * integer with L * L for test integers * replace nested ifelse with case when per lintr suggestion * Revert "replace nested ifelse with case when per lintr suggestion" This reverts commit f6d467bd015993b4e98869959c1f8c4e793b0578. * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * update typo in roxygen header * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * remove don't run * capitalize param per reference Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * use mapply to replace nested if else * style * roxygen update * fixed to be TRUE * fix lintr * fix lintr in test file * remove @md tag * Update `calculate_study_day()` Fix usage of default missing value in parameter `dm_domain` and calculate study day with more straightforward expression. * move defaults to the end of argument positions * add equal sign * default to "DM" * update unit test per the arg position change * man file update * remove dm_domain default * Update common.yml * simplify by statement * Automatic renv profile update. * add unit test for same input and reference df * rename function * rename files * study day changed to integer * update style * update man file * update doc Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> * update man file --------- Co-authored-by: Rosemary Li Co-authored-by: yli110-stat697 Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com> Co-authored-by: Ramiro Magno Co-authored-by: Adam Foryƛ --- .github/workflows/common.yml | 2 +- DESCRIPTION | 1 + NAMESPACE | 1 + R/derive_study_day.R | 132 +++++++++++++++++++++++++ man/derive_study_day.Rd | 57 +++++++++++ renv.lock | 12 ++- renv/profiles/4.2/renv.lock | 10 ++ renv/profiles/4.3/renv.lock | 12 ++- tests/testthat/test-derive_study_day.R | 76 ++++++++++++++ 9 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 R/derive_study_day.R create mode 100644 man/derive_study_day.Rd create mode 100644 tests/testthat/test-derive_study_day.R diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml index de646ffb..202066b0 100644 --- a/.github/workflows/common.yml +++ b/.github/workflows/common.yml @@ -90,7 +90,7 @@ jobs: if: > github.event_name != 'release' with: - r-version: "4.3" + r-version: "release" # Whether to skip code coverage badge creation # Setting to 'false' will require you to create # an orphan branch called 'badges' in your repository diff --git a/DESCRIPTION b/DESCRIPTION index acb15ebf..714525c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Depends: R (>= 4.2) Imports: admiraldev, dplyr (>= 1.0.0), + assertthat, purrr (>= 0.3.3), rlang (>= 0.4.4), stringr (>= 1.4.0), diff --git a/NAMESPACE b/NAMESPACE index 7fc88eef..f1161f5e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(print,iso8601) export(create_iso8601) +export(derive_study_day) export(fmt_cmp) export(problems) importFrom(rlang,.data) diff --git a/R/derive_study_day.R b/R/derive_study_day.R new file mode 100644 index 00000000..6a04c15c --- /dev/null +++ b/R/derive_study_day.R @@ -0,0 +1,132 @@ +#' `derive_study_day` performs study day calculation +#' @description +#' This function takes the an input data frame and a reference data frame (which +#' is DM domain in most cases), and calculate the study day from reference date +#' and target date. In case of unexpected conditions like reference date is not +#' unique for each patient, or reference and input dates are not actual dates, +#' NA will be returned for those records. +#' +#' @param sdtm_in Input data frame that contains the target date. +#' @param dm_domain Reference date frame that contains the reference date. +#' @param tgdt Target date from `sdtm_in` that will be used to calculate the study +#' day. +#' @param refdt Reference date from `dm_domain` that will be used as reference to +#' calculate the study day. +#' @param study_day_var New study day variable name in the output. For +#' example, AESTDY for AE domain and CMSTDY for CM domain. +#' @param merge_key Character to represent the merging key between `sdtm_in` and +#' `dm_domain`. +#' +#' @return Data frame that takes all columns from `sdtm_in` and a new variable +#' to represent the calculated study day. +#' +#' @export +#' +#' @examples +#' ae <- data.frame( +#' USUBJID = c("study123-123", "study123-124", "study123-125"), +#' AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14") +#' ) +#' dm <- data.frame( +#' USUBJID = c("study123-123", "study123-124", "study123-125"), +#' RFSTDTC = c("2012-02-01", "2012-04-14", NA) +#' ) +#' ae$AESTDTC <- as.Date(ae$AESTDTC) +#' dm$RFSTDTC <- as.Date(dm$RFSTDTC) +#' derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY") +#' +derive_study_day <- function(sdtm_in, + dm_domain, + tgdt, + refdt, + study_day_var, + merge_key = "USUBJID") { + assertthat::assert_that(is.data.frame(sdtm_in)) + assertthat::assert_that(is.data.frame(dm_domain)) + assertthat::assert_that( + utils::hasName(dm_domain, refdt), + msg = "dm_domain needs to have the variable of refdt." + ) + assertthat::assert_that( + utils::hasName(sdtm_in, tgdt), + msg = "sdtm_in needs to have the variable of tgdt." + ) + assertthat::assert_that( + utils::hasName(dm_domain, merge_key), + msg = "dm_domain needs to have the variable of merge_key." + ) + assertthat::assert_that( + utils::hasName(sdtm_in, merge_key), + msg = "sdtm_in needs to have the variable of merge_key." + ) + assertthat::assert_that(is.character(study_day_var)) + # check tgdt and study_day_var matching, for example, CMSTDTC matches CMSTDY + if (gsub("DTC", "", tgdt, fixed = TRUE) != gsub("DY", "", study_day_var, fixed = TRUE)) { + warning( + "Target date and the returned study day doesn't match. ", + "Expecting matching date and study day, for example, CMENDTC and CMENDY" + ) + } + + original_variables <- names(sdtm_in) + + if (!identical(sdtm_in, dm_domain)) { + dm_domain <- unique(dm_domain[c(merge_key, refdt)]) + + check_refdt_uniqueness <- dm_domain |> + dplyr::group_by(dplyr::pick({{ merge_key }})) |> + dplyr::filter(dplyr::n() > 1L) + if (nrow(check_refdt_uniqueness) > 0L) { + warning( + "Reference date is not unique for each patient! ", + "Patient without unique reference date will be ingored. ", + "NA will be returned for such records." + ) + dm_domain <- dm_domain[ + !dm_domain[[merge_key]] %in% check_refdt_uniqueness[[merge_key]], + ] + } + + sdtm_in <- sdtm_in |> + dplyr::left_join( + dm_domain, + by = merge_key + ) + } + + # refdt/tgdt should be in ISO format, otherwise throw warning + sdtm_in[[refdt]] <- tryCatch( + as.Date(sdtm_in[[refdt]], "%Y-%m-%d"), + error = function(e) { + warning( + "Encountered errors when converting refdt to dates. ", + "The warning message is ", + e$message, + call. = FALSE + ) + NA + } + ) + sdtm_in[[tgdt]] <- tryCatch( + as.Date(sdtm_in[[tgdt]], "%Y-%m-%d"), + error = function(e) { + warning( + "Encountered errors when converting tgdt to dates. ", + "The warning message is ", + e$message, + call. = FALSE + ) + NA + } + ) + + ref <- sdtm_in[[refdt]] + tgt <- sdtm_in[[tgdt]] + + # SDTMIG 4.4.4 Use of the Study Day Variables + res <- ifelse(tgt >= ref, tgt - ref + 1L, tgt - ref) + + sdtm_in <- sdtm_in[original_variables] + sdtm_in[study_day_var] <- as.integer(res) + return(sdtm_in) +} diff --git a/man/derive_study_day.Rd b/man/derive_study_day.Rd new file mode 100644 index 00000000..a24eb765 --- /dev/null +++ b/man/derive_study_day.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_study_day.R +\name{derive_study_day} +\alias{derive_study_day} +\title{\code{derive_study_day} performs study day calculation} +\usage{ +derive_study_day( + sdtm_in, + dm_domain, + tgdt, + refdt, + study_day_var, + merge_key = "USUBJID" +) +} +\arguments{ +\item{sdtm_in}{Input data frame that contains the target date.} + +\item{dm_domain}{Reference date frame that contains the reference date.} + +\item{tgdt}{Target date from \code{sdtm_in} that will be used to calculate the study +day.} + +\item{refdt}{Reference date from \code{dm_domain} that will be used as reference to +calculate the study day.} + +\item{study_day_var}{New study day variable name in the output. For +example, AESTDY for AE domain and CMSTDY for CM domain.} + +\item{merge_key}{Character to represent the merging key between \code{sdtm_in} and +\code{dm_domain}.} +} +\value{ +Data frame that takes all columns from \code{sdtm_in} and a new variable +to represent the calculated study day. +} +\description{ +This function takes the an input data frame and a reference data frame (which +is DM domain in most cases), and calculate the study day from reference date +and target date. In case of unexpected conditions like reference date is not +unique for each patient, or reference and input dates are not actual dates, +NA will be returned for those records. +} +\examples{ +ae <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14") +) +dm <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + RFSTDTC = c("2012-02-01", "2012-04-14", NA) +) +ae$AESTDTC <- as.Date(ae$AESTDTC) +dm$RFSTDTC <- as.Date(dm$RFSTDTC) +derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY") + +} diff --git a/renv.lock b/renv.lock index 20b69f01..329ec463 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.3.2", + "Version": "4.3.3", "Repositories": [ { "Name": "CRAN", @@ -98,6 +98,16 @@ ], "Hash": "e8a22846fff485f0be3770c2da758713" }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "tools" + ], + "Hash": "50c838a310445e954bc13f26f26a6ecf" + }, "backports": { "Package": "backports", "Version": "1.4.1", diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index a85cc2fe..494312ef 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -98,6 +98,16 @@ ], "Hash": "e8a22846fff485f0be3770c2da758713" }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "tools" + ], + "Hash": "50c838a310445e954bc13f26f26a6ecf" + }, "backports": { "Package": "backports", "Version": "1.4.1", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 20b69f01..329ec463 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.3.2", + "Version": "4.3.3", "Repositories": [ { "Name": "CRAN", @@ -98,6 +98,16 @@ ], "Hash": "e8a22846fff485f0be3770c2da758713" }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "tools" + ], + "Hash": "50c838a310445e954bc13f26f26a6ecf" + }, "backports": { "Package": "backports", "Version": "1.4.1", diff --git a/tests/testthat/test-derive_study_day.R b/tests/testthat/test-derive_study_day.R new file mode 100644 index 00000000..e396bbb4 --- /dev/null +++ b/tests/testthat/test-derive_study_day.R @@ -0,0 +1,76 @@ +ae <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"), + stringsAsFactors = FALSE +) +dm <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_), + stringsAsFactors = FALSE +) + +test_that("`derive_study_day()` works as expected for invalid input", { + expect_error( + derive_study_day("a", "b", "C", "D", "E"), + "sdtm_in is not a data frame" + ) + expect_error( + derive_study_day(iris, "b", "C", "D", "E"), + "dm_domain is not a data frame" + ) + expect_error( + derive_study_day(iris, iris, "d", "e", "b"), + "dm_domain needs to have the variable of refdt" + ) + expect_error( + derive_study_day(iris, iris, "d", "Species"), + "sdtm_in needs to have the variable of tgdt" + ) + expect_error( + derive_study_day(iris, iris, "Petal.Length", "Species", "e"), + "needs to have the variable of merge_key" + ) + expect_error( + derive_study_day(iris, iris, "Petal.Length", "Species", 123L, "Species"), + "study_day_var is not a character vector" + ) + expect_warning( + derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AENDY"), + "Target date and the returned study day doesn't match." + ) + + dm1 <- data.frame( + USUBJID = c("study123-123", "study123-123", "study123-125"), + RFSTDTC = c("2012-02-01", "2012-04-14", "2012-04-14"), + stringsAsFactors = FALSE + ) + expect_warning( + derive_study_day(ae, dm1, "AESTDTC", "RFSTDTC", "AESTDY"), + "Reference date is not unique for each patient!" + ) + + dm2 <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + RFSTDTC = c(123L, 456L, 789L), + stringsAsFactors = FALSE + ) + expect_warning( + derive_study_day(ae, dm2, "AESTDTC", "RFSTDTC", "AESTDY"), + "Encountered errors when converting refdt to dates." + ) +}) + +test_that("`calculate_study_day()` works as expected for valid input", { + res <- derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY") + expected <- c(-31L, 1L, NA) + expect_equal(res$AESTDY, expected, tolerance = "1.5e-08") + + df <- data.frame( + USUBJID = c("study123-123", "study123-124", "study123-125"), + RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_), + AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"), + stringsAsFactors = FALSE + ) + res1 <- derive_study_day(df, df, "AESTDTC", "RFSTDTC", "AESTDY") + expect_equal(res1$AESTDY, expected, tolerance = "1.5e-08") +})