From 375b11f510ac8492ef9ab578be57801ad81d9d65 Mon Sep 17 00:00:00 2001
From: Rosemary Li <42564519+yli110-stat697@users.noreply.github.com>
Date: Wed, 13 Mar 2024 11:07:39 -0500
Subject: [PATCH] # 16 calculate_study_day (#20)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* draft
* add reference df so that DM can be joined on the fly
* add roxygen doc
* doc update
* rename ds_in as sdtm_in
* replace ds_dm with dm_domain
* add default value for refdt
* add matching check
* refdt and tgdt have to be ISO format, otherwise NA and warning
* utils::hasName
* modify assert message
* update message for refdt and tgdt convertion error
* set .call to FALSE for warning
* add unit test
* update document
* style update
* add asserthat in DESCRIPTION
* Automatic renv profile update.
* Automatic renv profile update.
* use data frame instead of data.frame
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* data frame instead of data.frame
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* update wording for test
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* update wording for test
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* use base pipe
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* use base pipe
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* use base pipe
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* update man file
* update df name for lint compliance
* integer with L
* L for test integers
* replace nested ifelse with case when per lintr suggestion
* Revert "replace nested ifelse with case when per lintr suggestion"
This reverts commit f6d467bd015993b4e98869959c1f8c4e793b0578.
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* update typo in roxygen header
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* remove don't run
* capitalize param per reference
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* use mapply to replace nested if else
* style
* roxygen update
* fixed to be TRUE
* fix lintr
* fix lintr in test file
* remove @md tag
* Update `calculate_study_day()`
Fix usage of default missing value in parameter `dm_domain` and calculate study day with more straightforward expression.
* move defaults to the end of argument positions
* add equal sign
* default to "DM"
* update unit test per the arg position change
* man file update
* remove dm_domain default
* Update common.yml
* simplify by statement
* Automatic renv profile update.
* add unit test for same input and reference df
* rename function
* rename files
* study day changed to integer
* update style
* update man file
* update doc
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
* update man file
---------
Co-authored-by: Rosemary Li
Co-authored-by: yli110-stat697
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
Co-authored-by: Ramiro Magno
Co-authored-by: Adam ForyĆ
---
.github/workflows/common.yml | 2 +-
DESCRIPTION | 1 +
NAMESPACE | 1 +
R/derive_study_day.R | 132 +++++++++++++++++++++++++
man/derive_study_day.Rd | 57 +++++++++++
renv.lock | 12 ++-
renv/profiles/4.2/renv.lock | 10 ++
renv/profiles/4.3/renv.lock | 12 ++-
tests/testthat/test-derive_study_day.R | 76 ++++++++++++++
9 files changed, 300 insertions(+), 3 deletions(-)
create mode 100644 R/derive_study_day.R
create mode 100644 man/derive_study_day.Rd
create mode 100644 tests/testthat/test-derive_study_day.R
diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml
index de646ffb..202066b0 100644
--- a/.github/workflows/common.yml
+++ b/.github/workflows/common.yml
@@ -90,7 +90,7 @@ jobs:
if: >
github.event_name != 'release'
with:
- r-version: "4.3"
+ r-version: "release"
# Whether to skip code coverage badge creation
# Setting to 'false' will require you to create
# an orphan branch called 'badges' in your repository
diff --git a/DESCRIPTION b/DESCRIPTION
index acb15ebf..714525c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,6 +38,7 @@ Depends: R (>= 4.2)
Imports:
admiraldev,
dplyr (>= 1.0.0),
+ assertthat,
purrr (>= 0.3.3),
rlang (>= 0.4.4),
stringr (>= 1.4.0),
diff --git a/NAMESPACE b/NAMESPACE
index 7fc88eef..f1161f5e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
S3method(print,iso8601)
export(create_iso8601)
+export(derive_study_day)
export(fmt_cmp)
export(problems)
importFrom(rlang,.data)
diff --git a/R/derive_study_day.R b/R/derive_study_day.R
new file mode 100644
index 00000000..6a04c15c
--- /dev/null
+++ b/R/derive_study_day.R
@@ -0,0 +1,132 @@
+#' `derive_study_day` performs study day calculation
+#' @description
+#' This function takes the an input data frame and a reference data frame (which
+#' is DM domain in most cases), and calculate the study day from reference date
+#' and target date. In case of unexpected conditions like reference date is not
+#' unique for each patient, or reference and input dates are not actual dates,
+#' NA will be returned for those records.
+#'
+#' @param sdtm_in Input data frame that contains the target date.
+#' @param dm_domain Reference date frame that contains the reference date.
+#' @param tgdt Target date from `sdtm_in` that will be used to calculate the study
+#' day.
+#' @param refdt Reference date from `dm_domain` that will be used as reference to
+#' calculate the study day.
+#' @param study_day_var New study day variable name in the output. For
+#' example, AESTDY for AE domain and CMSTDY for CM domain.
+#' @param merge_key Character to represent the merging key between `sdtm_in` and
+#' `dm_domain`.
+#'
+#' @return Data frame that takes all columns from `sdtm_in` and a new variable
+#' to represent the calculated study day.
+#'
+#' @export
+#'
+#' @examples
+#' ae <- data.frame(
+#' USUBJID = c("study123-123", "study123-124", "study123-125"),
+#' AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14")
+#' )
+#' dm <- data.frame(
+#' USUBJID = c("study123-123", "study123-124", "study123-125"),
+#' RFSTDTC = c("2012-02-01", "2012-04-14", NA)
+#' )
+#' ae$AESTDTC <- as.Date(ae$AESTDTC)
+#' dm$RFSTDTC <- as.Date(dm$RFSTDTC)
+#' derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+#'
+derive_study_day <- function(sdtm_in,
+ dm_domain,
+ tgdt,
+ refdt,
+ study_day_var,
+ merge_key = "USUBJID") {
+ assertthat::assert_that(is.data.frame(sdtm_in))
+ assertthat::assert_that(is.data.frame(dm_domain))
+ assertthat::assert_that(
+ utils::hasName(dm_domain, refdt),
+ msg = "dm_domain needs to have the variable of refdt."
+ )
+ assertthat::assert_that(
+ utils::hasName(sdtm_in, tgdt),
+ msg = "sdtm_in needs to have the variable of tgdt."
+ )
+ assertthat::assert_that(
+ utils::hasName(dm_domain, merge_key),
+ msg = "dm_domain needs to have the variable of merge_key."
+ )
+ assertthat::assert_that(
+ utils::hasName(sdtm_in, merge_key),
+ msg = "sdtm_in needs to have the variable of merge_key."
+ )
+ assertthat::assert_that(is.character(study_day_var))
+ # check tgdt and study_day_var matching, for example, CMSTDTC matches CMSTDY
+ if (gsub("DTC", "", tgdt, fixed = TRUE) != gsub("DY", "", study_day_var, fixed = TRUE)) {
+ warning(
+ "Target date and the returned study day doesn't match. ",
+ "Expecting matching date and study day, for example, CMENDTC and CMENDY"
+ )
+ }
+
+ original_variables <- names(sdtm_in)
+
+ if (!identical(sdtm_in, dm_domain)) {
+ dm_domain <- unique(dm_domain[c(merge_key, refdt)])
+
+ check_refdt_uniqueness <- dm_domain |>
+ dplyr::group_by(dplyr::pick({{ merge_key }})) |>
+ dplyr::filter(dplyr::n() > 1L)
+ if (nrow(check_refdt_uniqueness) > 0L) {
+ warning(
+ "Reference date is not unique for each patient! ",
+ "Patient without unique reference date will be ingored. ",
+ "NA will be returned for such records."
+ )
+ dm_domain <- dm_domain[
+ !dm_domain[[merge_key]] %in% check_refdt_uniqueness[[merge_key]],
+ ]
+ }
+
+ sdtm_in <- sdtm_in |>
+ dplyr::left_join(
+ dm_domain,
+ by = merge_key
+ )
+ }
+
+ # refdt/tgdt should be in ISO format, otherwise throw warning
+ sdtm_in[[refdt]] <- tryCatch(
+ as.Date(sdtm_in[[refdt]], "%Y-%m-%d"),
+ error = function(e) {
+ warning(
+ "Encountered errors when converting refdt to dates. ",
+ "The warning message is ",
+ e$message,
+ call. = FALSE
+ )
+ NA
+ }
+ )
+ sdtm_in[[tgdt]] <- tryCatch(
+ as.Date(sdtm_in[[tgdt]], "%Y-%m-%d"),
+ error = function(e) {
+ warning(
+ "Encountered errors when converting tgdt to dates. ",
+ "The warning message is ",
+ e$message,
+ call. = FALSE
+ )
+ NA
+ }
+ )
+
+ ref <- sdtm_in[[refdt]]
+ tgt <- sdtm_in[[tgdt]]
+
+ # SDTMIG 4.4.4 Use of the Study Day Variables
+ res <- ifelse(tgt >= ref, tgt - ref + 1L, tgt - ref)
+
+ sdtm_in <- sdtm_in[original_variables]
+ sdtm_in[study_day_var] <- as.integer(res)
+ return(sdtm_in)
+}
diff --git a/man/derive_study_day.Rd b/man/derive_study_day.Rd
new file mode 100644
index 00000000..a24eb765
--- /dev/null
+++ b/man/derive_study_day.Rd
@@ -0,0 +1,57 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/derive_study_day.R
+\name{derive_study_day}
+\alias{derive_study_day}
+\title{\code{derive_study_day} performs study day calculation}
+\usage{
+derive_study_day(
+ sdtm_in,
+ dm_domain,
+ tgdt,
+ refdt,
+ study_day_var,
+ merge_key = "USUBJID"
+)
+}
+\arguments{
+\item{sdtm_in}{Input data frame that contains the target date.}
+
+\item{dm_domain}{Reference date frame that contains the reference date.}
+
+\item{tgdt}{Target date from \code{sdtm_in} that will be used to calculate the study
+day.}
+
+\item{refdt}{Reference date from \code{dm_domain} that will be used as reference to
+calculate the study day.}
+
+\item{study_day_var}{New study day variable name in the output. For
+example, AESTDY for AE domain and CMSTDY for CM domain.}
+
+\item{merge_key}{Character to represent the merging key between \code{sdtm_in} and
+\code{dm_domain}.}
+}
+\value{
+Data frame that takes all columns from \code{sdtm_in} and a new variable
+to represent the calculated study day.
+}
+\description{
+This function takes the an input data frame and a reference data frame (which
+is DM domain in most cases), and calculate the study day from reference date
+and target date. In case of unexpected conditions like reference date is not
+unique for each patient, or reference and input dates are not actual dates,
+NA will be returned for those records.
+}
+\examples{
+ae <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14")
+)
+dm <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ RFSTDTC = c("2012-02-01", "2012-04-14", NA)
+)
+ae$AESTDTC <- as.Date(ae$AESTDTC)
+dm$RFSTDTC <- as.Date(dm$RFSTDTC)
+derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+
+}
diff --git a/renv.lock b/renv.lock
index 20b69f01..329ec463 100644
--- a/renv.lock
+++ b/renv.lock
@@ -1,6 +1,6 @@
{
"R": {
- "Version": "4.3.2",
+ "Version": "4.3.3",
"Repositories": [
{
"Name": "CRAN",
@@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
+ "assertthat": {
+ "Package": "assertthat",
+ "Version": "0.2.1",
+ "Source": "Repository",
+ "Repository": "RSPM",
+ "Requirements": [
+ "tools"
+ ],
+ "Hash": "50c838a310445e954bc13f26f26a6ecf"
+ },
"backports": {
"Package": "backports",
"Version": "1.4.1",
diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock
index a85cc2fe..494312ef 100644
--- a/renv/profiles/4.2/renv.lock
+++ b/renv/profiles/4.2/renv.lock
@@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
+ "assertthat": {
+ "Package": "assertthat",
+ "Version": "0.2.1",
+ "Source": "Repository",
+ "Repository": "RSPM",
+ "Requirements": [
+ "tools"
+ ],
+ "Hash": "50c838a310445e954bc13f26f26a6ecf"
+ },
"backports": {
"Package": "backports",
"Version": "1.4.1",
diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock
index 20b69f01..329ec463 100644
--- a/renv/profiles/4.3/renv.lock
+++ b/renv/profiles/4.3/renv.lock
@@ -1,6 +1,6 @@
{
"R": {
- "Version": "4.3.2",
+ "Version": "4.3.3",
"Repositories": [
{
"Name": "CRAN",
@@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
+ "assertthat": {
+ "Package": "assertthat",
+ "Version": "0.2.1",
+ "Source": "Repository",
+ "Repository": "RSPM",
+ "Requirements": [
+ "tools"
+ ],
+ "Hash": "50c838a310445e954bc13f26f26a6ecf"
+ },
"backports": {
"Package": "backports",
"Version": "1.4.1",
diff --git a/tests/testthat/test-derive_study_day.R b/tests/testthat/test-derive_study_day.R
new file mode 100644
index 00000000..e396bbb4
--- /dev/null
+++ b/tests/testthat/test-derive_study_day.R
@@ -0,0 +1,76 @@
+ae <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
+ stringsAsFactors = FALSE
+)
+dm <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
+ stringsAsFactors = FALSE
+)
+
+test_that("`derive_study_day()` works as expected for invalid input", {
+ expect_error(
+ derive_study_day("a", "b", "C", "D", "E"),
+ "sdtm_in is not a data frame"
+ )
+ expect_error(
+ derive_study_day(iris, "b", "C", "D", "E"),
+ "dm_domain is not a data frame"
+ )
+ expect_error(
+ derive_study_day(iris, iris, "d", "e", "b"),
+ "dm_domain needs to have the variable of refdt"
+ )
+ expect_error(
+ derive_study_day(iris, iris, "d", "Species"),
+ "sdtm_in needs to have the variable of tgdt"
+ )
+ expect_error(
+ derive_study_day(iris, iris, "Petal.Length", "Species", "e"),
+ "needs to have the variable of merge_key"
+ )
+ expect_error(
+ derive_study_day(iris, iris, "Petal.Length", "Species", 123L, "Species"),
+ "study_day_var is not a character vector"
+ )
+ expect_warning(
+ derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AENDY"),
+ "Target date and the returned study day doesn't match."
+ )
+
+ dm1 <- data.frame(
+ USUBJID = c("study123-123", "study123-123", "study123-125"),
+ RFSTDTC = c("2012-02-01", "2012-04-14", "2012-04-14"),
+ stringsAsFactors = FALSE
+ )
+ expect_warning(
+ derive_study_day(ae, dm1, "AESTDTC", "RFSTDTC", "AESTDY"),
+ "Reference date is not unique for each patient!"
+ )
+
+ dm2 <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ RFSTDTC = c(123L, 456L, 789L),
+ stringsAsFactors = FALSE
+ )
+ expect_warning(
+ derive_study_day(ae, dm2, "AESTDTC", "RFSTDTC", "AESTDY"),
+ "Encountered errors when converting refdt to dates."
+ )
+})
+
+test_that("`calculate_study_day()` works as expected for valid input", {
+ res <- derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+ expected <- c(-31L, 1L, NA)
+ expect_equal(res$AESTDY, expected, tolerance = "1.5e-08")
+
+ df <- data.frame(
+ USUBJID = c("study123-123", "study123-124", "study123-125"),
+ RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
+ AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
+ stringsAsFactors = FALSE
+ )
+ res1 <- derive_study_day(df, df, "AESTDTC", "RFSTDTC", "AESTDY")
+ expect_equal(res1$AESTDY, expected, tolerance = "1.5e-08")
+})