From 375b11f510ac8492ef9ab578be57801ad81d9d65 Mon Sep 17 00:00:00 2001
From: Rosemary Li <42564519+yli110-stat697@users.noreply.github.com>
Date: Wed, 13 Mar 2024 11:07:39 -0500
Subject: [PATCH] # 16 calculate_study_day (#20)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* draft

* add reference df so that DM can be joined on the fly

* add roxygen doc

* doc update

* rename ds_in as sdtm_in

* replace ds_dm with dm_domain

* add default value for refdt

* add matching check

* refdt and tgdt have to be ISO format, otherwise NA and warning

* utils::hasName

* modify assert message

* update message for refdt and tgdt convertion error

* set .call to FALSE for warning

* add unit test

* update document

* style update

* add asserthat in DESCRIPTION

* Automatic renv profile update.

* Automatic renv profile update.

* use data frame instead of data.frame

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* data frame instead of data.frame

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* update wording for test

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* update wording for test

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* use base pipe

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* use base pipe

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* use base pipe

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* update man file

* update df name for lint compliance

* integer with L

* L for test integers

* replace nested ifelse with case when per lintr suggestion

* Revert "replace nested ifelse with case when per lintr suggestion"

This reverts commit f6d467bd015993b4e98869959c1f8c4e793b0578.

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* update typo in roxygen header

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* remove don't run

* capitalize param per reference

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* use mapply to replace nested if else

* style

* roxygen update

* fixed to be TRUE

* fix lintr

* fix lintr in test file

* remove @md tag

* Update `calculate_study_day()`

Fix usage of default missing value in parameter `dm_domain` and calculate study day with more straightforward expression.

* move defaults to the end of argument positions

* add equal sign

* default to "DM"

* update unit test per the arg position change

* man file update

* remove dm_domain default

* Update common.yml

* simplify by statement

* Automatic renv profile update.

* add unit test for same input and reference df

* rename function

* rename files

* study day changed to integer

* update style

* update man file

* update doc

Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>

* update man file

---------

Co-authored-by: Rosemary Li <li.yaqiong@gene.com>
Co-authored-by: yli110-stat697 <yli110-stat697@users.noreply.github.com>
Co-authored-by: Shiyu Chen <39443807+ShiyuC@users.noreply.github.com>
Co-authored-by: Ramiro Magno <rmagno@pattern.institute>
Co-authored-by: Adam Foryś <adam.forys@gmail.com>
---
 .github/workflows/common.yml           |   2 +-
 DESCRIPTION                            |   1 +
 NAMESPACE                              |   1 +
 R/derive_study_day.R                   | 132 +++++++++++++++++++++++++
 man/derive_study_day.Rd                |  57 +++++++++++
 renv.lock                              |  12 ++-
 renv/profiles/4.2/renv.lock            |  10 ++
 renv/profiles/4.3/renv.lock            |  12 ++-
 tests/testthat/test-derive_study_day.R |  76 ++++++++++++++
 9 files changed, 300 insertions(+), 3 deletions(-)
 create mode 100644 R/derive_study_day.R
 create mode 100644 man/derive_study_day.Rd
 create mode 100644 tests/testthat/test-derive_study_day.R

diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml
index de646ffb..202066b0 100644
--- a/.github/workflows/common.yml
+++ b/.github/workflows/common.yml
@@ -90,7 +90,7 @@ jobs:
     if: >
       github.event_name != 'release'
     with:
-      r-version: "4.3"
+      r-version: "release"
       # Whether to skip code coverage badge creation
       # Setting to 'false' will require you to create
       # an orphan branch called 'badges' in your repository
diff --git a/DESCRIPTION b/DESCRIPTION
index acb15ebf..714525c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,6 +38,7 @@ Depends: R (>= 4.2)
 Imports: 
     admiraldev,
     dplyr (>= 1.0.0),
+    assertthat,
     purrr (>= 0.3.3),
     rlang (>= 0.4.4),
     stringr (>= 1.4.0),
diff --git a/NAMESPACE b/NAMESPACE
index 7fc88eef..f1161f5e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 S3method(print,iso8601)
 export(create_iso8601)
+export(derive_study_day)
 export(fmt_cmp)
 export(problems)
 importFrom(rlang,.data)
diff --git a/R/derive_study_day.R b/R/derive_study_day.R
new file mode 100644
index 00000000..6a04c15c
--- /dev/null
+++ b/R/derive_study_day.R
@@ -0,0 +1,132 @@
+#' `derive_study_day` performs study day calculation
+#' @description
+#' This function takes the an input data frame and a reference data frame (which
+#' is DM domain in most cases), and calculate the study day from reference date
+#' and target date. In case of unexpected conditions like reference date is not
+#' unique for each patient, or reference and input dates are not actual dates,
+#' NA will be returned for those records.
+#'
+#' @param sdtm_in Input data frame that contains the target date.
+#' @param dm_domain Reference date frame that contains the reference date.
+#' @param tgdt Target date from `sdtm_in` that will be used to calculate the study
+#' day.
+#' @param refdt Reference date from `dm_domain` that will be used as reference to
+#' calculate the study day.
+#' @param study_day_var New study day variable name in the output. For
+#' example, AESTDY for AE domain and CMSTDY for CM domain.
+#' @param merge_key Character to represent the merging key between `sdtm_in` and
+#' `dm_domain`.
+#'
+#' @return Data frame that takes all columns from `sdtm_in` and a new variable
+#' to represent the calculated study day.
+#'
+#' @export
+#'
+#' @examples
+#' ae <- data.frame(
+#'   USUBJID = c("study123-123", "study123-124", "study123-125"),
+#'   AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14")
+#' )
+#' dm <- data.frame(
+#'   USUBJID = c("study123-123", "study123-124", "study123-125"),
+#'   RFSTDTC = c("2012-02-01", "2012-04-14", NA)
+#' )
+#' ae$AESTDTC <- as.Date(ae$AESTDTC)
+#' dm$RFSTDTC <- as.Date(dm$RFSTDTC)
+#' derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+#'
+derive_study_day <- function(sdtm_in,
+                             dm_domain,
+                             tgdt,
+                             refdt,
+                             study_day_var,
+                             merge_key = "USUBJID") {
+  assertthat::assert_that(is.data.frame(sdtm_in))
+  assertthat::assert_that(is.data.frame(dm_domain))
+  assertthat::assert_that(
+    utils::hasName(dm_domain, refdt),
+    msg = "dm_domain needs to have the variable of refdt."
+  )
+  assertthat::assert_that(
+    utils::hasName(sdtm_in, tgdt),
+    msg = "sdtm_in needs to have the variable of tgdt."
+  )
+  assertthat::assert_that(
+    utils::hasName(dm_domain, merge_key),
+    msg = "dm_domain needs to have the variable of merge_key."
+  )
+  assertthat::assert_that(
+    utils::hasName(sdtm_in, merge_key),
+    msg = "sdtm_in needs to have the variable of merge_key."
+  )
+  assertthat::assert_that(is.character(study_day_var))
+  # check tgdt and study_day_var matching, for example, CMSTDTC matches CMSTDY
+  if (gsub("DTC", "", tgdt, fixed = TRUE) != gsub("DY", "", study_day_var, fixed = TRUE)) {
+    warning(
+      "Target date and the returned study day doesn't match. ",
+      "Expecting matching date and study day, for example, CMENDTC and CMENDY"
+    )
+  }
+
+  original_variables <- names(sdtm_in)
+
+  if (!identical(sdtm_in, dm_domain)) {
+    dm_domain <- unique(dm_domain[c(merge_key, refdt)])
+
+    check_refdt_uniqueness <- dm_domain |>
+      dplyr::group_by(dplyr::pick({{ merge_key }})) |>
+      dplyr::filter(dplyr::n() > 1L)
+    if (nrow(check_refdt_uniqueness) > 0L) {
+      warning(
+        "Reference date is not unique for each patient! ",
+        "Patient without unique reference date will be ingored. ",
+        "NA will be returned for such records."
+      )
+      dm_domain <- dm_domain[
+        !dm_domain[[merge_key]] %in% check_refdt_uniqueness[[merge_key]],
+      ]
+    }
+
+    sdtm_in <- sdtm_in |>
+      dplyr::left_join(
+        dm_domain,
+        by = merge_key
+      )
+  }
+
+  # refdt/tgdt should be in ISO format, otherwise throw warning
+  sdtm_in[[refdt]] <- tryCatch(
+    as.Date(sdtm_in[[refdt]], "%Y-%m-%d"),
+    error = function(e) {
+      warning(
+        "Encountered errors when converting refdt to dates. ",
+        "The warning message is ",
+        e$message,
+        call. = FALSE
+      )
+      NA
+    }
+  )
+  sdtm_in[[tgdt]] <- tryCatch(
+    as.Date(sdtm_in[[tgdt]], "%Y-%m-%d"),
+    error = function(e) {
+      warning(
+        "Encountered errors when converting tgdt to dates. ",
+        "The warning message is ",
+        e$message,
+        call. = FALSE
+      )
+      NA
+    }
+  )
+
+  ref <- sdtm_in[[refdt]]
+  tgt <- sdtm_in[[tgdt]]
+
+  # SDTMIG 4.4.4 Use of the Study Day Variables
+  res <- ifelse(tgt >= ref, tgt - ref + 1L, tgt - ref)
+
+  sdtm_in <- sdtm_in[original_variables]
+  sdtm_in[study_day_var] <- as.integer(res)
+  return(sdtm_in)
+}
diff --git a/man/derive_study_day.Rd b/man/derive_study_day.Rd
new file mode 100644
index 00000000..a24eb765
--- /dev/null
+++ b/man/derive_study_day.Rd
@@ -0,0 +1,57 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/derive_study_day.R
+\name{derive_study_day}
+\alias{derive_study_day}
+\title{\code{derive_study_day} performs study day calculation}
+\usage{
+derive_study_day(
+  sdtm_in,
+  dm_domain,
+  tgdt,
+  refdt,
+  study_day_var,
+  merge_key = "USUBJID"
+)
+}
+\arguments{
+\item{sdtm_in}{Input data frame that contains the target date.}
+
+\item{dm_domain}{Reference date frame that contains the reference date.}
+
+\item{tgdt}{Target date from \code{sdtm_in} that will be used to calculate the study
+day.}
+
+\item{refdt}{Reference date from \code{dm_domain} that will be used as reference to
+calculate the study day.}
+
+\item{study_day_var}{New study day variable name in the output. For
+example, AESTDY for AE domain and CMSTDY for CM domain.}
+
+\item{merge_key}{Character to represent the merging key between \code{sdtm_in} and
+\code{dm_domain}.}
+}
+\value{
+Data frame that takes all columns from \code{sdtm_in} and a new variable
+to represent the calculated study day.
+}
+\description{
+This function takes the an input data frame and a reference data frame (which
+is DM domain in most cases), and calculate the study day from reference date
+and target date. In case of unexpected conditions like reference date is not
+unique for each patient, or reference and input dates are not actual dates,
+NA will be returned for those records.
+}
+\examples{
+ae <- data.frame(
+  USUBJID = c("study123-123", "study123-124", "study123-125"),
+  AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14")
+)
+dm <- data.frame(
+  USUBJID = c("study123-123", "study123-124", "study123-125"),
+  RFSTDTC = c("2012-02-01", "2012-04-14", NA)
+)
+ae$AESTDTC <- as.Date(ae$AESTDTC)
+dm$RFSTDTC <- as.Date(dm$RFSTDTC)
+derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+
+}
diff --git a/renv.lock b/renv.lock
index 20b69f01..329ec463 100644
--- a/renv.lock
+++ b/renv.lock
@@ -1,6 +1,6 @@
 {
   "R": {
-    "Version": "4.3.2",
+    "Version": "4.3.3",
     "Repositories": [
       {
         "Name": "CRAN",
@@ -98,6 +98,16 @@
       ],
       "Hash": "e8a22846fff485f0be3770c2da758713"
     },
+    "assertthat": {
+      "Package": "assertthat",
+      "Version": "0.2.1",
+      "Source": "Repository",
+      "Repository": "RSPM",
+      "Requirements": [
+        "tools"
+      ],
+      "Hash": "50c838a310445e954bc13f26f26a6ecf"
+    },
     "backports": {
       "Package": "backports",
       "Version": "1.4.1",
diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock
index a85cc2fe..494312ef 100644
--- a/renv/profiles/4.2/renv.lock
+++ b/renv/profiles/4.2/renv.lock
@@ -98,6 +98,16 @@
       ],
       "Hash": "e8a22846fff485f0be3770c2da758713"
     },
+    "assertthat": {
+      "Package": "assertthat",
+      "Version": "0.2.1",
+      "Source": "Repository",
+      "Repository": "RSPM",
+      "Requirements": [
+        "tools"
+      ],
+      "Hash": "50c838a310445e954bc13f26f26a6ecf"
+    },
     "backports": {
       "Package": "backports",
       "Version": "1.4.1",
diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock
index 20b69f01..329ec463 100644
--- a/renv/profiles/4.3/renv.lock
+++ b/renv/profiles/4.3/renv.lock
@@ -1,6 +1,6 @@
 {
   "R": {
-    "Version": "4.3.2",
+    "Version": "4.3.3",
     "Repositories": [
       {
         "Name": "CRAN",
@@ -98,6 +98,16 @@
       ],
       "Hash": "e8a22846fff485f0be3770c2da758713"
     },
+    "assertthat": {
+      "Package": "assertthat",
+      "Version": "0.2.1",
+      "Source": "Repository",
+      "Repository": "RSPM",
+      "Requirements": [
+        "tools"
+      ],
+      "Hash": "50c838a310445e954bc13f26f26a6ecf"
+    },
     "backports": {
       "Package": "backports",
       "Version": "1.4.1",
diff --git a/tests/testthat/test-derive_study_day.R b/tests/testthat/test-derive_study_day.R
new file mode 100644
index 00000000..e396bbb4
--- /dev/null
+++ b/tests/testthat/test-derive_study_day.R
@@ -0,0 +1,76 @@
+ae <- data.frame(
+  USUBJID = c("study123-123", "study123-124", "study123-125"),
+  AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
+  stringsAsFactors = FALSE
+)
+dm <- data.frame(
+  USUBJID = c("study123-123", "study123-124", "study123-125"),
+  RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
+  stringsAsFactors = FALSE
+)
+
+test_that("`derive_study_day()` works as expected for invalid input", {
+  expect_error(
+    derive_study_day("a", "b", "C", "D", "E"),
+    "sdtm_in is not a data frame"
+  )
+  expect_error(
+    derive_study_day(iris, "b", "C", "D", "E"),
+    "dm_domain is not a data frame"
+  )
+  expect_error(
+    derive_study_day(iris, iris, "d", "e", "b"),
+    "dm_domain needs to have the variable of refdt"
+  )
+  expect_error(
+    derive_study_day(iris, iris, "d", "Species"),
+    "sdtm_in needs to have the variable of tgdt"
+  )
+  expect_error(
+    derive_study_day(iris, iris, "Petal.Length", "Species", "e"),
+    "needs to have the variable of merge_key"
+  )
+  expect_error(
+    derive_study_day(iris, iris, "Petal.Length", "Species", 123L, "Species"),
+    "study_day_var is not a character vector"
+  )
+  expect_warning(
+    derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AENDY"),
+    "Target date and the returned study day doesn't match."
+  )
+
+  dm1 <- data.frame(
+    USUBJID = c("study123-123", "study123-123", "study123-125"),
+    RFSTDTC = c("2012-02-01", "2012-04-14", "2012-04-14"),
+    stringsAsFactors = FALSE
+  )
+  expect_warning(
+    derive_study_day(ae, dm1, "AESTDTC", "RFSTDTC", "AESTDY"),
+    "Reference date is not unique for each patient!"
+  )
+
+  dm2 <- data.frame(
+    USUBJID = c("study123-123", "study123-124", "study123-125"),
+    RFSTDTC = c(123L, 456L, 789L),
+    stringsAsFactors = FALSE
+  )
+  expect_warning(
+    derive_study_day(ae, dm2, "AESTDTC", "RFSTDTC", "AESTDY"),
+    "Encountered errors when converting refdt to dates."
+  )
+})
+
+test_that("`calculate_study_day()` works as expected for valid input", {
+  res <- derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
+  expected <- c(-31L, 1L, NA)
+  expect_equal(res$AESTDY, expected, tolerance = "1.5e-08")
+
+  df <- data.frame(
+    USUBJID = c("study123-123", "study123-124", "study123-125"),
+    RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
+    AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
+    stringsAsFactors = FALSE
+  )
+  res1 <- derive_study_day(df, df, "AESTDTC", "RFSTDTC", "AESTDY")
+  expect_equal(res1$AESTDY, expected, tolerance = "1.5e-08")
+})