Skip to content

Commit

Permalink
# 16 calculate_study_day (#20)
Browse files Browse the repository at this point in the history
* draft

* add reference df so that DM can be joined on the fly

* add roxygen doc

* doc update

* rename ds_in as sdtm_in

* replace ds_dm with dm_domain

* add default value for refdt

* add matching check

* refdt and tgdt have to be ISO format, otherwise NA and warning

* utils::hasName

* modify assert message

* update message for refdt and tgdt convertion error

* set .call to FALSE for warning

* add unit test

* update document

* style update

* add asserthat in DESCRIPTION

* Automatic renv profile update.

* Automatic renv profile update.

* use data frame instead of data.frame

Co-authored-by: Shiyu Chen <[email protected]>

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* data frame instead of data.frame

Co-authored-by: Shiyu Chen <[email protected]>

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* update wording for test

Co-authored-by: Shiyu Chen <[email protected]>

* update wording for test

Co-authored-by: Shiyu Chen <[email protected]>

* use base pipe

Co-authored-by: Shiyu Chen <[email protected]>

* use base pipe

Co-authored-by: Shiyu Chen <[email protected]>

* use base pipe

Co-authored-by: Shiyu Chen <[email protected]>

* update man file

* update df name for lint compliance

* integer with L

* L for test integers

* replace nested ifelse with case when per lintr suggestion

* Revert "replace nested ifelse with case when per lintr suggestion"

This reverts commit f6d467b.

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* update typo in roxygen header

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* remove don't run

* capitalize param per reference

Co-authored-by: Shiyu Chen <[email protected]>

* use mapply to replace nested if else

* style

* roxygen update

* fixed to be TRUE

* fix lintr

* fix lintr in test file

* remove @md tag

* Update `calculate_study_day()`

Fix usage of default missing value in parameter `dm_domain` and calculate study day with more straightforward expression.

* move defaults to the end of argument positions

* add equal sign

* default to "DM"

* update unit test per the arg position change

* man file update

* remove dm_domain default

* Update common.yml

* simplify by statement

* Automatic renv profile update.

* add unit test for same input and reference df

* rename function

* rename files

* study day changed to integer

* update style

* update man file

* update doc

Co-authored-by: Shiyu Chen <[email protected]>

* update man file

---------

Co-authored-by: Rosemary Li <[email protected]>
Co-authored-by: yli110-stat697 <[email protected]>
Co-authored-by: Shiyu Chen <[email protected]>
Co-authored-by: Ramiro Magno <[email protected]>
Co-authored-by: Adam Foryś <[email protected]>
  • Loading branch information
6 people authored Mar 13, 2024
1 parent 8aed148 commit 375b11f
Show file tree
Hide file tree
Showing 9 changed files with 300 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
if: >
github.event_name != 'release'
with:
r-version: "4.3"
r-version: "release"
# Whether to skip code coverage badge creation
# Setting to 'false' will require you to create
# an orphan branch called 'badges' in your repository
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Depends: R (>= 4.2)
Imports:
admiraldev,
dplyr (>= 1.0.0),
assertthat,
purrr (>= 0.3.3),
rlang (>= 0.4.4),
stringr (>= 1.4.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(print,iso8601)
export(create_iso8601)
export(derive_study_day)
export(fmt_cmp)
export(problems)
importFrom(rlang,.data)
Expand Down
132 changes: 132 additions & 0 deletions R/derive_study_day.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#' `derive_study_day` performs study day calculation
#' @description
#' This function takes the an input data frame and a reference data frame (which
#' is DM domain in most cases), and calculate the study day from reference date
#' and target date. In case of unexpected conditions like reference date is not
#' unique for each patient, or reference and input dates are not actual dates,
#' NA will be returned for those records.
#'
#' @param sdtm_in Input data frame that contains the target date.
#' @param dm_domain Reference date frame that contains the reference date.
#' @param tgdt Target date from `sdtm_in` that will be used to calculate the study
#' day.
#' @param refdt Reference date from `dm_domain` that will be used as reference to
#' calculate the study day.
#' @param study_day_var New study day variable name in the output. For
#' example, AESTDY for AE domain and CMSTDY for CM domain.
#' @param merge_key Character to represent the merging key between `sdtm_in` and
#' `dm_domain`.
#'
#' @return Data frame that takes all columns from `sdtm_in` and a new variable
#' to represent the calculated study day.
#'
#' @export
#'
#' @examples
#' ae <- data.frame(
#' USUBJID = c("study123-123", "study123-124", "study123-125"),
#' AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14")
#' )
#' dm <- data.frame(
#' USUBJID = c("study123-123", "study123-124", "study123-125"),
#' RFSTDTC = c("2012-02-01", "2012-04-14", NA)
#' )
#' ae$AESTDTC <- as.Date(ae$AESTDTC)
#' dm$RFSTDTC <- as.Date(dm$RFSTDTC)
#' derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
#'
derive_study_day <- function(sdtm_in,
dm_domain,
tgdt,
refdt,
study_day_var,
merge_key = "USUBJID") {
assertthat::assert_that(is.data.frame(sdtm_in))
assertthat::assert_that(is.data.frame(dm_domain))
assertthat::assert_that(
utils::hasName(dm_domain, refdt),
msg = "dm_domain needs to have the variable of refdt."
)
assertthat::assert_that(
utils::hasName(sdtm_in, tgdt),
msg = "sdtm_in needs to have the variable of tgdt."
)
assertthat::assert_that(
utils::hasName(dm_domain, merge_key),
msg = "dm_domain needs to have the variable of merge_key."
)
assertthat::assert_that(
utils::hasName(sdtm_in, merge_key),
msg = "sdtm_in needs to have the variable of merge_key."
)
assertthat::assert_that(is.character(study_day_var))
# check tgdt and study_day_var matching, for example, CMSTDTC matches CMSTDY
if (gsub("DTC", "", tgdt, fixed = TRUE) != gsub("DY", "", study_day_var, fixed = TRUE)) {
warning(
"Target date and the returned study day doesn't match. ",
"Expecting matching date and study day, for example, CMENDTC and CMENDY"
)
}

original_variables <- names(sdtm_in)

if (!identical(sdtm_in, dm_domain)) {
dm_domain <- unique(dm_domain[c(merge_key, refdt)])

check_refdt_uniqueness <- dm_domain |>
dplyr::group_by(dplyr::pick({{ merge_key }})) |>
dplyr::filter(dplyr::n() > 1L)
if (nrow(check_refdt_uniqueness) > 0L) {
warning(
"Reference date is not unique for each patient! ",
"Patient without unique reference date will be ingored. ",
"NA will be returned for such records."
)
dm_domain <- dm_domain[
!dm_domain[[merge_key]] %in% check_refdt_uniqueness[[merge_key]],
]
}

sdtm_in <- sdtm_in |>
dplyr::left_join(
dm_domain,
by = merge_key
)
}

# refdt/tgdt should be in ISO format, otherwise throw warning
sdtm_in[[refdt]] <- tryCatch(
as.Date(sdtm_in[[refdt]], "%Y-%m-%d"),
error = function(e) {
warning(
"Encountered errors when converting refdt to dates. ",
"The warning message is ",
e$message,
call. = FALSE
)
NA
}
)
sdtm_in[[tgdt]] <- tryCatch(
as.Date(sdtm_in[[tgdt]], "%Y-%m-%d"),
error = function(e) {
warning(
"Encountered errors when converting tgdt to dates. ",
"The warning message is ",
e$message,
call. = FALSE
)
NA
}
)

ref <- sdtm_in[[refdt]]
tgt <- sdtm_in[[tgdt]]

# SDTMIG 4.4.4 Use of the Study Day Variables
res <- ifelse(tgt >= ref, tgt - ref + 1L, tgt - ref)

sdtm_in <- sdtm_in[original_variables]
sdtm_in[study_day_var] <- as.integer(res)
return(sdtm_in)
}
57 changes: 57 additions & 0 deletions man/derive_study_day.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion renv.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"R": {
"Version": "4.3.2",
"Version": "4.3.3",
"Repositories": [
{
"Name": "CRAN",
Expand Down Expand Up @@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
"assertthat": {
"Package": "assertthat",
"Version": "0.2.1",
"Source": "Repository",
"Repository": "RSPM",
"Requirements": [
"tools"
],
"Hash": "50c838a310445e954bc13f26f26a6ecf"
},
"backports": {
"Package": "backports",
"Version": "1.4.1",
Expand Down
10 changes: 10 additions & 0 deletions renv/profiles/4.2/renv.lock
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
"assertthat": {
"Package": "assertthat",
"Version": "0.2.1",
"Source": "Repository",
"Repository": "RSPM",
"Requirements": [
"tools"
],
"Hash": "50c838a310445e954bc13f26f26a6ecf"
},
"backports": {
"Package": "backports",
"Version": "1.4.1",
Expand Down
12 changes: 11 additions & 1 deletion renv/profiles/4.3/renv.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"R": {
"Version": "4.3.2",
"Version": "4.3.3",
"Repositories": [
{
"Name": "CRAN",
Expand Down Expand Up @@ -98,6 +98,16 @@
],
"Hash": "e8a22846fff485f0be3770c2da758713"
},
"assertthat": {
"Package": "assertthat",
"Version": "0.2.1",
"Source": "Repository",
"Repository": "RSPM",
"Requirements": [
"tools"
],
"Hash": "50c838a310445e954bc13f26f26a6ecf"
},
"backports": {
"Package": "backports",
"Version": "1.4.1",
Expand Down
76 changes: 76 additions & 0 deletions tests/testthat/test-derive_study_day.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
ae <- data.frame(
USUBJID = c("study123-123", "study123-124", "study123-125"),
AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
stringsAsFactors = FALSE
)
dm <- data.frame(
USUBJID = c("study123-123", "study123-124", "study123-125"),
RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
stringsAsFactors = FALSE
)

test_that("`derive_study_day()` works as expected for invalid input", {
expect_error(
derive_study_day("a", "b", "C", "D", "E"),
"sdtm_in is not a data frame"
)
expect_error(
derive_study_day(iris, "b", "C", "D", "E"),
"dm_domain is not a data frame"
)
expect_error(
derive_study_day(iris, iris, "d", "e", "b"),
"dm_domain needs to have the variable of refdt"
)
expect_error(
derive_study_day(iris, iris, "d", "Species"),
"sdtm_in needs to have the variable of tgdt"
)
expect_error(
derive_study_day(iris, iris, "Petal.Length", "Species", "e"),
"needs to have the variable of merge_key"
)
expect_error(
derive_study_day(iris, iris, "Petal.Length", "Species", 123L, "Species"),
"study_day_var is not a character vector"
)
expect_warning(
derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AENDY"),
"Target date and the returned study day doesn't match."
)

dm1 <- data.frame(
USUBJID = c("study123-123", "study123-123", "study123-125"),
RFSTDTC = c("2012-02-01", "2012-04-14", "2012-04-14"),
stringsAsFactors = FALSE
)
expect_warning(
derive_study_day(ae, dm1, "AESTDTC", "RFSTDTC", "AESTDY"),
"Reference date is not unique for each patient!"
)

dm2 <- data.frame(
USUBJID = c("study123-123", "study123-124", "study123-125"),
RFSTDTC = c(123L, 456L, 789L),
stringsAsFactors = FALSE
)
expect_warning(
derive_study_day(ae, dm2, "AESTDTC", "RFSTDTC", "AESTDY"),
"Encountered errors when converting refdt to dates."
)
})

test_that("`calculate_study_day()` works as expected for valid input", {
res <- derive_study_day(ae, dm, "AESTDTC", "RFSTDTC", "AESTDY")
expected <- c(-31L, 1L, NA)
expect_equal(res$AESTDY, expected, tolerance = "1.5e-08")

df <- data.frame(
USUBJID = c("study123-123", "study123-124", "study123-125"),
RFSTDTC = c("2012-02-01", "2012-04-14", NA_character_),
AESTDTC = c("2012-01-01", "2012-04-14", "2012-04-14"),
stringsAsFactors = FALSE
)
res1 <- derive_study_day(df, df, "AESTDTC", "RFSTDTC", "AESTDY")
expect_equal(res1$AESTDY, expected, tolerance = "1.5e-08")
})

0 comments on commit 375b11f

Please sign in to comment.