resolve confs

Merge branch 'main' of github.com:pharmaverse/sdtm.oak into 51-v2 # Conflicts: # NAMESPACE # inst/WORDLIST # renv/profiles/4.4/renv.lock
pharmaverse · Jun 17, 2024 · dee4f86 · dee4f86
2 parents 8a1ddfd + e1aa479
commit dee4f86
Show file tree

Hide file tree

Showing 22 changed files with 817 additions and 1,020 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: sdtm.oak
 Type: Package
 Title: SDTM Data Transformation Engine
-Version: 0.0.0.9003
+Version: 0.0.0.9004
 Authors@R: c(
     person("Rammprasad", "Ganapathy", role = c("aut", "cre"),
     email = "[email protected]"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -14,7 +14,9 @@ export(ct_map)
 export(ct_spec_example)
 export(ct_spec_vars)
 export(dataset_oak_vignette)
+export(derive_seq)
 export(derive_study_day)
+export(domain_example)
 export(fmt_cmp)
 export(generate_oak_id_vars)
 export(hardcode_ct)
@@ -27,6 +29,8 @@ export(rm_cnd_df)
 importFrom(dplyr,mutate)
 importFrom(pillar,ctl_new_rowid_pillar)
 importFrom(pillar,tbl_sum)
+export(read_domain_example)
+export(sbj_vars)
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
 importFrom(rlang,.data)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# sdtm.oak 0.0.0.9004 (development version)
+
+## New Features
+
+* New function: `derive_seq()` for deriving a sequence number variable.
+
 # sdtm.oak 0.0.0.9003 (development version)
 
 ## New Features

diff --git a/R/derive_seq.R b/R/derive_seq.R
@@ -0,0 +1,97 @@
+#' Derive the sequence number (`--SEQ`) variable
+#'
+#' @description
+#' [derive_seq()] creates a new identifier variable: the sequence number
+#' (`--SEQ`).
+#'
+#' This function adds a newly derived variable to `tgt_dat`, namely the sequence
+#' number (`--SEQ`) whose name is the one provided in `tgt_var`. An integer
+#' sequence is generated that uniquely identifies each record within the domain.
+#'
+#' Prior to the derivation of `tgt_var`, the data frame `tgt_dat` is sorted
+#' according to grouping variables indicated in `rec_vars`.
+#'
+#' @param tgt_dat The target dataset, a data frame.
+#' @param tgt_var The target SDTM variable: a single string indicating the name
+#'   of the sequence number (`--SEQ`) variable, e.g. `"DSSEQ"`. Note that
+#'   supplying a name not ending in `"SEQ"` will raise a warning.
+#' @param rec_vars A character vector of record-level identifier variables.
+#' @param sbj_vars A character vector of subject-level identifier variables.
+#' @param start_at The sequence numbering starts at this value (default is `1`).
+#'
+#' @returns Returns the data frame supplied in `tgt_dat` with the newly derived
+#'   variable, i.e. the sequence number (`--SEQ`), whose name is that passed in
+#'   `tgt_var`. This variable is of type integer.
+#'
+#' @examples
+#' # A VS raw data set example
+#' (vs <- read_domain_example("vs"))
+#'
+#' # Derivation of VSSEQ
+#' rec_vars <- c("STUDYID", "USUBJID", "VSTESTCD", "VSDTC", "VSTPTNUM")
+#' derive_seq(tgt_dat = vs, tgt_var = "VSSEQ", rec_vars = rec_vars)
+#'
+#' # An APSC raw data set example
+#' (apsc <- read_domain_example("apsc"))
+#'
+#' # Derivation of APSEQ
+#' derive_seq(
+#'   tgt_dat = apsc,
+#'   tgt_var = "APSEQ",
+#'   rec_vars = c("STUDYID", "RSUBJID", "SCTESTCD"),
+#'   sbj_vars = c("STUDYID", "RSUBJID")
+#' )
+#' @export
+derive_seq <-
+  function(tgt_dat,
+           tgt_var,
+           rec_vars,
+           sbj_vars = sdtm.oak::sbj_vars(),
+           start_at = 1L) {
+    admiraldev::assert_character_scalar(tgt_var)
+    if (!is_seq_name(tgt_var)) {
+      rlang::warn("Target variable name (`tgt_var`) should end in 'SEQ'.")
+    }
+
+    admiraldev::assert_character_vector(rec_vars)
+    admiraldev::assert_character_vector(sbj_vars)
+    admiraldev::assert_data_frame(tgt_dat,
+      required_vars = rlang::syms(rec_vars),
+      optional = FALSE
+    )
+
+    admiraldev::assert_integer_scalar(start_at, subset = "non-negative")
+
+    tgt_dat |>
+      # Ensure that no prior grouping exists that alters ordering and new
+      # grouping.
+      dplyr::ungroup() |>
+      dplyr::arrange(dplyr::across(.cols = dplyr::all_of(rec_vars))) |>
+      dplyr::group_by(dplyr::across(dplyr::all_of(sbj_vars))) |>
+      dplyr::mutate("{tgt_var}" := dplyr::row_number() + start_at - 1L) |> # nolint object_name_linter()
+      dplyr::ungroup()
+  }
+
+#' Is it a --SEQ variable name
+#'
+#' [is_seq_name()] returns which variable names end in `"SEQ"`.
+#'
+#' @param x A character vector.
+#'
+#' @returns A logical vector.
+#'
+#' @examples
+#' # A valid SEQ name.
+#' sdtm.oak:::is_seq_name("AESEQ")
+#'
+#' # Not valid sequence number (`--SEQ`) variable names.
+#' # Case matters.
+#' sdtm.oak:::is_seq_name("AEseq")
+#'
+#' # A valid name has to end in "SEQ".
+#' sdtm.oak:::is_seq_name("AESEQUENCE")
+#'
+#' @keywords internal
+is_seq_name <- function(x) {
+  stringr::str_detect(x, "SEQ$")
+}
diff --git a/R/domain_example.R b/R/domain_example.R
@@ -0,0 +1,129 @@
+#' Find the path to an example SDTM domain file
+#'
+#' @description
+#' [domain_example()] resolves the local path to a SDTM domain example file. The
+#' domain examples files were imported from
+#' [pharmaversesdtm](https://cran.r-project.org/package=pharmaversesdtm). See
+#' Details section for available datasets.
+#'
+#' @details
+#' Datasets were obtained from
+#' [pharmaversesdtm](https://cran.r-project.org/package=pharmaversesdtm) but are
+#' originally sourced from the [CDISC pilot
+#' project](https://github.com/cdisc-org/sdtm-adam-pilot-project) or have been
+#' constructed ad-hoc by the
+#' [admiral](https://cran.r-project.org/package=admiral) team. These datasets
+#' are bundled with `{sdtm.oak}`, thus obviating a dependence on
+#' `{pharmaversesdtm}`.
+#'
+#' ### Example SDTM domains
+#'
+#' \describe{
+#' \item{`"ae_ophtha"`}{Ophthalmology Adverse Events Dataset.}
+#' \item{`"ae"`}{Adverse Events Dataset-updated.}
+#' \item{`"ce_vaccine"`}{Clinical Events Dataset for Vaccine Studies.}
+#' \item{`"cm"`}{Concomitant Medication Dataset.}
+#' \item{`"dm_vaccine"`}{Demographics Dataset for Vaccine Studies.}
+#' \item{`"dm"`}{Demography Dataset.}
+#' \item{`"ds"`}{Disposition Dataset-updated.}
+#' \item{`"eg"`}{	Electrocardiogram Dataset.}
+#' \item{`"ex_ophtha"`}{Ophthalmology Exposure Dataset.}
+#' \item{`"ex_vaccine"`}{Exposures Dataset for Vaccine Studies.}
+#' \item{`"ex"`}{Exposure Dataset.}
+#' \item{`"face_vaccine"`}{Findings About Clinical Events Dataset for Vaccine Studies.}
+#' \item{`"is_vaccine"`}{Immunogenicity Specimen Assessments Dataset for Vaccine Studies.}
+#' \item{`"lb"`}{Laboratory Measurements Dataset.}
+#' \item{`"mh"`}{Medical History Dataset-updated.}
+#' \item{`"oe_ophtha"`}{Ophthalmology Adverse Events Dataset.}
+#' \item{`"pc"`}{Pharmacokinetics Concentrations Dataset.}
+#' \item{`"pp"`}{Pharmacokinetics Parameters Dataset.}
+#' \item{`"qs_ophtha"`}{Ophthalmology Questionnaire Dataset.}
+#' \item{`"rs_onco_irecist"`}{Disease Response Dataset (iRECIST).}
+#' \item{`"rs_onco"`}{Disease Response Dataset.}
+#' }
+#'
+#' @param example A string with either the basename, file name, or relative path
+#'   to a SDTM domain example file bundled with `{stdm.oak}`, e.g. `"cm"`
+#'   (Concomitant Medication) or `"ae"` (Adverse Events).
+#'
+#' @returns The local path to an example file if `example` is supplied, or a
+#'   character vector of example file names.
+#'
+#' @examples
+#' # If no example is provided it returns a vector of possible choices.
+#' domain_example()
+#'
+#' # Get the local path to the Concomitant Medication dataset file.
+#' domain_example("cm")
+#'
+#' # Local path to the Adverse Events dataset file.
+#' domain_example("ae")
+#'
+#' @source See \url{https://cran.r-project.org/package=pharmaversesdtm}.
+#'
+#' @seealso [read_domain_example()]
+#' @export
+domain_example <- function(example) {
+  # If no example is requested, then return all available files.
+  if (missing(example)) {
+    domain_path <- system.file("domain", package = "sdtm.oak", mustWork = TRUE)
+    domain_files <- list.files(domain_path, pattern = "*.rds")
+    domains <- tools::file_path_sans_ext(basename(domain_files))
+    return(domains)
+  }
+
+  # Otherwise, resolve the local path to the example requested.
+  admiraldev::assert_character_scalar(example, optional = TRUE)
+  base_name <- tools::file_path_sans_ext(basename(example))
+  path <- file.path("domain", paste0(base_name, ".rds"))
+  local_path <- system.file(path, package = "sdtm.oak")
+
+  if (identical(local_path, "")) {
+    stop(
+      glue::glue(
+        "'{example}' does not match any domain example files. Run `domain_example()` for options."
+      ),
+      call. = FALSE
+    )
+  } else {
+    local_path <-
+      system.file(path, package = "sdtm.oak", mustWork = TRUE)
+    return(local_path)
+  }
+}
+
+#' Read an example SDTM domain
+#'
+#' [read_domain_example()] imports one of the bundled SDTM domain examples
+#'  as a [tibble][tibble::tibble-package] into R. See [domain_example()] for
+#'  possible choices.
+#'
+#' @param example The name of SDTM domain example, e.g. `"cm"` (Concomitant
+#'   Medication) or `"ae"` (Adverse Events). Run `read_domain_example()` for
+#'   available example files.
+#'
+#' @returns A [tibble][tibble::tibble-package] with an SDTM domain dataset, or a
+#'   character vector of example file names.
+#'
+#' @examples
+#' # Leave the `example` parameter as missing for available example files.
+#' read_domain_example()
+#'
+#' # Read the example Concomitant Medication domain.
+#' read_domain_example("cm")
+#'
+#' # Read the example Adverse Events domain.
+#' read_domain_example("ae")
+#'
+#' @seealso [domain_example()]
+#' @export
+read_domain_example <- function(example) {
+  if (missing(example)) {
+    return(domain_example())
+  } else {
+    admiraldev::assert_character_scalar(example)
+  }
+
+  path <- domain_example(example)
+  readr::read_rds(file = path)
+}
diff --git a/R/sbj_vars.R b/R/sbj_vars.R
@@ -0,0 +1,14 @@
+#' Subject-level key variables
+#'
+#' [sbj_vars()] returns the set of variable names that uniquely define
+#' a subject.
+#'
+#' @returns A character vector of variable names.
+#'
+#' @examples
+#' sbj_vars()
+#'
+#' @export
+sbj_vars <- function() {
+  c("STUDYID", "USUBJID")
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -13,9 +13,16 @@ reference:
   contents:
   - assign
   - harcode
+  - derive_seq
   - derive_study_day
   - assign_datetime
 
+- title: SDTM examples
+  desc: SDTM domain file examples
+  contents:
+  - domain_example
+  - read_domain_example
+
 - title: Controlled terminology
   contents:
   - read_ct_spec
@@ -30,6 +37,10 @@ reference:
   - dtc_formats
   - problems
 
+- title: Utils
+  contents:
+  - sbj_vars
+
 - title: Package global state
   contents:
   - clear_cache

diff --git a/data-raw/sdtm_domain_examples.R b/data-raw/sdtm_domain_examples.R
@@ -0,0 +1,32 @@
+# Title: SDTM domain example datasets.
+
+library(pharmaversesdtm)
+library(readr)
+library(here)
+
+path <- here::here("inst/domain")
+
+vs <- tibble::tribble(
+  ~STUDYID, ~DOMAIN, ~USUBJID, ~VSSPID, ~VSTESTCD, ~VSDTC, ~VSTPTNUM,
+  "ABC123", "VS", "ABC123-375", "/F:VTLS1-D:9795532-R:2", "DIABP", "2020-09-01T13:31", NA,
+  "ABC123", "VS", "ABC123-375", "/F:VTLS1-D:9795532-R:2", "TEMP", "2020-09-01T13:31", NA,
+  "ABC123", "VS", "ABC123-375", "/F:VTLS2-D:9795533-R:2", "DIABP", "2020-09-28T11:00", 2L,
+  "ABC123", "VS", "ABC123-375", "/F:VTLS2-D:9795533-R:2", "TEMP", "2020-09-28T11:00", 2L,
+  "ABC123", "VS", "ABC123-376", "/F:VTLS1-D:9795591-R:1", "DIABP", "2020-09-20", NA,
+  "ABC123", "VS", "ABC123-376", "/F:VTLS1-D:9795591-R:1", "TEMP", "2020-09-20", NA
+)
+
+apsc <- tibble::tribble(
+  ~STUDYID, ~RSUBJID,     ~SCTESTCD,  ~DOMAIN, ~SREL,     ~SCCAT,
+  "ABC123", "ABC123-210", "LVSBJIND", "APSC",  "FRIEND",  "CAREGIVERSTUDY",
+  "ABC123", "ABC123-210", "EDULEVEL", "APSC",  "FRIEND",  "CAREGIVERSTUDY",
+  "ABC123", "ABC123-210", "TMSPPT",   "APSC",  "FRIEND",  "CAREGIVERSTUDY",
+  "ABC123", "ABC123-211", "CAREDUR",  "APSC",  "SIBLING", "CAREGIVERSTUDY",
+  "ABC123", "ABC123-211", "LVSBJIND", "APSC",  "SIBLING", "CAREGIVERSTUDY",
+  "ABC123", "ABC123-212", "JOBCLAS",  "APSC",  "SPOUSE",  "CAREGIVERSTUDY"
+)
+
+readr::write_rds(x = pharmaversesdtm::ae, file = file.path(path, "ae.rds"), compress = "xz")
+readr::write_rds(x = pharmaversesdtm::cm, file = file.path(path, "cm.rds"), compress = "xz")
+readr::write_rds(x = vs, file = file.path(path, "vs.rds"), compress = "xz")
+readr::write_rds(x = apsc, file = file.path(path, "apsc.rds"), compress = "xz")
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -26,3 +26,7 @@ AESTDY
 CMSTDY
 DM
 ungrouped
+Immunogenicity
+Pharmacokinetics
+iRECIST
+pharmaversesdtm
diff --git a/inst/domain/ae.rds b/inst/domain/ae.rds
diff --git a/inst/domain/apsc.rds b/inst/domain/apsc.rds
diff --git a/inst/domain/cm.rds b/inst/domain/cm.rds
diff --git a/inst/domain/vs.rds b/inst/domain/vs.rds