From 58688df348f6a5c1325499fbf2359461511a0c25 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 May 2024 17:36:56 +0100 Subject: [PATCH] Simplification of `derive_seq()` interface `derive_seq()` no longer has a default value for parameter `id_vars` as per the meeting of 2024-05-15, so function's logic has been simplified accordingly. --- R/derive_seq.R | 86 +++++++---------------------------------------- _pkgdown.yml | 7 ++++ man/derive_seq.Rd | 55 ++---------------------------- 3 files changed, 21 insertions(+), 127 deletions(-) diff --git a/R/derive_seq.R b/R/derive_seq.R index 87eb448b..165b01d4 100644 --- a/R/derive_seq.R +++ b/R/derive_seq.R @@ -11,14 +11,11 @@ #' Prior to the derivation of `tgt_var`, the data frame `tgt_dat` is sorted #' according to grouping variables indicated in `id_vars`. #' -#' Passing `NULL` to `id_vars` will ensure that every row in `tgt_dat` gets a -#' unique integer number in the column named by `tgt_var`. -#' #' @param tgt_dat The target dataset, a data frame. #' @param tgt_var The target SDTM variable: a single string indicating the name #' of the sequence number (`--SEQ`) variable, e.g. `"DSSEQ"`. Note that #' supplying a name not ending in `"SEQ"` will raise a warning. -#' @param id_vars Either a character vector of identifier variables or `NULL`. +#' @param id_vars A character vector of identifier variables. #' Default is the set of variables returned by [oak_id_vars()]. #' @param start_at The sequence numbering starts at this value (default is `1`). #' @@ -26,84 +23,25 @@ #' variable, i.e. the sequence number (`--SEQ`), whose name is that passed in #' `tgt_var`. This variable is of type integer. #' -#' @examples -#' # An example Medical Devices (MD) domain raw data set. 
-#' md <- -#' tibble::tribble( -#' ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, -#' 1L, "MD1", 375, NA, NA, NA, -#' 2L, "MD1", 375, "15-Sep-20", NA, NA, -#' 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, -#' 4L, "MD1", 377, "4-Oct-20", NA, NA, -#' 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", -#' 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, -#' 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, -#' 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, -#' 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", -#' 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, -#' 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, -#' 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, -#' 13L, "MD1", 379, NA, NA, NA, -#' 14L, "MD1", 379, NA, "17-Feb-20", NA -#' ) -#' -#' # Derive the sequence number MDSEQ. By default, the grouping variables -#' # (`id_vars`) are the ones defined by `oak_id_vars()`. -#' derive_seq(tgt_dat = md, tgt_var = "MDSEQ") -#' -#' # An example Vital Signs (VS) domain raw data set. -#' vs <- tibble::tribble( -#' ~oak_id, ~raw_source, ~patient_number, ~VSTESTCD, ~VISITNUM, ~VSTPTNUM, -#' "PILOT01", "VS", 703, "DIABP", 3, 815, -#' "PILOT01", "VS", 703, "DIABP", 3, 816, -#' "PILOT01", "VS", 703, "SYSBP", 4, 816, -#' "PILOT01", "VS", 716, "DIABP", 3, 815, -#' "PILOT01", "VS", 716, "DIABP", 3, 816, -#' "PILOT01", "VS", 716, "DIABP", 4, 815, -#' "PILOT01", "VS", 716, "SYSBP", 3, 815, -#' "PILOT01", "VS", 716, "SYSBP", 4, 816 -#' ) -#' -#' # Derive sequence number by explicitly indicating the records' grouping -#' # defined by the variables `patient_number` and `VSTESTCD`. -#' vs_id_vars <- c("patient_number", "VSTESTCD") -#' derive_seq(tgt_dat = vs, tgt_var = "ASEQ", id_vars = vs_id_vars) -#' -#' # If no grouping variables are provided then the rows are numbered -#' # sequentially in the order provided. 
-#' derive_seq(tgt_dat = vs, tgt_var = "ASEQ", id_vars = NULL) -#' #' @export derive_seq <- function(tgt_dat, tgt_var, - id_vars = oak_id_vars(), + id_vars, start_at = 1L) { - admiraldev::assert_character_scalar(tgt_var) - if (is.null(id_vars)) { - admiraldev::assert_data_frame(tgt_dat, optional = FALSE) - - } else { - admiraldev::assert_character_vector(id_vars) - admiraldev::assert_data_frame(tgt_dat, - required_vars = rlang::syms(id_vars), - optional = FALSE) - } + admiraldev::assert_character_vector(id_vars) + admiraldev::assert_data_frame(tgt_dat, + required_vars = rlang::syms(id_vars), + optional = FALSE) admiraldev::assert_integer_scalar(start_at, subset = "non-negative") - if (is.null(id_vars)) { - tgt_dat |> - dplyr::ungroup() |> # ensure that is ungrouped - dplyr::mutate("{tgt_var}" := dplyr::row_number() + start_at - 1L) - } else { - tgt_dat |> - dplyr::ungroup() |> # ensure that is ungrouped - dplyr::arrange(dplyr::across(.cols = dplyr::all_of(id_vars))) |> - dplyr::group_by(dplyr::across(dplyr::all_of(id_vars))) |> - dplyr::mutate("{tgt_var}" := dplyr::row_number() + start_at - 1L) |> - dplyr::ungroup() - } + tgt_dat |> + dplyr::ungroup() |> # ensure that is ungrouped + dplyr::arrange(dplyr::across(.cols = dplyr::all_of(id_vars))) |> + dplyr::group_by(dplyr::across(dplyr::all_of(id_vars))) |> + dplyr::mutate("{tgt_var}" := dplyr::row_number() + start_at - 1L) |> + dplyr::ungroup() } diff --git a/_pkgdown.yml b/_pkgdown.yml index 13e86fbf..f5b61778 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -13,9 +13,16 @@ reference: contents: - assign - harcode + - derive_seq - derive_study_day - assign_datetime +- title: SDTM examples + desc: SDTM domain file examples + contents: + - domain_example + - read_domain_example + - title: Controlled terminology contents: - read_ct_spec diff --git a/man/derive_seq.Rd b/man/derive_seq.Rd index e1961af8..75c02550 100644 --- a/man/derive_seq.Rd +++ b/man/derive_seq.Rd @@ -4,7 +4,7 @@ \alias{derive_seq} \title{Derive the 
sequence number (\code{--SEQ}) variable} \usage{ -derive_seq(tgt_dat, tgt_var, id_vars = oak_id_vars(), start_at = 1L) +derive_seq(tgt_dat, tgt_var, id_vars, start_at = 1L) } \arguments{ \item{tgt_dat}{The target dataset, a data frame.} @@ -13,7 +13,7 @@ derive_seq(tgt_dat, tgt_var, id_vars = oak_id_vars(), start_at = 1L) of the sequence number (\code{--SEQ}) variable, e.g. \code{"DSSEQ"}. Note that supplying a name not ending in \code{"SEQ"} will raise a warning.} -\item{id_vars}{Either a character vector of identifier variables or \code{NULL}. +\item{id_vars}{A character vector of identifier variables. Default is the set of variables returned by \code{\link[=oak_id_vars]{oak_id_vars()}}.} \item{start_at}{The sequence numbering starts at this value (default is \code{1}).} @@ -33,55 +33,4 @@ sequence is generated that uniquely identifies each record within the domain. Prior to the derivation of \code{tgt_var}, the data frame \code{tgt_dat} is sorted according to grouping variables indicated in \code{id_vars}. - -Passing \code{NULL} to \code{id_vars} will ensure that every row in \code{tgt_dat} gets a -unique integer number in the column named by \code{tgt_var}. -} -\examples{ -# An example Medical Devices (MD) domain raw data set. 
-md <- - tibble::tribble( - ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, - 1L, "MD1", 375, NA, NA, NA, - 2L, "MD1", 375, "15-Sep-20", NA, NA, - 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, - 4L, "MD1", 377, "4-Oct-20", NA, NA, - 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", - 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, - 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, - 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, - 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", - 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, - 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, - 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, - 13L, "MD1", 379, NA, NA, NA, - 14L, "MD1", 379, NA, "17-Feb-20", NA - ) - -# Derive the sequence number MDSEQ. By default, the grouping variables -# (`id_vars`) are the ones defined by `oak_id_vars()`. -derive_seq(tgt_dat = md, tgt_var = "MDSEQ") - -# An example Vital Signs (VS) domain raw data set. -vs <- tibble::tribble( - ~oak_id, ~raw_source, ~patient_number, ~VSTESTCD, ~VISITNUM, ~VSTPTNUM, - "PILOT01", "VS", 703, "DIABP", 3, 815, - "PILOT01", "VS", 703, "DIABP", 3, 816, - "PILOT01", "VS", 703, "SYSBP", 4, 816, - "PILOT01", "VS", 716, "DIABP", 3, 815, - "PILOT01", "VS", 716, "DIABP", 3, 816, - "PILOT01", "VS", 716, "DIABP", 4, 815, - "PILOT01", "VS", 716, "SYSBP", 3, 815, - "PILOT01", "VS", 716, "SYSBP", 4, 816 -) - -# Derive sequence number by explicitly indicating the records' grouping -# defined by the variables `patient_number` and `VSTESTCD`. -vs_id_vars <- c("patient_number", "VSTESTCD") -derive_seq(tgt_dat = vs, tgt_var = "ASEQ", id_vars = vs_id_vars) - -# If no grouping variables are provided then the rows are numbered -# sequentially in the order provided. -derive_seq(tgt_dat = vs, tgt_var = "ASEQ", id_vars = NULL) - }