From 8aed1487777dc4df15f873c92395f005aeef45d8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 8 Feb 2024 01:50:54 +0000 Subject: [PATCH] 0029 feedback create iso8601 (#33) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Ensure `assert_capture_matrix()` return value There was a bug that could lead `assert_capture_matrix()` to return its output (matrix) with columns out of order. The correct column order is now ensured. * Add support for warnings in `create_iso8601()` This is loosely inspired by https://github.com/tidyverse/readr/blob/main/R/problems.R * Closes #29 The `problems()` function is introduced, which allows easy retrieval of what went wrong with the parsing by `create_iso8601()` * Update `create_iso8601()` docs One more example about the `problems()` function. * Make `create_iso8601()` trigger warnings if parsing fails in any of the date/time components Previously, `create_iso8601()` would not trigger a warning if at least one of the date, time or date-time components parsed successfully. Now it is enough for one single component to fail at parsing for warnings to be triggered. This is following the request: https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327. * styler update * Update link in the Contributing guide * Update docs and links. * Update WORDLIST * Add `any_problems()` documentation * Improve grammar in `any_problems()` documentation * Add `add_problems()` documentation * Upgrade roxygen2 version * Automatic renv profile update. * Automatic renv profile update. * Add R_REMOTES_STANDALONE env variable. * Add env into admiralci. * Update .lycheeignore * Fix NOTE: Malformed Description field Fixes the R CMD check NOTE: ❯ checking DESCRIPTION meta-information ... NOTE Malformed Description field: should contain one or more complete sentences. 
* Fix typo in Description field --------- Co-authored-by: Adam Foryś Co-authored-by: galachad --- .github/CONTRIBUTING.md | 2 +- DESCRIPTION | 8 +- NAMESPACE | 2 + R/dtc_create_iso8601.R | 88 ++++++++--- R/dtc_parse_dttm.R | 3 + R/dtc_problems.R | 209 +++++++++++++++++++++++++++ R/dtc_utils.R | 11 +- inst/WORDLIST | 1 + man/add_problems.Rd | 55 +++++++ man/any_problems.Rd | 56 +++++++ man/create_iso8601.Rd | 5 +- man/parse_dttm.Rd | 3 + man/problems.Rd | 62 ++++++++ man/sdtm.oak-package.Rd | 2 +- tests/testthat/test-create_iso8601.R | 55 +++++-- 15 files changed, 521 insertions(+), 41 deletions(-) create mode 100644 R/dtc_problems.R create mode 100644 man/add_problems.Rd create mode 100644 man/any_problems.Rd create mode 100644 man/problems.Rd diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ed286ceb..f7d256e2 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -19,4 +19,4 @@ with the `{sdtm.oak}` philosophy and programming strategy. The team will try to review the issues within the next backlog meeting and give some initial feedback. Since we are not a 100% fully resourced software development team it might be that some issues will take longer to respond to depending on the amount -of overall issues. \ No newline at end of file +of overall issues. diff --git a/DESCRIPTION b/DESCRIPTION index bd5e53b6..acb15ebf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,13 @@ Authors@R: c( person("Pfizer Inc", role = c("cph", "fnd")) ) Maintainer: Rammprasad Ganapathy -Description: An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). 
SDTM standards are implemented in accordance with the SDTM Implemetation guide as defined by CDISC +Description: An EDC and Data Standard-agnostic SDTM data transformation engine + designed for SDTM programming in R. Powered by metadata sdtm.oak can + automate the conversion of raw clinical data to SDTM through standardized + mapping algorithms. SDTM is one of the required standards for data + submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented + in accordance with the SDTM Implementation guide as defined by CDISC + . Language: en-US License: Apache License (>= 2) BugReports: https://github.com/pharmaverse/sdtm.oak/issues diff --git a/NAMESPACE b/NAMESPACE index 455b5386..7fc88eef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand +S3method(print,iso8601) export(create_iso8601) export(fmt_cmp) +export(problems) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 9d2908de..24c9e3e1 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -337,6 +337,7 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' meaning to check against a selection of validated formats in #' [dtc_formats][sdtm.oak::dtc_formats]; or to have a more permissible #' interpretation of the formats. +#' @param .warn Whether to warn about parsing failures. 
#' #' @examples #' # Converting dates @@ -395,36 +396,75 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' create_iso8601("05 feb 1985 12 55 02", .format = fmt, .fmt_c = fmt_cmp) #' #' @export -create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { - assert_fmt_c(.fmt_c) +create_iso8601 <- + function(..., + .format, + .fmt_c = fmt_cmp(), + .na = NULL, + .cutoff_2000 = 68L, + .check_format = FALSE, + .warn = TRUE) { + assert_fmt_c(.fmt_c) + admiraldev::assert_logical_scalar(.check_format) + admiraldev::assert_logical_scalar(.warn) - dots <- rlang::dots_list(...) + dots <- rlang::dots_list(...) - if (rlang::is_empty(dots)) { - return(character()) - } + if (rlang::is_empty(dots)) { + return(character()) + } - # Check if all vectors in `dots` are of character type. - if (!identical(unique(sapply(dots, typeof)), "character")) { - rlang::abort("All vectors in `...` must be of type character.") - } + # Check if all vectors in `dots` are of character type. + if (!identical(unique(sapply(dots, typeof)), "character")) { + rlang::abort("All vectors in `...` must be of type character.") + } - # Check if all vectors in `dots` are of the same length. - n <- unique(lengths(dots)) - if (!identical(length(n), 1L)) { - rlang::abort("All vectors in `...` must be of the same length.") - } + # Check if all vectors in `dots` are of the same length. + n <- unique(lengths(dots)) + if (!identical(length(n), 1L)) { + rlang::abort("All vectors in `...` must be of the same length.") + } - if (!identical(length(dots), length(.format))) { - rlang::abort("Number of vectors in `...` should match length of `.format`.") - } + if (!identical(length(dots), length(.format))) { + rlang::abort("Number of vectors in `...` should match length of `.format`.") + } + + # Check that the `.format` is either a character vector or a list of + # character vectors, and that each string is one of the possible formats. 
+ if (.check_format) { + assert_dtc_format(.format) + } - # Check that the `.format` is either a character vector or a list of - # character vectors, and that each string is one of the possible formats. - if (.check_format) assert_dtc_format(.format) + cap_matrices <- + purrr::map2( + dots, + .format, + ~ parse_dttm( + dttm = .x, + fmt = .y, + na = .na, + fmt_c = .fmt_c + ) + ) + cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) - cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c)) - cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) + iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + any_prob <- any_problems(cap_matrices, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, any_prob, dots) + class(iso8601) <- "iso8601" - format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + if (.warn && rlang::is_interactive()) { + warn_problems(iso8601) + } + + iso8601 + } + +#' @export +print.iso8601 <- function(x, ...) { + # Here we take advantage of the subset operator `[` dropping + # attributes. Also, using `seq_along()` should not force a copy of `x` thus + # being memory-efficient. 
+ print(x[seq_along(x)]) + invisible(x) } diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 2feed78a..0211eb09 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -83,6 +83,9 @@ parse_dttm_ <- function(dttm, #' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") #' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) #' +#' sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") +#' sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") +#' #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) diff --git a/R/dtc_problems.R b/R/dtc_problems.R new file mode 100644 index 00000000..c6ab3494 --- /dev/null +++ b/R/dtc_problems.R @@ -0,0 +1,209 @@ +#' Add ISO 8601 parsing problems +#' +#' @description +#' [add_problems()] annotates the returned value of [create_iso8601()] with +#' possible parsing problems. This annotation consists of a +#' [tibble][tibble::tibble-package] of problems, one row for each parsing +#' failure (see Details section). +#' +#' @details +#' This function annotates its input `x`, a vector of date-times in ISO 8601 +#' format, by creating an attribute named `problems`. This attribute's value +#' is a [tibble][tibble::tibble-package] of parsing problems. The problematic +#' date/times are indicated by the `logical` vector passed as argument to +#' `is_problem`. +#' +#' The attribute `problems` in the returned value will contain a first column +#' named `..i` that indicates the date/time index of the problematic date/time +#' in `x`, and as many extra columns as there were inputs (passed in `dtc`). If +#' `dtc` is named, then those names are used to name the extra columns, +#' otherwise they get named sequentially like so `..var1`, `..var2`, etc. 
+#' +#' @param x A character vector of date-times in ISO 8601 format; typically, the +#' output of [format_iso8601()]. +#' @param is_problem A `logical` indicating which date/time inputs are +#' associated with parsing failures. +#' @param dtc A list of `character` vectors of dates, times or date-times' +#' components. Typically, this parameter takes the value passed in `...` to +#' a [create_iso8601()] call. +#' +#' @returns Either `x` without any modification, if no parsing problems exist, +#' or an annotated `x`, meaning having a `problems` attribute that holds +#' parsing issues (see the Details section). +#' +#' @examples +#' date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") +#' time <- c("001221", "22:35:05", "03:00:15", "07:09:00") +#' dtc <- list(date, time) +#' dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") +#' is_problem <- c(TRUE, TRUE, FALSE, FALSE) +#' +#' dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) +#' sdtm.oak:::problems(dttm2) +#' +#' @keywords internal +add_problems <- function(x, is_problem, dtc) { + is_x_na <- is_problem + if (!any(is_x_na)) { + return(x) + } + + names <- names(dtc) + bad_names <- duplicated(names) | names == "" + compat_names <- paste0("..var", seq_along(dtc)) + + if (is.null(names)) { + names <- compat_names + } else { + names[bad_names] <- compat_names[bad_names] + } + + names(dtc) <- names + + index <- which(is_problem) + problems <- tibble::as_tibble(dtc)[is_problem, ] + problems <- tibble::add_column(problems, ..i = index, .before = 1L) + attr(x, "problems") <- problems + x +} + +#' Detect problems with the parsing of date/times +#' +#' @description +#' +#' [any_problems()] takes a list of capture matrices (see [parse_dttm()]) and +#' reports on parsing problems by means of predicate values. A `FALSE` value +#' indicates that the parsing was successful and a `TRUE` value a parsing +#' failure in at least one of the inputs to [create_iso8601()]. 
Note that this +#' is an internal function to be used in the context of [create_iso8601()] +#' source code and hence each capture matrix corresponds to one input to +#' [create_iso8601()]. +#' +#' @param cap_matrices A list of capture matrices in the sense of the value +#' returned by [parse_dttm()]. +#' @param .cutoff_2000 An integer value. Two-digit years smaller than or equal to +#' `.cutoff_2000` are parsed as though starting with `20`, otherwise parsed as +#' though starting with `19`. +#' +#' @returns A `logical` whose length matches the number of underlying date/times +#' passed as inputs to [create_iso8601()], i.e. whose length matches the +#' number of rows of the capture matrices in `cap_matrices`. +#' +#' @examples +#' # No problem (return value is `FALSE`). +#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) +#' +#' # Now the parsing fails (return value is `TRUE`). +#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) +#' +#' # Find if there has been a problem in either the `date` or `time` inputs. +#' # The following problems are expected with: +#' # - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` +#' # - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. +#' # +#' date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") +#' time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") +#' +#' cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") +#' cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") +#' +#' (cap_matrices <- list(cap_matrix_date, cap_matrix_time)) +#' +#' # `any_problems()` returns `TRUE` for the first two elements because of the +#' # failure to parse `"00h12m21"` and `"2001/12/25"`, respectively. 
+#' sdtm.oak:::any_problems(cap_matrices) +#' +#' @keywords internal +any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { + cap_matrices |> + purrr::map(~ format_iso8601(.x, .cutoff_2000 = .cutoff_2000)) |> + unlist() |> + matrix(ncol = length(cap_matrices)) |> + is.na() |> + rowSums() |> + as.logical() +} + +#' Retrieve date/time parsing problems +#' +#' [problems()] is a companion helper function to [create_iso8601()]. It +#' retrieves ISO 8601 parsing problems from an object of class iso8601, which is +#' [create_iso8601()]'s return value and that might contain a `problems` +#' attribute in case of parsing failures. [problems()] is a helper function that +#' provides easy access to these parsing problems. +#' +#' @param x An object of class iso8601, as typically obtained from a call to +#' [create_iso8601()]. The argument can also be left empty, in which case +#' `problems()` will use the last returned value, making it convenient to use +#' immediately after [create_iso8601()]. +#' +#' @returns If there are no parsing problems in `x`, then the returned value is +#' `NULL`; otherwise, a [tibble][tibble::tibble-package] of parsing failures +#' is returned. Each row corresponds to a parsing problem. There will be a +#' first column named `..i` indicating the position(s) in the inputs to the +#' [create_iso8601()] call that resulted in failures; remaining columns +#' correspond to the original input values passed on to [create_iso8601()], +#' with columns being automatically named `..var1`, `..var2`, and so on, if +#' the inputs to [create_iso8601()] were unnamed, otherwise, the original +#' variable names are used instead. +#' +#' @examples +#' dates <- +#' c( +#' "2020-01-01", +#' "2020-02-11", +#' "2020-01-06", +#' "2020-0921", +#' "2020/10/30", +#' "2020-12-05", +#' "20231225" +#' ) +#' +#' # By inspecting the problematic dates it can be understood that +#' # the `.format` parameter needs to be updated to include other variations. 
+#' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +#' problems(iso8601_dttm) +#' +#' # Including more parsing formats addresses the previous problems +#' formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +#' iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) +#' +#' # So now `problems()` returns `NULL` because there are no more parsing issues. +#' problems(iso8601_dttm2) +#' +#' # If you pass named arguments when calling `create_iso8601()` then they will +#' # be used to create the problems object. +#' iso8601_dttm3 <- create_iso8601(date = dates, .format = "y-m-d") +#' problems(iso8601_dttm3) +#' +#' @export +problems <- function(x = .Last.value) { + probs <- attr(x, "problems") + if (!is.null(probs)) { + probs + } else { + invisible(NULL) + } +} + +n_problems <- function(x) { + probs <- problems(x) + if (is.null(probs)) { + return(0L) + } else { + nrow(probs) + } +} + +warn_problems <- function(x) { + n_probs <- n_problems(x) + if (n_probs > 0L) { + msg <- paste( + sprintf("There were %d parsing problems.", n_probs), + "Run `problems()` on parsed results for details." + ) + rlang::warn(msg) + } + + invisible(NULL) +} diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 93361403..00068168 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -102,7 +102,7 @@ assert_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") m_col_names <- colnames(m) - if (is.null(m_col_names) || !all(m_col_names %in% col_names)) { + if (is.null(m_col_names) || !all(m_col_names == col_names)) { rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") } @@ -139,10 +139,17 @@ complete_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") - if (setequal(col_names, colnames(m))) { + # If all columns are already present, and in the correct order, + # then simply return. 
+ if (identical(col_names, colnames(m))) { return(m) } + # If all columns are present but not in the right order, then reorder. + if (setequal(col_names, colnames(m))) { + return(m[, col_names, drop = FALSE]) + } + miss_cols <- setdiff(col_names, colnames(m)) miss_n_cols <- length(miss_cols) diff --git a/inst/WORDLIST b/inst/WORDLIST index e695dab8..65b6b4f9 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -7,3 +7,4 @@ dtc funder vectorized ORCID +iso diff --git a/man/add_problems.Rd b/man/add_problems.Rd new file mode 100644 index 00000000..23005a07 --- /dev/null +++ b/man/add_problems.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{add_problems} +\alias{add_problems} +\title{Add ISO 8601 parsing problems} +\usage{ +add_problems(x, is_problem, dtc) +} +\arguments{ +\item{x}{A character vector of date-times in ISO 8601 format; typically, the +output of \code{\link[=format_iso8601]{format_iso8601()}}.} + +\item{is_problem}{A \code{logical} indicating which date/time inputs are +associated with parsing failures.} + +\item{dtc}{A list of \code{character} vectors of dates, times or date-times' +components. Typically, this parameter takes the value passed in \code{...} to +a \code{\link[=create_iso8601]{create_iso8601()}} call.} +} +\value{ +Either \code{x} without any modification, if no parsing problems exist, +or an annotated \code{x}, meaning having a \code{problems} attribute that holds +parsing issues (see the Details section). +} +\description{ +\code{\link[=add_problems]{add_problems()}} annotates the returned value of \code{\link[=create_iso8601]{create_iso8601()}} with +possible parsing problems. This annotation consists of a +\link[tibble:tibble-package]{tibble} of problems, one row for each parsing +failure (see Details section). +} +\details{ +This function annotates its input \code{x}, a vector date-times in ISO 8601 +format, by creating an attribute named \code{problems}. 
This attribute's value +is a \link[tibble:tibble-package]{tibble} of parsing problems. The problematic +date/times are indicated by the \code{logical} vector passed as argument to +\code{is_problem}. + +The attribute \code{problems} in the returned value will contain a first column +named \code{..i} that indicates the date/time index of the problematic date/time +in \code{x}, and as many extra columns as there were inputs (passed in \code{dtc}). If +\code{dtc} is named, then those names are used to name the extra columns, +otherwise they get named sequentially like so \code{..var1}, \code{..var2}, etc.. +} +\examples{ +date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") +time <- c("001221", "22:35:05", "03:00:15", "07:09:00") +dtc <- list(date, time) +dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") +is_problem <- c(TRUE, TRUE, FALSE, FALSE) + +dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) +sdtm.oak:::problems(dttm2) + +} +\keyword{internal} diff --git a/man/any_problems.Rd b/man/any_problems.Rd new file mode 100644 index 00000000..36f01c96 --- /dev/null +++ b/man/any_problems.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{any_problems} +\alias{any_problems} +\title{Detect problems with the parsing of date/times} +\usage{ +any_problems(cap_matrices, .cutoff_2000 = 68L) +} +\arguments{ +\item{cap_matrices}{A list of capture matrices in the sense of the returned +value by \code{\link[=parse_dttm]{parse_dttm()}}.} + +\item{.cutoff_2000}{An integer value. Two-digit years smaller or equal to +\code{.cutoff_2000} are parsed as though starting with \code{20}, otherwise parsed as +though starting with \code{19}.} +} +\value{ +A \code{logical} whose length matches the number of underlying date/times +passed as inputs to \code{\link[=create_iso8601]{create_iso8601()}}, i.e. 
whose length matches the +number of rows of the capture matrices in \code{cap_matrices}. +} +\description{ +\code{\link[=any_problems]{any_problems()}} takes a list of capture matrices (see \code{\link[=parse_dttm]{parse_dttm()}}) and +reports on parsing problems by means of predicate values. A \code{FALSE} value +indicates that the parsing was successful and a \code{TRUE} value a parsing +failure in at least one of the inputs to \code{\link[=create_iso8601]{create_iso8601()}}. Note that this +is an internal function to be used in the context of \code{\link[=create_iso8601]{create_iso8601()}} +source code and hence each capture matrix corresponds to one input to +\code{\link[=create_iso8601]{create_iso8601()}}. +} +\examples{ +# No problem (return value is `FALSE`). +sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) + +# Now the parsing fails (return value is `TRUE`). +sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) + +# Find if there has been a problem in either in the `date` or `time` inputs. +# The following problems are expected with: +# - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` +# - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. +# +date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") +time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") + +cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") +cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") + +(cap_matrices <- list(cap_matrix_date, cap_matrix_time)) + +# `any_problems()` returns `TRUE` for the first two elements because of the +# failure to parse `"2001/12/25"` and `"00h12m21"`, respectively. 
+sdtm.oak:::any_problems(cap_matrices) + +} +\keyword{internal} diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 81481975..b7a69c60 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -10,7 +10,8 @@ create_iso8601( .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, - .check_format = FALSE + .check_format = FALSE, + .warn = TRUE ) } \arguments{ @@ -37,6 +38,8 @@ though starting with \code{19}.} meaning to check against a selection of validated formats in \link[=dtc_formats]{dtc_formats}; or to have a more permissible interpretation of the formats.} + +\item{.warn}{Whether to warn about parsing failures.} } \description{ \code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to \href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. Learn more in diff --git a/man/parse_dttm.Rd b/man/parse_dttm.Rd index 016afca0..b0de5132 100644 --- a/man/parse_dttm.Rd +++ b/man/parse_dttm.Rd @@ -83,6 +83,9 @@ sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) +sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") +sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") + sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) diff --git a/man/problems.Rd b/man/problems.Rd new file mode 100644 index 00000000..ce68ad46 --- /dev/null +++ b/man/problems.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{problems} +\alias{problems} +\title{Retrieve date/time parsing problems} +\usage{ +problems(x = .Last.value) +} +\arguments{ +\item{x}{An 
object of class iso8601, as typically obtained from a call to +\code{\link[=create_iso8601]{create_iso8601()}}. The argument can also be left empty, in that case it +\code{problems()} will use the last returned value, making it convenient to use +immediately after \code{\link[=create_iso8601]{create_iso8601()}}.} +} +\value{ +If there are no parsing problems in \code{x}, then the returned value is +\code{NULL}; otherwise, a \link[tibble:tibble-package]{tibble} of parsing failures +is returned. Each row corresponds to a parsing problem. There will be a +first column named \code{..i} indicating the position(s) in the inputs to the +\code{\link[=create_iso8601]{create_iso8601()}} call that resulted in failures; remaining columns +correspond to the original input values passed on to \code{\link[=create_iso8601]{create_iso8601()}}, +with columns being automatically named \code{..var1}, \code{..var2}, and so on, if +the inputs to \code{\link[=create_iso8601]{create_iso8601()}} were unnamed, otherwise, the original +variable names are used instead. +} +\description{ +\code{\link[=problems]{problems()}} is a companion helper function to \code{\link[=create_iso8601]{create_iso8601()}}. It +retrieves ISO 8601 parsing problems from an object of class iso8601, which is +\code{\link[=create_iso8601]{create_iso8601()}}'s return value and that might contain a \code{problems} +attribute in case of parsing failures. \code{\link[=problems]{problems()}} is a helper function that +provides easy access to these parsing problems. +} +\examples{ +dates <- + c( + "2020-01-01", + "2020-02-11", + "2020-01-06", + "2020-0921", + "2020/10/30", + "2020-12-05", + "20231225" + ) + +#' # By inspecting the problematic dates it can be understood that +# the `.format` parameter needs to updated to include other variations. 
+iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +problems(iso8601_dttm) + +# Including more parsing formats addresses the previous problems +formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) + +# So now `problems()` returns `NULL` because there are no more parsing issues. +problems(iso8601_dttm2) + +# If you pass named arguments when calling `create_iso8601()` then they will +# be used to create the problems object. +iso8601_dttm3 <- create_iso8601(date = dates, .format = "y-m-d") +problems(iso8601_dttm3) + +} diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index 20606fa4..fc991afe 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -6,7 +6,7 @@ \alias{sdtm.oak-package} \title{sdtm.oak: SDTM Data Transformation Engine} \description{ -An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented in accordance with the SDTM Implemetation guide as defined by CDISC \url{https://www.cdisc.org/standards/foundational/sdtmig} +An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented in accordance with the SDTM Implementation guide as defined by CDISC \url{https://www.cdisc.org/standards/foundational/sdtmig}. 
} \seealso{ Useful links: diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index f1325cfc..1260b1ca 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -2,17 +2,17 @@ test_that("`create_iso8601()`: individual date components", { x <- c("0", "50", "1950", "80", "1980", "2000") y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "jan", "JAN", "JaN", "1", "01") y0 <- create_iso8601(x, .format = "m", .check_format = FALSE) y1 <- c(NA, "--01", "--01", "--01", NA, "--01") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "00", "1", "01", "10", "31") y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: dates", { @@ -20,15 +20,15 @@ test_that("`create_iso8601()`: dates", { x <- c("19990101", "20000101", "990101", "991231") y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours and minutes", { @@ -36,27 +36,27 @@ test_that("`create_iso8601()`: times: hours and minutes", { x <- c("1520", "0010", "2301", "0000") y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- 
c("15:20", "00:10", "23:01", "00:00") y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15h20", "00h10", "23h01", "00h00") y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours, minutes and seconds", { x <- c("152000", "001059", "230112.123", "00002.") y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4") y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) y1 <- c(y1, "-----T05:01:04") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) @@ -71,5 +71,38 @@ test_that("`create_iso8601()`: dates and times", { "1999-01-01T23:01", "1999-12-31T00:00" ) + expect_identical(as.character(iso8601_dttm), expectation) +}) + +# https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327 +test_that("`create_iso8601()`: expect problems", { + dates <- c("999999999", "2000-01-01", "99-01-01", "99-12-31") + times <- c("1520", "0010", "2301", "999999999999") + iso8601_dttm <- create_iso8601(dates, times, .format = c("y-m-d", "HM"), .check_format = FALSE) + expectation <- + structure( + c( + "-----T15:20", + "2000-01-01T00:10", + "1999-01-01T23:01", + "1999-12-31" + ), + problems = structure( + list( + ..i = c(1L, 4L), + ..var1 = c( + "999999999", + "99-12-31" + ), + ..var2 = c("1520", "999999999999") + ), + row.names = c( + NA, + -2L + ), + class = c("tbl_df", "tbl", "data.frame") + ), + class = "iso8601" + ) expect_identical(iso8601_dttm, expectation) })