From 34e70e2504b8b52fc86453723f9edf721ffdc3ac Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 17 Jan 2024 12:11:05 +0000 Subject: [PATCH] Make `create_iso8601()` trigger warnings if parsing fails in any of the date/time components Previously, `create_iso8601()` would not trigger a warning if at least one of the date, time or date-time components parsed successfully. Now it is enough for one single component to fail at parsing for warnings to be triggered. This is following the request: https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327. --- R/dtc_create_iso8601.R | 3 ++- R/dtc_problems.R | 24 ++++++++++++++++------- man/problems.Rd | 4 ++-- tests/testthat/test-create_iso8601.R | 29 ++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 0bd3f90b..5f1fd1b7 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -446,7 +446,8 @@ create_iso8601 <- cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) - iso8601 <- add_problems(iso8601, dots) + any_prob <- any_problems(cap_matrices, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, any_prob, dots) class(iso8601) <- "iso8601" if (.warn && rlang::is_interactive()) { diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 8072f5c6..deb88c1b 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -1,5 +1,5 @@ -add_problems <- function(x, dtc) { - is_x_na <- is.na(x) +add_problems <- function(x, is_problem, dtc) { + is_x_na <- is_problem if (!any(is_x_na)) { return(x) } @@ -16,13 +16,23 @@ add_problems <- function(x, dtc) { names(dtc) <- names - index <- which(is_x_na) - problems <- tibble::as_tibble(dtc)[is_x_na, ] + index <- which(is_problem) + problems <- tibble::as_tibble(dtc)[is_problem, ] problems <- tibble::add_column(problems, ..i = index, .before = 1L) attr(x, "problems") <- problems x } +any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { + cap_matrices |> + purrr::map(~ format_iso8601(.x, .cutoff_2000 = .cutoff_2000)) |> + unlist() |> + matrix(ncol = length(cap_matrices)) |> + is.na() |> + rowSums() |> + as.logical() +} + #' Retrieve date/time parsing problems #' #' [problems()] is a companion helper function to [create_iso8601()]. It @@ -58,8 +68,8 @@ add_problems <- function(x, dtc) { #' "20231225" #' ) #' -#' #' # By inspect the problematic dates it can be understood that -#' # the `.format` parameter needs to update to include other variations. +#' #' # By inspecting the problematic dates it can be understood that +#' # the `.format` parameter needs to updated to include other variations. #' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") #' problems(iso8601_dttm) #' @@ -98,7 +108,7 @@ warn_problems <- function(x) { n_probs <- n_problems(x) if (n_probs > 0L) { msg <- paste( - sprintf("There were parsing %d problems.", n_probs), + sprintf("There were %d parsing problems.", n_probs), "Run `problems()` on parsed results for details." ) rlang::warn(msg) diff --git a/man/problems.Rd b/man/problems.Rd index f59090a3..097d98fc 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -42,8 +42,8 @@ c( "20231225" ) -#' # By inspect the problematic dates it can be understood that -# the `.format` parameter needs to update to include other variations. +#' # By inspecting the problematic dates it can be understood that +# the `.format` parameter needs to updated to include other variations. iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") problems(iso8601_dttm) diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index b21e9305..63b722ab 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -73,3 +73,32 @@ test_that("`create_iso8601()`: dates and times", { ) expect_identical(as.character(iso8601_dttm), expectation) }) + +# https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327 +test_that("`create_iso8601()`: expect problems", { + dates <- c("999999999", "2000-01-01", "99-01-01", "99-12-31") + times <- c("1520", "0010", "2301", "999999999999") + iso8601_dttm <- create_iso8601(dates, times, .format = c("y-m-d", "HM"), .check_format = FALSE) + expectation <- + structure( + c( + "-----T15:20", + "2000-01-01T00:10", + "1999-01-01T23:01", + "1999-12-31" + ), + problems = structure( + list( + ..i = c(1L, 4L), + ..var1 = c("999999999", + "99-12-31"), + ..var2 = c("1520", "999999999999") + ), + row.names = c(NA, + -2L), + class = c("tbl_df", "tbl", "data.frame") + ), + class = "iso8601" + ) + expect_identical(iso8601_dttm, expectation) +})