From f3c531579a17abd7b4ee0c93f7c1a93b23aca40c Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 29 Nov 2023 18:33:03 +0000 Subject: [PATCH 01/20] Ensure `assert_capture_matrix()` return value There was a bug that could lead `assert_capture_matrix()` to return its output (matrix) with columns out of order. This is now ensured. --- R/dtc_parse_dttm.R | 3 +++ R/dtc_utils.R | 11 +++++++++-- man/parse_dttm.Rd | 3 +++ tests/testthat/test-parse_dttm.R | 1 + 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 2feed78a..0211eb09 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -83,6 +83,9 @@ parse_dttm_ <- function(dttm, #' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") #' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) #' +#' sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") +#' sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") +#' #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") #' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 93361403..9a5662cc 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -102,7 +102,7 @@ assert_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") m_col_names <- colnames(m) - if (is.null(m_col_names) || !all(m_col_names %in% col_names)) { + if (is.null(m_col_names) || !all(m_col_names == col_names)) { rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") } @@ -139,10 +139,17 @@ complete_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") - if (setequal(col_names, colnames(m))) { + # If all columns are already present, and in the correct order, + # then simply return. + if (all(col_names == colnames(m))) { return(m) } + # If all columns are present but not in the right order, then reorder. + if (setequal(col_names, colnames(m))) { + return(m[, col_names, drop = FALSE]) + } + miss_cols <- setdiff(col_names, colnames(m)) miss_n_cols <- length(miss_cols) diff --git a/man/parse_dttm.Rd b/man/parse_dttm.Rd index 016afca0..b0de5132 100644 --- a/man/parse_dttm.Rd +++ b/man/parse_dttm.Rd @@ -83,6 +83,9 @@ sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) +sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") +sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") + sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) diff --git a/tests/testthat/test-parse_dttm.R b/tests/testthat/test-parse_dttm.R index 8da7ca9a..7ebbcf7e 100644 --- a/tests/testthat/test-parse_dttm.R +++ b/tests/testthat/test-parse_dttm.R @@ -51,3 +51,4 @@ test_that("`months_abb_regex()`: lowercase", { ) expect_identical(months_abb_regex(case = "lower"), x) }) + From e756942717fb2f5e170b7303cea509eaec9dae38 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 6 Dec 2023 11:58:13 +0000 Subject: [PATCH 02/20] Add support for warnings in `create_iso8601()` This is loosely inspired on https://github.com/tidyverse/readr/blob/main/R/problems.R --- NAMESPACE | 1 + R/dtc_create_iso8601.R | 31 +++++++++++++++++++++-- R/dtc_problems.R | 56 ++++++++++++++++++++++++++++++++++++++++++ R/dtc_utils.R | 2 +- man/create_iso8601.Rd | 3 ++- 5 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 R/dtc_problems.R diff --git a/NAMESPACE b/NAMESPACE index 455b5386..38607f72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,iso8601) export(create_iso8601) export(fmt_cmp) importFrom(rlang,.data) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 9d2908de..8bf15dd6 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -395,8 +395,18 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' create_iso8601("05 feb 1985 12 55 02", .format = fmt, .fmt_c = fmt_cmp) #' #' @export -create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { +create_iso8601 <- + function(..., + .format, + .fmt_c = fmt_cmp(), + .na = NULL, + .cutoff_2000 = 68L, + .check_format = FALSE, + .warn = TRUE) { + assert_fmt_c(.fmt_c) + admiraldev::assert_logical_scalar(.check_format) + admiraldev::assert_logical_scalar(.warn) dots <- rlang::dots_list(...) @@ -426,5 +436,22 @@ create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c)) cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) - format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, dots) + class(iso8601) <- "iso8601" + + if (.warn) { + warn_problems(iso8601) + } + + iso8601 +} + +#' @export +print.iso8601 <- function(x, ...) { + # Here we take advantage of the subset operator `[` dropping + # attributes. Also, using `seq_along()` should not force a copy of `x` thus + # being memory-efficient. + print(x[seq_along(x)]) + invisible(x) } diff --git a/R/dtc_problems.R b/R/dtc_problems.R new file mode 100644 index 00000000..6b5b144c --- /dev/null +++ b/R/dtc_problems.R @@ -0,0 +1,56 @@ +add_problems <- function(x, dtc) { + is_x_na <- is.na(x) + if (!any(is_x_na)) { + return(x) + } + + names <- names(dtc) + bad_names <- duplicated(names) | names == "" + compat_names <- paste0("..var", seq_along(dtc)) + + if (is.null(names)) { + names <- compat_names + } else { + names[bad_names] <- compat_names[bad_names] + } + + names(dtc) <- names + + index <- which(is_x_na) + problems <- tibble::as_tibble(dtc)[is_x_na, ] + problems <- tibble::add_column(problems, ..i = index, .before = 1L) + attr(x, "problems") <- problems + x +} + +problems <- function(x = .Last.value) { + probs <- attr(x, "problems") + if (!is.null(probs)) { + probs + } else { + invisible(NULL) + } +} + +n_problems <- function(x) { + probs <- problems(x) + if (is.null(probs)) { + return(0L) + } else { + nrow(probs) + } +} + +warn_problems <- function(x) { + n_probs <- n_problems(x) + if (n_probs > 0) { + msg <- paste( + sprintf("There were parsing %d problems.", n_probs), + "Run `problems()` on parsed results for details." + ) + rlang::warn(msg) + invisible(NULL) + } else { + invisible(NULL) + } +} diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 9a5662cc..00068168 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -141,7 +141,7 @@ complete_capture_matrix <- # If all columns are already present, and in the correct order, # then simply return. - if (all(col_names == colnames(m))) { + if (identical(col_names, colnames(m))) { return(m) } diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 81481975..7aea3f09 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -10,7 +10,8 @@ create_iso8601( .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, - .check_format = FALSE + .check_format = FALSE, + .warn = TRUE ) } \arguments{ From 22d69aeb1e60020fcb2a4e4aac7b71318629562e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 13 Dec 2023 16:49:25 +0000 Subject: [PATCH 03/20] Closes #29 The `problems()` is introduced that allows easy retrieval of what went wrong with the parsing by `create_iso8601()` --- NAMESPACE | 1 + R/dtc_create_iso8601.R | 93 +++++++++++++++------------- R/dtc_problems.R | 55 ++++++++++++++-- man/create_iso8601.Rd | 2 + man/problems.Rd | 57 +++++++++++++++++ tests/testthat/test-create_iso8601.R | 24 +++---- tests/testthat/test-parse_dttm.R | 1 - 7 files changed, 174 insertions(+), 59 deletions(-) create mode 100644 man/problems.Rd diff --git a/NAMESPACE b/NAMESPACE index 38607f72..7fc88eef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,5 +3,6 @@ S3method(print,iso8601) export(create_iso8601) export(fmt_cmp) +export(problems) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 8bf15dd6..0bd3f90b 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -337,6 +337,7 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' meaning to check against a selection of validated formats in #' [dtc_formats][sdtm.oak::dtc_formats]; or to have a more permissible #' interpretation of the formats. +#' @param .warn Whether to warn about parsing failures. #' #' @examples #' # Converting dates @@ -403,50 +404,58 @@ create_iso8601 <- .cutoff_2000 = 68L, .check_format = FALSE, .warn = TRUE) { - - assert_fmt_c(.fmt_c) - admiraldev::assert_logical_scalar(.check_format) - admiraldev::assert_logical_scalar(.warn) - - dots <- rlang::dots_list(...) - - if (rlang::is_empty(dots)) { - return(character()) - } - - # Check if all vectors in `dots` are of character type. - if (!identical(unique(sapply(dots, typeof)), "character")) { - rlang::abort("All vectors in `...` must be of type character.") - } - - # Check if all vectors in `dots` are of the same length. - n <- unique(lengths(dots)) - if (!identical(length(n), 1L)) { - rlang::abort("All vectors in `...` must be of the same length.") - } - - if (!identical(length(dots), length(.format))) { - rlang::abort("Number of vectors in `...` should match length of `.format`.") - } - - # Check that the `.format` is either a character vector or a list of - # character vectors, and that each string is one of the possible formats. - if (.check_format) assert_dtc_format(.format) - - cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c)) - cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) - - iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) - iso8601 <- add_problems(iso8601, dots) - class(iso8601) <- "iso8601" - - if (.warn) { - warn_problems(iso8601) + assert_fmt_c(.fmt_c) + admiraldev::assert_logical_scalar(.check_format) + admiraldev::assert_logical_scalar(.warn) + + dots <- rlang::dots_list(...) + + if (rlang::is_empty(dots)) { + return(character()) + } + + # Check if all vectors in `dots` are of character type. + if (!identical(unique(sapply(dots, typeof)), "character")) { + rlang::abort("All vectors in `...` must be of type character.") + } + + # Check if all vectors in `dots` are of the same length. + n <- unique(lengths(dots)) + if (!identical(length(n), 1L)) { + rlang::abort("All vectors in `...` must be of the same length.") + } + + if (!identical(length(dots), length(.format))) { + rlang::abort("Number of vectors in `...` should match length of `.format`.") + } + + # Check that the `.format` is either a character vector or a list of + # character vectors, and that each string is one of the possible formats. + if (.check_format) + assert_dtc_format(.format) + + cap_matrices <- + purrr::map2(dots, + .format, + ~ parse_dttm( + dttm = .x, + fmt = .y, + na = .na, + fmt_c = .fmt_c + )) + cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) + + iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, dots) + class(iso8601) <- "iso8601" + + if (.warn && rlang::is_interactive()) { + warn_problems(iso8601) + } + + iso8601 } - iso8601 -} - #' @export print.iso8601 <- function(x, ...) { # Here we take advantage of the subset operator `[` dropping diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 6b5b144c..d50f9c37 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -23,6 +23,54 @@ add_problems <- function(x, dtc) { x } +#' Retrieve date/time parsing problems +#' +#' [problems()] is a companion helper function to [create_iso8601()]. It +#' retrieves ISO 8601 parsing problems from an object of class iso8601, which is +#' [create_iso8601()]'s return value and that might contain a `problems` +#' attribute in case of parsing failures. [problems()] is a helper function that +#' provides easy access to these parsing problems. +#' +#' @param x An object of class iso8601, as typically obtained from a call to +#' [create_iso8601()]. The argument can also be left empty, in that case it +#' `problems()` will use the last returned value, making it convenient to use +#' immediately after [create_iso8601()]. +#' +#' @returns If there are no parsing problems in `x`, then the returned value is +#' `NULL`; otherwise, a [tibble][tibble::tibble-package] of parsing failures +#' is returned. Each row corresponds to a parsing problem. There will be a +#' first column named `..i` indicating the position(s) in the inputs to the +#' [create_iso8601()] call that resulted in failures; remaining columns +#' correspond to the original input values passed on to [create_iso8601()], +#' with columns being automatically named `..var1`, `..var2`, and so on, if +#' the inputs to [create_iso8601()] were unnamed, otherwise, the original +#' variable names are used instead. +#' +#' @examples +#' dates <- +#' c( +#' "2020-01-01", +#' "2020-02-11", +#' "2020-01-06", +#' "2020-0921", +#' "2020/10/30", +#' "2020-12-05", +#' "20231225" +#' ) +#' +#' #' # By inspect the problematic dates it can be understood that +#' # the `.format` parameter needs to update to include other variations. +#' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +#' problems(iso8601_dttm) +#' +#' # Including more parsing formats addresses the previous problems +#' formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +#' iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) +#' +#' # So now `problems()` returns `NULL` because there are no more parsing issues. +#' problems(iso8601_dttm2) +#' +#' @export problems <- function(x = .Last.value) { probs <- attr(x, "problems") if (!is.null(probs)) { @@ -43,14 +91,13 @@ n_problems <- function(x) { warn_problems <- function(x) { n_probs <- n_problems(x) - if (n_probs > 0) { + if (n_probs > 0L) { msg <- paste( sprintf("There were parsing %d problems.", n_probs), "Run `problems()` on parsed results for details." ) rlang::warn(msg) - invisible(NULL) - } else { - invisible(NULL) } + + invisible(NULL) } diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 7aea3f09..b7a69c60 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -38,6 +38,8 @@ though starting with \code{19}.} meaning to check against a selection of validated formats in \link[=dtc_formats]{dtc_formats}; or to have a more permissible interpretation of the formats.} + +\item{.warn}{Whether to warn about parsing failures.} } \description{ \code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to \href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. Learn more in diff --git a/man/problems.Rd b/man/problems.Rd new file mode 100644 index 00000000..c820713b --- /dev/null +++ b/man/problems.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{problems} +\alias{problems} +\title{Retrieve date/time parsing problems} +\usage{ +problems(x = .Last.value) +} +\arguments{ +\item{x}{An object of class iso8601, as typically obtained from a call to +\code{\link[=create_iso8601]{create_iso8601()}}. The argument can also be left empty, in that case it +\code{problems()} will use the last returned value, making it convenient to use +immediately after \code{\link[=create_iso8601]{create_iso8601()}}.} +} +\value{ +If there are no parsing problems in \code{x}, then the returned value is +\code{NULL}; otherwise, a \link[tibble:tibble-package]{tibble} of parsing failures +is returned. Each row corresponds to a parsing problem. There will be a +first column named \code{..i} indicating the position(s) in the inputs to the +\code{\link[=create_iso8601]{create_iso8601()}} call that resulted in failures; remaining columns +correspond to the original input values passed on to \code{\link[=create_iso8601]{create_iso8601()}}, +with columns being automatically named \code{..var1}, \code{..var2}, and so on, if +the inputs to \code{\link[=create_iso8601]{create_iso8601()}} were unnamed, otherwise, the original +variable names are used instead. +} +\description{ +\code{\link[=problems]{problems()}} is a companion helper function to \code{\link[=create_iso8601]{create_iso8601()}}. It +retrieves ISO 8601 parsing problems from an object of class iso8601, which is +\code{\link[=create_iso8601]{create_iso8601()}}'s return value and that might contain a \code{problems} +attribute in case of parsing failures. \code{\link[=problems]{problems()}} is a helper function that +provides easy access to these parsing problems. +} +\examples{ +dates <- +c( + "2020-01-01", + "2020-02-11", + "2020-01-06", + "2020-0921", + "2020/10/30", + "2020-12-05", + "20231225" +) + +#' # By inspect the problematic dates it can be understood that +# the `.format` parameter needs to update to include other variations. +iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +problems(iso8601_dttm) + +# Including more parsing formats addresses the previous problems +formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) + +# So now `problems()` returns `NULL` because there are no more parsing issues. +problems(iso8601_dttm2) + +} diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index f1325cfc..b21e9305 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -2,17 +2,17 @@ test_that("`create_iso8601()`: individual date components", { x <- c("0", "50", "1950", "80", "1980", "2000") y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "jan", "JAN", "JaN", "1", "01") y0 <- create_iso8601(x, .format = "m", .check_format = FALSE) y1 <- c(NA, "--01", "--01", "--01", NA, "--01") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "00", "1", "01", "10", "31") y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: dates", { @@ -20,15 +20,15 @@ test_that("`create_iso8601()`: dates", { x <- c("19990101", "20000101", "990101", "991231") y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours and minutes", { @@ -36,27 +36,27 @@ test_that("`create_iso8601()`: times: hours and minutes", { x <- c("1520", "0010", "2301", "0000") y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15:20", "00:10", "23:01", "00:00") y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15h20", "00h10", "23h01", "00h00") y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours, minutes and seconds", { x <- c("152000", "001059", "230112.123", "00002.") y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4") y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) y1 <- c(y1, "-----T05:01:04") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) @@ -71,5 +71,5 @@ test_that("`create_iso8601()`: dates and times", { "1999-01-01T23:01", "1999-12-31T00:00" ) - expect_identical(iso8601_dttm, expectation) + expect_identical(as.character(iso8601_dttm), expectation) }) diff --git a/tests/testthat/test-parse_dttm.R b/tests/testthat/test-parse_dttm.R index 7ebbcf7e..8da7ca9a 100644 --- a/tests/testthat/test-parse_dttm.R +++ b/tests/testthat/test-parse_dttm.R @@ -51,4 +51,3 @@ test_that("`months_abb_regex()`: lowercase", { ) expect_identical(months_abb_regex(case = "lower"), x) }) - From 8d51f5f0797636d20368319913b9d24305ec2f4b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 17 Dec 2023 00:45:46 +0000 Subject: [PATCH 04/20] Update `create_iso8601()` docs One more example about the `problems()` function. --- R/dtc_problems.R | 5 +++++ man/problems.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/dtc_problems.R b/R/dtc_problems.R index d50f9c37..8072f5c6 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -70,6 +70,11 @@ add_problems <- function(x, dtc) { #' # So now `problems()` returns `NULL` because there are no more parsing issues. #' problems(iso8601_dttm2) #' +#' # If you pass named arguments when calling `create_iso8601()` then they will +#' # be used to create the problems object. +#' iso8601_dttm3 <- create_iso8601(date = dates, .format = "y-m-d") +#' problems(iso8601_dttm3) +#' #' @export problems <- function(x = .Last.value) { probs <- attr(x, "problems") diff --git a/man/problems.Rd b/man/problems.Rd index c820713b..f59090a3 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -54,4 +54,9 @@ iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) # So now `problems()` returns `NULL` because there are no more parsing issues. problems(iso8601_dttm2) +# If you pass named arguments when calling `create_iso8601()` then they will +# be used to create the problems object. +iso8601_dttm3 <- create_iso8601(date = dates, .format = "y-m-d") +problems(iso8601_dttm3) + } From 34e70e2504b8b52fc86453723f9edf721ffdc3ac Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 17 Jan 2024 12:11:05 +0000 Subject: [PATCH 05/20] Make `create_iso8601()` trigger warnings if parsing fails in any of the date/time components Previously, `create_iso8601()` would not trigger a warning if at least one of the date, time or date-time components parsed successfully. Now it is enough for one single component to fail at parsing for warnings to be triggered. This is following the request: https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327. --- R/dtc_create_iso8601.R | 3 ++- R/dtc_problems.R | 24 ++++++++++++++++------- man/problems.Rd | 4 ++-- tests/testthat/test-create_iso8601.R | 29 ++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 0bd3f90b..5f1fd1b7 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -446,7 +446,8 @@ create_iso8601 <- cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) - iso8601 <- add_problems(iso8601, dots) + any_prob <- any_problems(cap_matrices, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, any_prob, dots) class(iso8601) <- "iso8601" if (.warn && rlang::is_interactive()) { diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 8072f5c6..deb88c1b 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -1,5 +1,5 @@ -add_problems <- function(x, dtc) { - is_x_na <- is.na(x) +add_problems <- function(x, is_problem, dtc) { + is_x_na <- is_problem if (!any(is_x_na)) { return(x) } @@ -16,13 +16,23 @@ add_problems <- function(x, dtc) { names(dtc) <- names - index <- which(is_x_na) - problems <- tibble::as_tibble(dtc)[is_x_na, ] + index <- which(is_problem) + problems <- tibble::as_tibble(dtc)[is_problem, ] problems <- tibble::add_column(problems, ..i = index, .before = 1L) attr(x, "problems") <- problems x } +any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { + cap_matrices |> + purrr::map(~ format_iso8601(.x, .cutoff_2000 = .cutoff_2000)) |> + unlist() |> + matrix(ncol = length(cap_matrices)) |> + is.na() |> + rowSums() |> + as.logical() +} + #' Retrieve date/time parsing problems #' #' [problems()] is a companion helper function to [create_iso8601()]. It @@ -58,8 +68,8 @@ add_problems <- function(x, dtc) { #' "20231225" #' ) #' -#' #' # By inspect the problematic dates it can be understood that -#' # the `.format` parameter needs to update to include other variations. +#' #' # By inspecting the problematic dates it can be understood that +#' # the `.format` parameter needs to updated to include other variations. #' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") #' problems(iso8601_dttm) #' @@ -98,7 +108,7 @@ warn_problems <- function(x) { n_probs <- n_problems(x) if (n_probs > 0L) { msg <- paste( - sprintf("There were parsing %d problems.", n_probs), + sprintf("There were %d parsing problems.", n_probs), "Run `problems()` on parsed results for details." ) rlang::warn(msg) diff --git a/man/problems.Rd b/man/problems.Rd index f59090a3..097d98fc 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -42,8 +42,8 @@ c( "20231225" ) -#' # By inspect the problematic dates it can be understood that -# the `.format` parameter needs to update to include other variations. +#' # By inspecting the problematic dates it can be understood that +# the `.format` parameter needs to updated to include other variations. iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") problems(iso8601_dttm) diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index b21e9305..63b722ab 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -73,3 +73,32 @@ test_that("`create_iso8601()`: dates and times", { ) expect_identical(as.character(iso8601_dttm), expectation) }) + +# https://github.com/pharmaverse/sdtm.oak/pull/33#discussion_r1436195327 +test_that("`create_iso8601()`: expect problems", { + dates <- c("999999999", "2000-01-01", "99-01-01", "99-12-31") + times <- c("1520", "0010", "2301", "999999999999") + iso8601_dttm <- create_iso8601(dates, times, .format = c("y-m-d", "HM"), .check_format = FALSE) + expectation <- + structure( + c( + "-----T15:20", + "2000-01-01T00:10", + "1999-01-01T23:01", + "1999-12-31" + ), + problems = structure( + list( + ..i = c(1L, 4L), + ..var1 = c("999999999", + "99-12-31"), + ..var2 = c("1520", "999999999999") + ), + row.names = c(NA, + -2L), + class = c("tbl_df", "tbl", "data.frame") + ), + class = "iso8601" + ) + expect_identical(iso8601_dttm, expectation) +}) From 799531db7d86a41bbfb9d0e5a69cdb8273d5e640 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 17 Jan 2024 12:24:21 +0000 Subject: [PATCH 06/20] styler update --- R/dtc_create_iso8601.R | 21 ++++++++++++--------- R/dtc_problems.R | 18 +++++++++--------- tests/testthat/test-create_iso8601.R | 12 ++++++++---- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 5f1fd1b7..24c9e3e1 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -431,18 +431,21 @@ create_iso8601 <- # Check that the `.format` is either a character vector or a list of # character vectors, and that each string is one of the possible formats. - if (.check_format) + if (.check_format) { assert_dtc_format(.format) + } cap_matrices <- - purrr::map2(dots, - .format, - ~ parse_dttm( - dttm = .x, - fmt = .y, - na = .na, - fmt_c = .fmt_c - )) + purrr::map2( + dots, + .format, + ~ parse_dttm( + dttm = .x, + fmt = .y, + na = .na, + fmt_c = .fmt_c + ) + ) cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) diff --git a/R/dtc_problems.R b/R/dtc_problems.R index deb88c1b..f0f1b5ad 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -58,15 +58,15 @@ any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { #' #' @examples #' dates <- -#' c( -#' "2020-01-01", -#' "2020-02-11", -#' "2020-01-06", -#' "2020-0921", -#' "2020/10/30", -#' "2020-12-05", -#' "20231225" -#' ) +#' c( +#' "2020-01-01", +#' "2020-02-11", +#' "2020-01-06", +#' "2020-0921", +#' "2020/10/30", +#' "2020-12-05", +#' "20231225" +#' ) #' #' #' # By inspecting the problematic dates it can be understood that #' # the `.format` parameter needs to updated to include other variations. diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index 63b722ab..1260b1ca 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -90,12 +90,16 @@ test_that("`create_iso8601()`: expect problems", { problems = structure( list( ..i = c(1L, 4L), - ..var1 = c("999999999", - "99-12-31"), + ..var1 = c( + "999999999", + "99-12-31" + ), ..var2 = c("1520", "999999999999") ), - row.names = c(NA, - -2L), + row.names = c( + NA, + -2L + ), class = c("tbl_df", "tbl", "data.frame") ), class = "iso8601" From 518faaf197e689414b6070c3ca869868b391429b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 17 Jan 2024 16:53:20 +0000 Subject: [PATCH 07/20] Update link in the Contributing guide --- .github/CONTRIBUTING.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8a1ba358..def1b113 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -2,8 +2,7 @@ This outlines how to propose a change to the aok package. For more detailed info about contributing to {oak}, and other [pharmaverse -packages](https://pharmaverse.org/), please see the [development process -guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) +packages](https://pharmaverse.org/), please see the [Programming Strategy](https://pharmaverse.github.io/admiraldev/articles/programming_strategy.html) as well as other Developer Guides in the Articles section of the [{admiral} website](https://pharmaverse.github.io/admiral/cran-release/index.html) @@ -52,7 +51,7 @@ that they’d like to contribute code. as possible to discuss further details. -See [Contribution to {admiral}](https://pharmaverse.github.io/admiral/cran-release/articles/contribution_model.html) for additional details. +See [Contribution to {admiral}](https://pharmaverse.github.io/admiral/CONTRIBUTING.html) for additional details. # Containers guideline From fcaea2b6d8876befced773d6bec91c6356449a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Thu, 18 Jan 2024 07:18:03 +0100 Subject: [PATCH 08/20] Update docs and links. --- .github/CONTRIBUTING.md | 18 +++++++++--------- .github/pull_request_template.md | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8a1ba358..f8b76705 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,11 +1,11 @@ -# Contribution to {oak} +# Contribution to {sdtm.oak} This outlines how to propose a change to the aok package. For more detailed info -about contributing to {oak}, and other [pharmaverse +about contributing to {sdtm.oak}, and other [pharmaverse packages](https://pharmaverse.org/), please see the [development process -guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) +guide](https://pharmaverse.github.io/sdtm.oak/CONTRIBUTING.html) as well as other Developer Guides in the Articles section of the [{admiral} -website](https://pharmaverse.github.io/admiral/cran-release/index.html) +website](https://pharmaverse.github.io/admiral/index.html) # Basics * For each new contribution, the user creates an issue on the issue tab on [GitHub](https://github.com/pharmaverse/oak/issues) to put it in our backlog. @@ -17,7 +17,7 @@ functions, documentation, tests or new features. [Slack](https://oakgarden.slack.com) (If you don't have access, use this [link](https://join.slack.com/t/oakgarden/shared_invite/zt-204sf8w5c-Vxl71cI~WAYhsMLbHGxeMw) to join). We can discuss details or align expectations if you are not familiar -with the `{oak}` philosophy and programming strategy. The team will try to +with the `{sdtm.oak}` philosophy and programming strategy. The team will try to review the issues within the next backlog meeting and give some initial feedback. Since we are not a 100% fully resourced software development team it might be that some issues will take longer to respond to depending on the amount @@ -42,17 +42,17 @@ community. * First, the user creates an issue or comments on an existing issue to notify that they’d like to contribute code. * Follow our development process step-by-step guide. - * We advise to contact an `{oak}` core development team directly via [Slack](https://app.slack.com/client/T028PB489D3/C02M8KN8269) before submitting code for complex functionality. + * We advise to contact an `{sdtm.oak}` core development team directly via [Slack](https://app.slack.com/client/T028PB489D3/C02M8KN8269) before submitting code for complex functionality. ## Type 2 Contribution without Code: - * User creates an issue and ideally contacts an `{oak}` team member via [Slack](https://oakgarden.slack.com). - * The `{oak}` core development team will contact the issue creator as soon + * User creates an issue and ideally contacts an `{sdtm.oak}` team member via [Slack](https://oakgarden.slack.com). + * The `{sdtm.oak}` core development team will contact the issue creator as soon as possible to discuss further details. -See [Contribution to {admiral}](https://pharmaverse.github.io/admiral/cran-release/articles/contribution_model.html) for additional details. +See [Contribution to {admiral}](https://pharmaverse.github.io/admiral/articles/contribution_model.html) for additional details. # Containers guideline diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index e32853fc..c70dca9b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,6 +1,6 @@ Thank you for your Pull Request! We have developed this task checklist from the [Development Process -Guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) +Guide](https://pharmaverse.github.io/sdtm.oak/CONTRIBUTING.html) to help with the final steps of the process. Completing the below tasks helps to ensure our reviewers can maximize their time on your code as well as making sure the oak codebase remains robust and consistent. @@ -31,7 +31,7 @@ and families. Refer to the - [ ] Update `NEWS.md` if the changes pertain to a user-facing function (i.e. it has an `@export` tag) or documentation aimed at users (rather than developers) - [ ] Build oak site `pkgdown::build_site()` and check that all affected -examples are displayed correctly and that all new functions occur on the "[Reference](https://pharmaverse.github.io/admiral/cran-release/reference/index.html)" page. +examples are displayed correctly and that all new functions occur on the "[Reference](https://pharmaverse.github.io/admiral/reference/index.html)" page. - [ ] Address or fix all lintr warnings and errors - `lintr::lint_package()` - [ ] Run `R CMD check` locally and address all errors and warnings - `devtools::check()` - [ ] Link the issue in the Development Section on the right hand side. From f374968c76a1f55088fb9b85bd523756d0988fde Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 18 Jan 2024 15:31:57 +0000 Subject: [PATCH 09/20] Update WORDLIST --- inst/WORDLIST | 1 + man/problems.Rd | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index e695dab8..65b6b4f9 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -7,3 +7,4 @@ dtc funder vectorized ORCID +iso diff --git a/man/problems.Rd b/man/problems.Rd index 097d98fc..ce68ad46 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -32,15 +32,15 @@ provides easy access to these parsing problems. } \examples{ dates <- -c( - "2020-01-01", - "2020-02-11", - "2020-01-06", - "2020-0921", - "2020/10/30", - "2020-12-05", - "20231225" -) + c( + "2020-01-01", + "2020-02-11", + "2020-01-06", + "2020-0921", + "2020/10/30", + "2020-12-05", + "20231225" + ) #' # By inspecting the problematic dates it can be understood that # the `.format` parameter needs to updated to include other variations. From 233a740ee2a94e9091fc07ccaf8399f886f86010 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 18 Jan 2024 16:10:52 +0000 Subject: [PATCH 10/20] Add `any_problems()` documentation --- R/dtc_problems.R | 48 ++++++++++++++++++++++++++++++++++++++ man/any_problems.Rd | 57 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 man/any_problems.Rd diff --git a/R/dtc_problems.R b/R/dtc_problems.R index f0f1b5ad..3f0967bc 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -23,6 +23,54 @@ add_problems <- function(x, is_problem, dtc) { x } +#' Detect problems with the parsing of date/times +#' +#' @description +#' +#' [any_problems()] takes a list of capture matrices (see [parse_dttm()]) and +#' reports on parsing problems by means of predicate values. A `FALSE` value +#' indicates the parsing was successful and a `TRUE` value that the parsing +#' failed in at least one of the inputs to [create_iso8601()]. Note that each +#' capture matrix corresponds to one input to [create_iso8601()]. +#' +#' Note that this is an internal function to be used in the context of +#' [create_iso8601()] source code. +#' +#' @param cap_matrices A list of capture matrices in the sense of the returned +#' value by [parse_dttm()]. +#' @param .cutoff_2000 An integer value. Two-digit years smaller or equal to +#' `.cutoff_2000` are parsed as though starting with `20`, otherwise parsed as +#' though starting with `19`. +#' +#' @returns A `logical` whose length matches the number underlying date/times +#' passed as inputs to [create_iso8601()], i.e. whose length matches the +#' number of rows of the capture matrices in `cap_matrices`. +#' +#' @examples +#' # No problem (return value is `FALSE`). +#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) +#' +#' # Now the parsing fails (return value is `TRUE`). +#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) +#' +#' # Find if there has been a problem in either in the `date` or `time` inputs. +#' # The following problems are expected with: +#' # - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` +#' # - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. +#' # +#' date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") +#' time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") +#' +#' cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") +#' cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") +#' +#' (cap_matrices <- list(cap_matrix_date, cap_matrix_time)) +#' +#' # `any_problems()` returns `TRUE` for the first two elements because of the +#' # failure to parse `"2001/12/25"` and `"00h12m21"`, respectively. +#' sdtm.oak:::any_problems(cap_matrices) +#' +#' @keywords internal any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { cap_matrices |> purrr::map(~ format_iso8601(.x, .cutoff_2000 = .cutoff_2000)) |> diff --git a/man/any_problems.Rd b/man/any_problems.Rd new file mode 100644 index 00000000..834f73a6 --- /dev/null +++ b/man/any_problems.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{any_problems} +\alias{any_problems} +\title{Detect problems with the parsing of date/times} +\usage{ +any_problems(cap_matrices, .cutoff_2000 = 68L) +} +\arguments{ +\item{cap_matrices}{A list of capture matrices in the sense of the returned +value by \code{\link[=parse_dttm]{parse_dttm()}}.} + +\item{.cutoff_2000}{An integer value. Two-digit years smaller or equal to +\code{.cutoff_2000} are parsed as though starting with \code{20}, otherwise parsed as +though starting with \code{19}.} +} +\value{ +A \code{logical} whose length matches the number underlying date/times +passed as inputs to \code{\link[=create_iso8601]{create_iso8601()}}, i.e. whose length matches the +number of rows of the capture matrices in \code{cap_matrices}. +} +\description{ +\code{\link[=any_problems]{any_problems()}} takes a list of capture matrices (see \code{\link[=parse_dttm]{parse_dttm()}}) and +reports on parsing problems by means of predicate values. A \code{FALSE} value +indicates the parsing was successful and a \code{TRUE} value that the parsing +failed in at least one of the inputs to \code{\link[=create_iso8601]{create_iso8601()}}. Note that each +capture matrix corresponds to one input to \code{\link[=create_iso8601]{create_iso8601()}}. + +Note that this is an internal function to be used in the context of +\code{\link[=create_iso8601]{create_iso8601()}} source code. +} +\examples{ +# No problem (return value is `FALSE`). +sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) + +# Now the parsing fails (return value is `TRUE`). +sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) + +# Find if there has been a problem in either in the `date` or `time` inputs. +# The following problems are expected with: +# - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` +# - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. +# +date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") +time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") + +cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") +cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") + +(cap_matrices <- list(cap_matrix_date, cap_matrix_time)) + +# `any_problems()` returns `TRUE` for the first two elements because of the +# failure to parse `"2001/12/25"` and `"00h12m21"`, respectively. +sdtm.oak:::any_problems(cap_matrices) + +} +\keyword{internal} From e15949961139918f602db41e7d5ea2319dbbd732 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 18 Jan 2024 16:17:12 +0000 Subject: [PATCH 11/20] Improve grammar in `any_problems()` documentation --- R/dtc_problems.R | 13 ++++++------- man/any_problems.Rd | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 3f0967bc..a9bd33ae 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -29,12 +29,11 @@ add_problems <- function(x, is_problem, dtc) { #' #' [any_problems()] takes a list of capture matrices (see [parse_dttm()]) and #' reports on parsing problems by means of predicate values. A `FALSE` value -#' indicates the parsing was successful and a `TRUE` value that the parsing -#' failed in at least one of the inputs to [create_iso8601()]. Note that each -#' capture matrix corresponds to one input to [create_iso8601()]. -#' -#' Note that this is an internal function to be used in the context of -#' [create_iso8601()] source code. +#' indicates that the parsing was successful and a `TRUE` value a parsing +#' failure in at least one of the inputs to [create_iso8601()]. Note that this +#' is an internal function to be used in the context of [create_iso8601()] +#' source code and hence each capture matrix corresponds to one input to +#' [create_iso8601()]. #' #' @param cap_matrices A list of capture matrices in the sense of the returned #' value by [parse_dttm()]. @@ -42,7 +41,7 @@ add_problems <- function(x, is_problem, dtc) { #' `.cutoff_2000` are parsed as though starting with `20`, otherwise parsed as #' though starting with `19`. #' -#' @returns A `logical` whose length matches the number underlying date/times +#' @returns A `logical` whose length matches the number of underlying date/times #' passed as inputs to [create_iso8601()], i.e. whose length matches the #' number of rows of the capture matrices in `cap_matrices`. #' diff --git a/man/any_problems.Rd b/man/any_problems.Rd index 834f73a6..36f01c96 100644 --- a/man/any_problems.Rd +++ b/man/any_problems.Rd @@ -15,19 +15,18 @@ value by \code{\link[=parse_dttm]{parse_dttm()}}.} though starting with \code{19}.} } \value{ -A \code{logical} whose length matches the number underlying date/times +A \code{logical} whose length matches the number of underlying date/times passed as inputs to \code{\link[=create_iso8601]{create_iso8601()}}, i.e. whose length matches the number of rows of the capture matrices in \code{cap_matrices}. } \description{ \code{\link[=any_problems]{any_problems()}} takes a list of capture matrices (see \code{\link[=parse_dttm]{parse_dttm()}}) and reports on parsing problems by means of predicate values. A \code{FALSE} value -indicates the parsing was successful and a \code{TRUE} value that the parsing -failed in at least one of the inputs to \code{\link[=create_iso8601]{create_iso8601()}}. Note that each -capture matrix corresponds to one input to \code{\link[=create_iso8601]{create_iso8601()}}. - -Note that this is an internal function to be used in the context of -\code{\link[=create_iso8601]{create_iso8601()}} source code. +indicates that the parsing was successful and a \code{TRUE} value a parsing +failure in at least one of the inputs to \code{\link[=create_iso8601]{create_iso8601()}}. Note that this +is an internal function to be used in the context of \code{\link[=create_iso8601]{create_iso8601()}} +source code and hence each capture matrix corresponds to one input to +\code{\link[=create_iso8601]{create_iso8601()}}. } \examples{ # No problem (return value is `FALSE`). From 69c463ae6d1c5f62ce6e04f0b51da7196bb63e49 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 18 Jan 2024 17:07:34 +0000 Subject: [PATCH 12/20] Add `add_problems()` documentation --- R/dtc_problems.R | 44 ++++++++++++++++++++++++++++++++++++ man/add_problems.Rd | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 man/add_problems.Rd diff --git a/R/dtc_problems.R b/R/dtc_problems.R index a9bd33ae..c6ab3494 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -1,3 +1,47 @@ +#' Add ISO 8601 parsing problems +#' +#' @description +#' [add_problems()] annotates the returned value of [create_iso8601()] with +#' possible parsing problems. This annotation consists of a +#' [tibble][tibble::tibble-package] of problems, one row for each parsing +#' failure (see Details section). +#' +#' @details +#' This function annotates its input `x`, a vector date-times in ISO 8601 +#' format, by creating an attribute named `problems`. This attribute's value +#' is a [tibble][tibble::tibble-package] of parsing problems. The problematic +#' date/times are indicated by the `logical` vector passed as argument to +#' `is_problem`. +#' +#' The attribute `problems` in the returned value will contain a first column +#' named `..i` that indicates the date/time index of the problematic date/time +#' in `x`, and as many extra columns as there were inputs (passed in `dtc`). If +#' `dtc` is named, then those names are used to name the extra columns, +#' otherwise they get named sequentially like so `..var1`, `..var2`, etc.. +#' +#' @param x A character vector of date-times in ISO 8601 format; typically, the +#' output of [format_iso8601()]. +#' @param is_problem A `logical` indicating which date/time inputs are +#' associated with parsing failures. +#' @param dtc A list of `character` vectors of dates, times or date-times' +#' components. Typically, this parameter takes the value passed in `...` to +#' a [create_iso8601()] call. +#' +#' @returns Either `x` without any modification, if no parsing problems exist, +#' or an annotated `x`, meaning having a `problems` attribute that holds +#' parsing issues (see the Details section). +#' +#' @examples +#' date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") +#' time <- c("001221", "22:35:05", "03:00:15", "07:09:00") +#' dtc <- list(date, time) +#' dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") +#' is_problem <- c(TRUE, TRUE, FALSE, FALSE) +#' +#' dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) +#' sdtm.oak:::problems(dttm2) +#' +#' @keywords internal add_problems <- function(x, is_problem, dtc) { is_x_na <- is_problem if (!any(is_x_na)) { diff --git a/man/add_problems.Rd b/man/add_problems.Rd new file mode 100644 index 00000000..23005a07 --- /dev/null +++ b/man/add_problems.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{add_problems} +\alias{add_problems} +\title{Add ISO 8601 parsing problems} +\usage{ +add_problems(x, is_problem, dtc) +} +\arguments{ +\item{x}{A character vector of date-times in ISO 8601 format; typically, the +output of \code{\link[=format_iso8601]{format_iso8601()}}.} + +\item{is_problem}{A \code{logical} indicating which date/time inputs are +associated with parsing failures.} + +\item{dtc}{A list of \code{character} vectors of dates, times or date-times' +components. Typically, this parameter takes the value passed in \code{...} to +a \code{\link[=create_iso8601]{create_iso8601()}} call.} +} +\value{ +Either \code{x} without any modification, if no parsing problems exist, +or an annotated \code{x}, meaning having a \code{problems} attribute that holds +parsing issues (see the Details section). +} +\description{ +\code{\link[=add_problems]{add_problems()}} annotates the returned value of \code{\link[=create_iso8601]{create_iso8601()}} with +possible parsing problems. This annotation consists of a +\link[tibble:tibble-package]{tibble} of problems, one row for each parsing +failure (see Details section). +} +\details{ +This function annotates its input \code{x}, a vector date-times in ISO 8601 +format, by creating an attribute named \code{problems}. This attribute's value +is a \link[tibble:tibble-package]{tibble} of parsing problems. The problematic +date/times are indicated by the \code{logical} vector passed as argument to +\code{is_problem}. + +The attribute \code{problems} in the returned value will contain a first column +named \code{..i} that indicates the date/time index of the problematic date/time +in \code{x}, and as many extra columns as there were inputs (passed in \code{dtc}). If +\code{dtc} is named, then those names are used to name the extra columns, +otherwise they get named sequentially like so \code{..var1}, \code{..var2}, etc.. +} +\examples{ +date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") +time <- c("001221", "22:35:05", "03:00:15", "07:09:00") +dtc <- list(date, time) +dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") +is_problem <- c(TRUE, TRUE, FALSE, FALSE) + +dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) +sdtm.oak:::problems(dttm2) + +} +\keyword{internal} From e36b69b5f176a411385614e740f050936a5951e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Tue, 23 Jan 2024 19:37:18 +0100 Subject: [PATCH 13/20] Upgrade roxygen2 version --- .github/workflows/r-renv-lock.yml | 2 +- DESCRIPTION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r-renv-lock.yml b/.github/workflows/r-renv-lock.yml index d07747e0..c19174ca 100644 --- a/.github/workflows/r-renv-lock.yml +++ b/.github/workflows/r-renv-lock.yml @@ -130,7 +130,7 @@ jobs: install_min_version("vctrs", "0.4.1") install_min_version("rlang", "1.0.6") renv::install("styler@1.10.2", dependencies = "none") - renv::install("roxygen2@7.2.3", dependencies = "none") + renv::install("roxygen2@7.3.1", dependencies = "none") renv::settings$snapshot.type("custom") renv::snapshot(force = TRUE, prompt = FALSE) diff --git a/DESCRIPTION b/DESCRIPTION index 7d93b1f5..60c5e227 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ URL: https://pharmaverse.github.io/sdtm.oak/, https://github.com/pharmaverse/sdt Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 4.2) Imports: admiraldev, From 7c5948d50afeb3d5e17cc1f1b0bbe08874108f76 Mon Sep 17 00:00:00 2001 From: galachad Date: Wed, 24 Jan 2024 07:49:54 +0000 Subject: [PATCH 14/20] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 4120eeca..e59b2625 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -1056,7 +1056,7 @@ }, "roxygen2": { "Package": "roxygen2", - "Version": "7.2.3", + "Version": "7.3.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1078,7 +1078,7 @@ "withr", "xml2" ], - "Hash": "7b153c746193b143c14baa072bae4e27" + "Hash": "c25fe7b2d8cba73d1b63c947bf7afdb9" }, "rprojroot": { "Package": "rprojroot", From 555d85ec724d4766cf211585e035f99676b45c34 Mon Sep 17 00:00:00 2001 From: galachad Date: Wed, 24 Jan 2024 07:53:51 +0000 Subject: [PATCH 15/20] Automatic renv profile update. --- renv.lock | 4 ++-- renv/profiles/4.3/renv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/renv.lock b/renv.lock index 5141d10f..77ba5c8d 100644 --- a/renv.lock +++ b/renv.lock @@ -1056,7 +1056,7 @@ }, "roxygen2": { "Package": "roxygen2", - "Version": "7.2.3", + "Version": "7.3.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1078,7 +1078,7 @@ "withr", "xml2" ], - "Hash": "7b153c746193b143c14baa072bae4e27" + "Hash": "c25fe7b2d8cba73d1b63c947bf7afdb9" }, "rprojroot": { "Package": "rprojroot", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 5141d10f..77ba5c8d 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -1056,7 +1056,7 @@ }, "roxygen2": { "Package": "roxygen2", - "Version": "7.2.3", + "Version": "7.3.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1078,7 +1078,7 @@ "withr", "xml2" ], - "Hash": "7b153c746193b143c14baa072bae4e27" + "Hash": "c25fe7b2d8cba73d1b63c947bf7afdb9" }, "rprojroot": { "Package": "rprojroot", From 96a978204fa6528ce7e3f7af94cd289357e5d906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Wed, 24 Jan 2024 09:40:41 +0100 Subject: [PATCH 16/20] Add R_REMOTES_STANDALONE env variable. --- .github/workflows/common.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml index de646ffb..70ca8e05 100644 --- a/.github/workflows/common.yml +++ b/.github/workflows/common.yml @@ -87,6 +87,8 @@ jobs: coverage: name: Code Coverage uses: pharmaverse/admiralci/.github/workflows/code-coverage.yml@main + env: + R_REMOTES_STANDALONE: true if: > github.event_name != 'release' with: @@ -98,6 +100,8 @@ jobs: man-pages: name: Man Pages uses: pharmaverse/admiralci/.github/workflows/man-pages.yml@main + env: + R_REMOTES_STANDALONE: true if: github.event_name == 'pull_request' with: r-version: "4.3" From fae98ae0acad6fbff98d48885773fd5bf2faaf34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Wed, 24 Jan 2024 10:13:50 +0100 Subject: [PATCH 17/20] Add env into admiralci. --- .github/workflows/common.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml index 70ca8e05..de646ffb 100644 --- a/.github/workflows/common.yml +++ b/.github/workflows/common.yml @@ -87,8 +87,6 @@ jobs: coverage: name: Code Coverage uses: pharmaverse/admiralci/.github/workflows/code-coverage.yml@main - env: - R_REMOTES_STANDALONE: true if: > github.event_name != 'release' with: @@ -100,8 +98,6 @@ jobs: man-pages: name: Man Pages uses: pharmaverse/admiralci/.github/workflows/man-pages.yml@main - env: - R_REMOTES_STANDALONE: true if: github.event_name == 'pull_request' with: r-version: "4.3" From b58291afc8dae6791e08d6e8c4545e8c3ace9c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Wed, 24 Jan 2024 13:24:20 +0100 Subject: [PATCH 18/20] Update .lycheeignore --- .lycheeignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.lycheeignore b/.lycheeignore index 0a51e5b1..d64d767b 100644 --- a/.lycheeignore +++ b/.lycheeignore @@ -4,5 +4,6 @@ https://packagemanager.posit.co/cran/2023-04-20/ https://packagemanager.posit.co/cran/2022-03-10/ https://packagemanager.posit.co/cran/latest roxygen2@7.2.3 +roxygen2@7.3.1 styler@1.10.2 .*@users.noreply.github.com From c2e405e2f78a94d472f40ba3dc119c46a69291aa Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 8 Feb 2024 00:47:26 +0000 Subject: [PATCH 19/20] Fix NOTE: Malformed Description field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the NR CMD check NOTE: ❯ checking DESCRIPTION meta-information ... NOTE Malformed Description field: should contain one or more complete sentences. --- DESCRIPTION | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bd5e53b6..f029ced7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,13 @@ Authors@R: c( person("Pfizer Inc", role = c("cph", "fnd")) ) Maintainer: Rammprasad Ganapathy -Description: An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented in accordance with the SDTM Implemetation guide as defined by CDISC +Description: An EDC and Data Standard-agnostic SDTM data transformation engine + designed for SDTM programming in R. Powered by metadata sdtm.oak can + automate the conversion of raw clinical data to SDTM through standardized + mapping algorithms. SDTM is one of the required standards for data + submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented + in accordance with the SDTM Implemetation guide as defined by CDISC + . Language: en-US License: Apache License (>= 2) BugReports: https://github.com/pharmaverse/sdtm.oak/issues From f51ceb9e3beaf5fb61ecdd446acbe4a30ae43565 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 8 Feb 2024 00:50:25 +0000 Subject: [PATCH 20/20] Fix typo in Description field --- DESCRIPTION | 2 +- man/sdtm.oak-package.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f029ced7..acb15ebf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Description: An EDC and Data Standard-agnostic SDTM data transformation engine automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented - in accordance with the SDTM Implemetation guide as defined by CDISC + in accordance with the SDTM Implementation guide as defined by CDISC . Language: en-US License: Apache License (>= 2) diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index 20606fa4..fc991afe 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -6,7 +6,7 @@ \alias{sdtm.oak-package} \title{sdtm.oak: SDTM Data Transformation Engine} \description{ -An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented in accordance with the SDTM Implemetation guide as defined by CDISC \url{https://www.cdisc.org/standards/foundational/sdtmig} +An EDC and Data Standard-agnostic SDTM data transformation engine designed for SDTM programming in R. Powered by metadata sdtm.oak can automate the conversion of raw clinical data to SDTM through standardized mapping algorithms. SDTM is one of the required standards for data submission to FDA (U.S.) and PMDA (Japan). SDTM standards are implemented in accordance with the SDTM Implementation guide as defined by CDISC \url{https://www.cdisc.org/standards/foundational/sdtmig}. } \seealso{ Useful links: