diff --git a/DESCRIPTION b/DESCRIPTION index 4eccaca8..979e2598 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: clinsight Title: ClinSight -Version: 0.1.0.9000 +Version: 0.1.0.9001 Authors@R: c( person("Leonard Daniƫl", "Samson", , "lsamson@gcp-service.com", role = c("cre", "aut"), comment = c(ORCID = "0000-0002-6252-7639")), diff --git a/NEWS.md b/NEWS.md index a2c68eb2..930d9445 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # clinsight (development version) +- Generalized `merge_meta_with_data()` to allow user-defined processing functions. + # clinsight 0.1.0 ## Changed diff --git a/R/fct_appdata.R b/R/fct_appdata.R index f0ab2b9a..bd7a6938 100644 --- a/R/fct_appdata.R +++ b/R/fct_appdata.R @@ -75,26 +75,23 @@ merge_meta_with_data <- function( synch_time <- attr(data, "synch_time") %||% "" merged_data <- data |> rename_raw_data(column_names = meta$column_names) |> - readr::type_convert(clinsight_col_specs) |> + readr::type_convert(clinsight_col_specs) |> + apply_custom_functions(meta$settings$pre_merge_fns) |> add_timevars_to_data() |> # fix MC values before merging: fix_multiple_choice_vars(expected_vars = meta$items_expanded$var) |> dplyr::right_join(meta$items_expanded, by = "var") |> - dplyr::filter(!is.na(item_value)) |> + dplyr::filter(!is.na(item_value)) |> + apply_custom_functions(meta$settings$mid_merge_fns) |> dplyr::mutate( - suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix), - suffix = ifelse(is.na(suffix), "VAL", suffix), - # TODO: improve code below to handle exceptions in a more general manner - suffix = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") | - item_group %in% c("Cytogenetics", "General"), - "VAL", suffix) + suffix_names = suffix_names %|_|% ifelse(is.na(suffix) | grepl("ORRES$", suffix) | item_group == "General", "VAL", suffix) ) |> - dplyr::select(-var) |> + dplyr::select(-var, -suffix) |> dplyr::mutate( edit_date_time = max(edit_date_time, na.rm = TRUE), .by = c(subject_id, item_name, event_name, event_repeat) ) |> - tidyr::pivot_wider(names_from = suffix, values_from = item_value) |> + tidyr::pivot_wider(names_from = suffix_names, values_from = item_value) |> add_missing_columns(expected_columns) |> dplyr::mutate( LBORNR_Lower = as.numeric(ifelse(!is.na(lower_limit), lower_limit, LBORNR_Lower)), @@ -112,12 +109,32 @@ merge_meta_with_data <- function( "item_value" = VAL, "reason_notdone" = LBREASND ) |> - apply_study_specific_fixes() + dplyr::mutate(region = region %|_|% "Missing") |> + apply_custom_functions(meta$settings$post_merge_fns) attr(merged_data, "synch_time") <- synch_time merged_data } +#' Apply study-specific suffix fixes +#' +#' These changes are study/EDC-specific and part of the legacy code for ClinSight. +#' +#' @param data A data frame +#' +#' @return A data frame. +apply_study_specific_suffix_fixes <- function(data) { + dplyr::mutate(data, + suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix), + suffix = ifelse(is.na(suffix), "VAL", suffix), + # TODO: improve code below to handle exceptions in a more general manner + suffix_names = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") | + item_group %in% c("Cytogenetics", "General"), + "VAL", suffix) + ) +} + + #' Apply study-specific fixes #' #' These changes are probably study-specific and need to be changed accordingly. @@ -186,6 +203,19 @@ apply_study_specific_fixes <- function( ) } +#' Apply custom modification functions +#' +#' @param data A data frame (for example, raw data merged). +#' @param functions A character vector containing the names of the functions to +#' apply to the data. Default is NULL. +#' @param .default A character vector containing the names of the functions to +#' apply if none are provided. Default is "identity". +apply_custom_functions <- function(data, functions = NULL, .default = "identity") { + Reduce(\(x1, x2) do.call(x2, list(x1)), # Apply next function to output of previous + functions %||% .default, # Apply default functions if no additional functions provided + init = data) # Initialize with the data object +} + #' Get appdata #' #' Converts data to a list of data frames in the expected format to be used by the diff --git a/R/fct_data_helpers.R b/R/fct_data_helpers.R index 09cb6e54..e8d9df29 100644 --- a/R/fct_data_helpers.R +++ b/R/fct_data_helpers.R @@ -29,6 +29,11 @@ get_metadata <- function( meta <- lapply(sheets, function(x){ readxl::read_excel(filepath, sheet = x, col_types = "text") }) + + meta$settings <- meta$settings |> + lapply(\(x) as.character(na.omit(x))) |> + Filter(f = length) + if(length(expand_tab_items[nchar(expand_tab_items) > 0 ] ) == 0) return(meta) if("items_expanded" %in% names(meta)) warning({ "Table 'items_expanded' already present. The old table will be overwritten." @@ -144,23 +149,23 @@ add_timevars_to_data <- function( dplyr::mutate( edit_date_time = as.POSIXct(edit_date_time, tz = "UTC"), event_date = as.Date(event_date), - day = event_date - min(event_date, na.rm = TRUE), - vis_day = ifelse(event_id %in% c("SCR", "VIS", "VISEXT", "VISVAR", "FU1", "FU2"), day, NA), + day = day %|_|% event_date - min(event_date, na.rm = TRUE), + vis_day = ifelse(grepl("^SCR|^VIS|^FU", event_id, ignore.case = TRUE), day, NA), vis_num = as.numeric(factor(vis_day))-1, - event_name = dplyr::case_when( - event_id == "SCR" ~ "Screening", - event_id %in% c("VIS", "VISEXT", "VISVAR") ~ paste0("Visit ", vis_num), - grepl("^FU[[:digit:]]+", event_id) ~ paste0("Visit ", vis_num, "(FU)"), - event_id == "UN" ~ paste0("Unscheduled visit ", event_repeat), - event_id == "EOT" ~ "EoT", - event_id == "EXIT" ~ "Exit", - form_id %in% c("AE", "CM", "CP", "MH", "MH", "MHTR", "PR", "ST", "CMTR", "CMHMA") ~ "Any visit", - TRUE ~ paste0("Other (", event_name, ")") + event_name = event_name %|_|% dplyr::case_when( + grepl("^SCR", event_id, ignore.case = TRUE) ~ "Screening", + grepl("^VIS", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num), + grepl("^FU[[:digit:]]+", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num, "(FU)"), + grepl("^UN", event_id, ignore.case = TRUE) ~ paste0("Unscheduled visit ", event_repeat), + toupper(event_id) == "EOT" ~ "EoT", + toupper(event_id) == "EXIT" ~ "Exit", + grepl("^AE|^CM|^CP|^MH|^PR|^ST", form_id) ~ "Any visit", + .default = paste0("Other (", event_id, ")") ), - event_label = dplyr::case_when( + event_label = event_label %|_|% dplyr::case_when( !is.na(vis_num) ~ paste0("V", vis_num), - event_id == "UN" ~ paste0("UV", event_repeat), - TRUE ~ event_name + grepl("^UN", event_id, ignore.case = TRUE) ~ paste0("UV", event_repeat), + .default = event_name ), .by = subject_id ) |> @@ -168,7 +173,7 @@ add_timevars_to_data <- function( factor(site_code, levels = order_string(site_code)), factor(subject_id, levels = order_string(subject_id)) ) - if(any(grepl("^Other ", df$event_name))) warning( + if(any(is.na(df$event_name) | grepl("^Other ", df$event_name))) warning( "Undefined Events detected. Please verify data before proceeding." ) df diff --git a/R/golem_utils_server.R b/R/golem_utils_server.R index 69be451b..907d64cd 100644 --- a/R/golem_utils_server.R +++ b/R/golem_utils_server.R @@ -52,3 +52,21 @@ drop_nulls <- function(x) { x } } + +#' If x does not exist, return y, otherwise return x +#' +#' @param x,y two elements to test, one potentially not existent +#' @param verbose logical, indicating whether warning message should be displayed. +#' +#' @noRd +#' +#' @examples +#' mtcars2 %|_|% mtcars +"%|_|%" <- function(x, y, verbose = TRUE) { + if (exists(deparse1(substitute(x)), envir = parent.frame())) { + if (verbose) cat("Using user supplied", deparse(deparse1(substitute(x))), "instead of deriving.\n") + x + } else { + y + } +} diff --git a/R/sysdata.rda b/R/sysdata.rda index 0e2f6218..aaebb8b1 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/internal_data.R b/data-raw/internal_data.R index a388f16d..08b35fb6 100644 --- a/data-raw/internal_data.R +++ b/data-raw/internal_data.R @@ -34,7 +34,6 @@ clinsight_col_specs <- c( "subject_id" = "c", "event_id" = "c", "event_date" = "D", - "event_name" = "c", "event_repeat" = "i", "form_id" = "c", "form_repeat" = "i", diff --git a/data-raw/metadata.xlsx b/data-raw/metadata.xlsx index 182428f7..41f6c6ed 100644 Binary files a/data-raw/metadata.xlsx and b/data-raw/metadata.xlsx differ diff --git a/data/metadata.rda b/data/metadata.rda index c26affad..27816ddd 100644 Binary files a/data/metadata.rda and b/data/metadata.rda differ diff --git a/inst/golem-config.yml b/inst/golem-config.yml index d489d7c7..c4db010d 100644 --- a/inst/golem-config.yml +++ b/inst/golem-config.yml @@ -1,6 +1,6 @@ default: golem_name: clinsight - golem_version: 0.1.0 + golem_version: 0.1.0.9001 app_prod: no user_identification: test_user study_data: !expr clinsight::clinsightful_data diff --git a/tests/testthat/_snaps/fct_appdata.md b/tests/testthat/_snaps/fct_appdata.md index ec24ab51..6b4ec7b9 100644 --- a/tests/testthat/_snaps/fct_appdata.md +++ b/tests/testthat/_snaps/fct_appdata.md @@ -31,16 +31,16 @@ df[c(1, 1000, 2000, 3000, 4000, 5000), ] Output # A tibble: 6 x 23 - site_code subject_id event_id event_date event_name event_repeat form_id - - 1 9600 9600-001 SCR 2022-11-09 Screening 1 DM - 2 NA NA - 3 NA NA - 4 NA NA - 5 NA NA - 6 NA NA - # i 16 more variables: form_repeat , edit_date_time , day , - # vis_day , vis_num , event_label , item_name , + site_code subject_id event_id event_date event_repeat form_id form_repeat + + 1 9600 9600-001 SCR 2022-11-09 1 DM 1 + 2 NA NA NA + 3 NA NA NA + 4 NA NA NA + 5 NA NA NA + 6 NA NA NA + # i 16 more variables: edit_date_time , day , vis_day , + # vis_num , event_name , event_label , item_name , # item_type , item_group , item_value , item_unit , # lower_lim , upper_lim , significance , reason_notdone , # region @@ -51,21 +51,21 @@ df Output # A tibble: 543 x 23 - site_code subject_id event_id event_date event_name event_repeat form_id - - 1 9600 9600-001 SCR 2022-11-09 Screening 1 DM - 2 9600 9600-001 SCR 2022-11-09 Screening 1 DM - 3 9600 9600-002 SCR 2022-01-01 Screening 1 DM - 4 9600 9600-002 SCR 2022-01-01 Screening 1 DM - 5 9600 9600-002 SCR 2022-01-01 Screening 1 DM - 6 9600 9600-002 SCR 2022-01-01 Screening 1 STE - 7 9600 9600-002 SCR 2022-01-01 Screening 1 STE - 8 9600 9600-002 SCR 2022-01-01 Screening 1 VS - 9 9600 9600-002 SCR 2022-01-01 Screening 1 VS - 10 9600 9600-002 SCR 2022-01-01 Screening 1 VS + site_code subject_id event_id event_date event_repeat form_id form_repeat + + 1 9600 9600-001 SCR 2022-11-09 1 DM 1 + 2 9600 9600-001 SCR 2022-11-09 1 DM 1 + 3 9600 9600-002 SCR 2022-01-01 1 DM 1 + 4 9600 9600-002 SCR 2022-01-01 1 DM 1 + 5 9600 9600-002 SCR 2022-01-01 1 DM 1 + 6 9600 9600-002 SCR 2022-01-01 1 STE 1 + 7 9600 9600-002 SCR 2022-01-01 1 STE 1 + 8 9600 9600-002 SCR 2022-01-01 1 VS 1 + 9 9600 9600-002 SCR 2022-01-01 1 VS 1 + 10 9600 9600-002 SCR 2022-01-01 1 VS 1 # i 533 more rows - # i 16 more variables: form_repeat , edit_date_time , day , - # vis_day , vis_num , event_label , item_name , + # i 16 more variables: edit_date_time , day , vis_day , + # vis_num , event_name , event_label , item_name , # item_type , item_group , item_value , item_unit , # lower_lim , upper_lim , significance , reason_notdone , # region diff --git a/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx b/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx index ad8313e4..ea325c99 100644 Binary files a/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx and b/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx differ diff --git a/tests/testthat/test-golem_utils_server.R b/tests/testthat/test-golem_utils_server.R index 08376179..929991bd 100644 --- a/tests/testthat/test-golem_utils_server.R +++ b/tests/testthat/test-golem_utils_server.R @@ -44,3 +44,36 @@ test_that("%|NA|% works", { ) }) +test_that("%|_|% works", { + expect_equal( + mtcarrs %|_|% iris, + iris + ) + test_wrn <- capture_output_lines( + test_out <- mtcars %|_|% iris + ) + expect_equal( + test_wrn, + 'Using user supplied "mtcars" instead of deriving.' + ) + expect_equal( + test_out, + mtcars + ) + test_wrn <- capture_output_lines( + test_out <- dplyr::mutate(mtcars, test = mpg %|_|% "RHS") + ) + expect_equal( + test_wrn, + 'Using user supplied "mpg" instead of deriving.' + ) + expect_equal( + test_out, + cbind(mtcars, list(test = mtcars$mpg)) + ) + expect_equal( + dplyr::mutate(mtcars, test = mpg2 %|_|% "RHS"), + cbind(mtcars, list(test = "RHS")) + ) +}) + diff --git a/tests/testthat/test-rename_raw_data.R b/tests/testthat/test-rename_raw_data.R index 94e0b232..fd7db26e 100644 --- a/tests/testthat/test-rename_raw_data.R +++ b/tests/testthat/test-rename_raw_data.R @@ -1,7 +1,7 @@ describe("rename_raw_data() renames raw study data and throws informative errors if needed.", { it("renames a data frame as expected", { - col_names <- metadata$column_names + col_names <- rbind(metadata$column_names, list("Placeholder", "pl")) testdata <- lapply(mtcars, as.character) |> as.data.frame() col_names$name_raw <- names(testdata) @@ -11,7 +11,7 @@ describe("rename_raw_data() renames raw study data and throws informative errors }) it("errors with incorrect input", { - col_names <- metadata$column_names + col_names <- rbind(metadata$column_names, list("Placeholder", "pl")) col_names$name_raw <- names(mtcars) expect_error( rename_raw_data(mtcars, column_names = "incorrect input"),