openpharma · LDSamson · Oct 15, 2024 · Oct 9, 2024 · Oct 9, 2024 · Oct 9, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: clinsight
 Title: ClinSight
-Version: 0.1.0.9000
+Version: 0.1.0.9001
 Authors@R: c(
     person("Leonard Daniël", "Samson", , "[email protected]", role = c("cre", "aut"),
            comment = c(ORCID = "0000-0002-6252-7639")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # clinsight (development version)
 
+- Generalized `merge_meta_with_data()` to allow user-defined processing functions.
+
 # clinsight 0.1.0
 
 ## Changed

diff --git a/R/fct_appdata.R b/R/fct_appdata.R
@@ -75,26 +75,23 @@ merge_meta_with_data <- function(
   synch_time <- attr(data, "synch_time") %||% ""
   merged_data <- data |> 
     rename_raw_data(column_names = meta$column_names) |> 
-    readr::type_convert(clinsight_col_specs) |> 
+    readr::type_convert(clinsight_col_specs) |>
+    apply_custom_functions(meta$settings$pre_merge_fns) |>
     add_timevars_to_data() |> 
     # fix MC values before merging:
     fix_multiple_choice_vars(expected_vars = meta$items_expanded$var) |> 
     dplyr::right_join(meta$items_expanded, by = "var") |> 
-    dplyr::filter(!is.na(item_value)) |> 
+    dplyr::filter(!is.na(item_value)) |>
+    apply_custom_functions(meta$settings$mid_merge_fns) |>
     dplyr::mutate(
-      suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix),
-      suffix = ifelse(is.na(suffix), "VAL", suffix),
-      # TODO: improve code below to handle exceptions in a more general manner
-      suffix = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") | 
-                        item_group %in% c("Cytogenetics", "General"), 
-                      "VAL", suffix)
+      suffix_names = suffix_names %|_|% ifelse(is.na(suffix) | grepl("ORRES$", suffix) | item_group == "General", "VAL", suffix)
     ) |> 
-    dplyr::select(-var) |> 
+    dplyr::select(-var, -suffix) |> 
     dplyr::mutate(
       edit_date_time = max(edit_date_time, na.rm = TRUE), 
       .by = c(subject_id, item_name, event_name, event_repeat)
     ) |> 
-    tidyr::pivot_wider(names_from = suffix, values_from = item_value) |> 
+    tidyr::pivot_wider(names_from = suffix_names, values_from = item_value) |> 
     add_missing_columns(expected_columns) |> 
     dplyr::mutate(
       LBORNR_Lower = as.numeric(ifelse(!is.na(lower_limit), lower_limit, LBORNR_Lower)),
@@ -112,12 +109,32 @@ merge_meta_with_data <- function(
       "item_value" = VAL,
       "reason_notdone" = LBREASND
     ) |> 
-    apply_study_specific_fixes() 
+    dplyr::mutate(region = region %|_|% "Missing") |> 
+    apply_custom_functions(meta$settings$post_merge_fns)
   attr(merged_data, "synch_time") <- synch_time
   merged_data
 }
 
 
+#' Apply study-specific suffix fixes
+#' 
+#' These changes are study/EDC-specific and part of the legacy code for ClinSight.
+#' 
+#' @param data A data frame
+#' 
+#' @return A data frame.
+apply_study_specific_suffix_fixes <- function(data) {
+  dplyr::mutate(data,
+    suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix),
+    suffix = ifelse(is.na(suffix), "VAL", suffix),
+    # TODO: improve code below to handle exceptions in a more general manner
+    suffix_names = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") | 
+                      item_group %in% c("Cytogenetics", "General"), 
+                    "VAL", suffix)
+  )
+}
+
+
 #' Apply study-specific fixes
 #'
 #' These changes are probably study-specific and need to be changed accordingly.
@@ -186,6 +203,19 @@ apply_study_specific_fixes <- function(
     )
 }
 
+#' Apply custom modification functions
+#' 
+#' @param data A data frame (for example, raw data merged).
+#' @param functions A character vector containing the names of the functions to
+#'   apply to the data. Default is NULL.
+#' @param .default A character vector containing the names of the functions to
+#'   apply if none are provided. Default is "identity".
+apply_custom_functions <- function(data, functions = NULL, .default = "identity") {
+  Reduce(\(x1, x2) do.call(x2, list(x1)), # Apply next function to output of previous
+         functions %||% .default, # Apply default functions if no additional functions provided
+         init = data) # Initialize with the data object
+}
+
 #' Get appdata
 #'  
 #' Converts data to a list of data frames in the expected format to be used by the 

diff --git a/R/fct_data_helpers.R b/R/fct_data_helpers.R
@@ -29,6 +29,11 @@ get_metadata <- function(
   meta <- lapply(sheets, function(x){
     readxl::read_excel(filepath, sheet = x, col_types = "text")
   })
+
+  meta$settings <- meta$settings |> 
+    lapply(\(x) as.character(na.omit(x))) |> 
+    Filter(f = length)
+
   if(length(expand_tab_items[nchar(expand_tab_items) > 0 ] ) == 0) return(meta)
   if("items_expanded" %in% names(meta)) warning({
     "Table 'items_expanded' already present. The old table will be overwritten."
@@ -144,31 +149,31 @@ add_timevars_to_data <- function(
     dplyr::mutate(
       edit_date_time = as.POSIXct(edit_date_time, tz = "UTC"),
       event_date = as.Date(event_date),
-      day = event_date - min(event_date, na.rm = TRUE), 
-      vis_day = ifelse(event_id %in% c("SCR", "VIS", "VISEXT", "VISVAR", "FU1", "FU2"), day, NA),
+      day = day %|_|% event_date - min(event_date, na.rm = TRUE), 
+      vis_day = ifelse(grepl("^SCR|^VIS|^FU", event_id, ignore.case = TRUE), day, NA),
       vis_num = as.numeric(factor(vis_day))-1,
-      event_name = dplyr::case_when(
-        event_id == "SCR"    ~ "Screening",
-        event_id %in% c("VIS", "VISEXT", "VISVAR")    ~ paste0("Visit ", vis_num),
-        grepl("^FU[[:digit:]]+", event_id)  ~ paste0("Visit ", vis_num, "(FU)"),
-        event_id == "UN"     ~ paste0("Unscheduled visit ", event_repeat),
-        event_id == "EOT"    ~ "EoT",
-        event_id == "EXIT"   ~ "Exit",
-        form_id %in% c("AE", "CM", "CP", "MH", "MH", "MHTR", "PR", "ST", "CMTR", "CMHMA") ~ "Any visit",
-        TRUE                ~ paste0("Other (", event_name, ")")
+      event_name = event_name %|_|% dplyr::case_when(
+        grepl("^SCR", event_id, ignore.case = TRUE) ~ "Screening",
+        grepl("^VIS", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num),
+        grepl("^FU[[:digit:]]+", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num, "(FU)"),
+        grepl("^UN", event_id, ignore.case = TRUE) ~ paste0("Unscheduled visit ", event_repeat),
+        toupper(event_id) == "EOT"    ~ "EoT",
+        toupper(event_id) == "EXIT"   ~ "Exit",
+        grepl("^AE|^CM|^CP|^MH|^PR|^ST", form_id) ~ "Any visit",
+        .default = paste0("Other (", event_id, ")")
       ),
-      event_label = dplyr::case_when(
+      event_label = event_label %|_|% dplyr::case_when(
         !is.na(vis_num)   ~ paste0("V", vis_num),
-        event_id == "UN"   ~ paste0("UV", event_repeat),
-        TRUE              ~ event_name
+        grepl("^UN", event_id, ignore.case = TRUE)   ~ paste0("UV", event_repeat),
+        .default = event_name
       ),
       .by = subject_id
     ) |> 
     dplyr::arrange(
       factor(site_code, levels = order_string(site_code)),
       factor(subject_id, levels = order_string(subject_id))
     )
-  if(any(grepl("^Other ", df$event_name))) warning(
+  if(any(is.na(df$event_name) | grepl("^Other ", df$event_name))) warning(
     "Undefined Events detected. Please verify data before proceeding."
   )
   df

diff --git a/R/golem_utils_server.R b/R/golem_utils_server.R
@@ -52,3 +52,21 @@ drop_nulls <- function(x) {
     x
   }
 }
+
+#' If x does not exist, return y, otherwise return x
+#' 
+#' @param x,y two elements to test, one potentially not existent
+#' @param verbose logical, indicating whether warning message should be displayed.
+#' 
+#' @noRd
+#' 
+#' @examples
+#' mtcars2 %|_|% mtcars
+"%|_|%" <- function(x, y, verbose = TRUE) {
+  if (exists(deparse1(substitute(x)), envir = parent.frame())) {
+    if (verbose) cat("Using user supplied", deparse(deparse1(substitute(x))), "instead of deriving.\n")
+    x
+  } else {
+    y
+  }
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/data-raw/internal_data.R b/data-raw/internal_data.R
@@ -34,7 +34,6 @@ clinsight_col_specs <- c(
   "subject_id" = "c", 
   "event_id" = "c", 
   "event_date" = "D", 
-  "event_name" = "c", 
   "event_repeat" = "i", 
   "form_id" = "c", 
   "form_repeat" = "i", 

diff --git a/data-raw/metadata.xlsx b/data-raw/metadata.xlsx
diff --git a/data/metadata.rda b/data/metadata.rda
diff --git a/inst/golem-config.yml b/inst/golem-config.yml
@@ -1,6 +1,6 @@
 default:
   golem_name: clinsight
-  golem_version: 0.1.0
+  golem_version: 0.1.0.9001
   app_prod: no
   user_identification: test_user
   study_data: !expr clinsight::clinsightful_data

diff --git a/tests/testthat/_snaps/fct_appdata.md b/tests/testthat/_snaps/fct_appdata.md
@@ -31,16 +31,16 @@
       df[c(1, 1000, 2000, 3000, 4000, 5000), ]
     Output
       # A tibble: 6 x 23
-        site_code subject_id event_id event_date event_name event_repeat form_id
-        <chr>     <chr>      <chr>    <date>     <chr>             <int> <chr>  
-      1 9600      9600-001   SCR      2022-11-09 Screening             1 DM     
-      2 <NA>      <NA>       <NA>     NA         <NA>                 NA <NA>   
-      3 <NA>      <NA>       <NA>     NA         <NA>                 NA <NA>   
-      4 <NA>      <NA>       <NA>     NA         <NA>                 NA <NA>   
-      5 <NA>      <NA>       <NA>     NA         <NA>                 NA <NA>   
-      6 <NA>      <NA>       <NA>     NA         <NA>                 NA <NA>   
-      # i 16 more variables: form_repeat <int>, edit_date_time <dttm>, day <drtn>,
-      #   vis_day <dbl>, vis_num <dbl>, event_label <chr>, item_name <chr>,
+        site_code subject_id event_id event_date event_repeat form_id form_repeat
+        <chr>     <chr>      <chr>    <date>            <int> <chr>         <int>
+      1 9600      9600-001   SCR      2022-11-09            1 DM                1
+      2 <NA>      <NA>       <NA>     NA                   NA <NA>             NA
+      3 <NA>      <NA>       <NA>     NA                   NA <NA>             NA
+      4 <NA>      <NA>       <NA>     NA                   NA <NA>             NA
+      5 <NA>      <NA>       <NA>     NA                   NA <NA>             NA
+      6 <NA>      <NA>       <NA>     NA                   NA <NA>             NA
+      # i 16 more variables: edit_date_time <dttm>, day <drtn>, vis_day <dbl>,
+      #   vis_num <dbl>, event_name <chr>, event_label <chr>, item_name <chr>,
       #   item_type <chr>, item_group <chr>, item_value <chr>, item_unit <chr>,
       #   lower_lim <dbl>, upper_lim <dbl>, significance <chr>, reason_notdone <chr>,
       #   region <chr>
@@ -51,21 +51,21 @@
       df
     Output
       # A tibble: 543 x 23
-         site_code subject_id event_id event_date event_name event_repeat form_id
-         <chr>     <chr>      <chr>    <date>     <chr>             <int> <chr>  
-       1 9600      9600-001   SCR      2022-11-09 Screening             1 DM     
-       2 9600      9600-001   SCR      2022-11-09 Screening             1 DM     
-       3 9600      9600-002   SCR      2022-01-01 Screening             1 DM     
-       4 9600      9600-002   SCR      2022-01-01 Screening             1 DM     
-       5 9600      9600-002   SCR      2022-01-01 Screening             1 DM     
-       6 9600      9600-002   SCR      2022-01-01 Screening             1 STE    
-       7 9600      9600-002   SCR      2022-01-01 Screening             1 STE    
-       8 9600      9600-002   SCR      2022-01-01 Screening             1 VS     
-       9 9600      9600-002   SCR      2022-01-01 Screening             1 VS     
-      10 9600      9600-002   SCR      2022-01-01 Screening             1 VS     
+         site_code subject_id event_id event_date event_repeat form_id form_repeat
+         <chr>     <chr>      <chr>    <date>            <int> <chr>         <int>
+       1 9600      9600-001   SCR      2022-11-09            1 DM                1
+       2 9600      9600-001   SCR      2022-11-09            1 DM                1
+       3 9600      9600-002   SCR      2022-01-01            1 DM                1
+       4 9600      9600-002   SCR      2022-01-01            1 DM                1
+       5 9600      9600-002   SCR      2022-01-01            1 DM                1
+       6 9600      9600-002   SCR      2022-01-01            1 STE               1
+       7 9600      9600-002   SCR      2022-01-01            1 STE               1
+       8 9600      9600-002   SCR      2022-01-01            1 VS                1
+       9 9600      9600-002   SCR      2022-01-01            1 VS                1
+      10 9600      9600-002   SCR      2022-01-01            1 VS                1
       # i 533 more rows
-      # i 16 more variables: form_repeat <int>, edit_date_time <dttm>, day <drtn>,
-      #   vis_day <dbl>, vis_num <dbl>, event_label <chr>, item_name <chr>,
+      # i 16 more variables: edit_date_time <dttm>, day <drtn>, vis_day <dbl>,
+      #   vis_num <dbl>, event_name <chr>, event_label <chr>, item_name <chr>,
       #   item_type <chr>, item_group <chr>, item_value <chr>, item_unit <chr>,
       #   lower_lim <dbl>, upper_lim <dbl>, significance <chr>, reason_notdone <chr>,
       #   region <chr>

diff --git a/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx b/tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx
diff --git a/tests/testthat/test-golem_utils_server.R b/tests/testthat/test-golem_utils_server.R
@@ -44,3 +44,36 @@ test_that("%|NA|% works", {
   )
 })
 
+test_that("%|_|% works", {
+  expect_equal(
+    mtcarrs %|_|% iris,
+    iris
+  )
+  test_wrn <- capture_output_lines(
+    test_out <- mtcars %|_|% iris
+  )
+  expect_equal(
+    test_wrn,
+    'Using user supplied "mtcars" instead of deriving.'
+  )
+  expect_equal(
+    test_out,
+    mtcars
+  )
+  test_wrn <- capture_output_lines(
+    test_out <- dplyr::mutate(mtcars, test = mpg %|_|% "RHS")
+  )
+  expect_equal(
+    test_wrn,
+    'Using user supplied "mpg" instead of deriving.'
+  )
+  expect_equal(
+    test_out,
+    cbind(mtcars, list(test = mtcars$mpg))
+  )
+  expect_equal(
+    dplyr::mutate(mtcars, test = mpg2 %|_|% "RHS"),
+    cbind(mtcars, list(test = "RHS"))
+  )
+})
+
diff --git a/tests/testthat/test-rename_raw_data.R b/tests/testthat/test-rename_raw_data.R
@@ -1,7 +1,7 @@
 describe("rename_raw_data() renames raw study data and throws informative errors if needed.", {
 
   it("renames a data frame as expected", {
-    col_names <- metadata$column_names
+    col_names <- rbind(metadata$column_names, list("Placeholder", "pl"))
     testdata <- lapply(mtcars, as.character) |> as.data.frame()
     col_names$name_raw <- names(testdata)
 
@@ -11,7 +11,7 @@ describe("rename_raw_data() renames raw study data and throws informative errors
   })
 
   it("errors with incorrect input", {
-    col_names <- metadata$column_names
+    col_names <- rbind(metadata$column_names, list("Placeholder", "pl"))
     col_names$name_raw <- names(mtcars)
     expect_error(
       rename_raw_data(mtcars, column_names = "incorrect input"),