Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize metadata merging with data #104

Merged
merged 24 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a7b8eaa
Pass additional modifying functions in arguments of `merge_meta_with_…
jthompson-arcus Oct 9, 2024
f091000
Recode to use base R and character values
jthompson-arcus Oct 9, 2024
1ff5c2d
Move additional merging functions to metadata
jthompson-arcus Oct 9, 2024
5d4a444
Adjust `%||%` to check for empty values, not just `NULL`
jthompson-arcus Oct 10, 2024
6451405
Specify `add_fns` as `post_merge_fns`
jthompson-arcus Oct 10, 2024
2270fdc
Add `pre_merge_fns` setting
jthompson-arcus Oct 10, 2024
7cf9ee4
Add inline function to check existence of an object for `dplyr` mutat…
jthompson-arcus Oct 10, 2024
ba6c086
Impute `event_name` from `event` instead of `event_name`
jthompson-arcus Oct 10, 2024
8dc00e0
Fix broken tests
jthompson-arcus Oct 10, 2024
8154349
Further generalize `add_timevars_to_data()`
jthompson-arcus Oct 10, 2024
325c27d
Initialize region field
jthompson-arcus Oct 10, 2024
2506500
Add mid-merging functions
jthompson-arcus Oct 11, 2024
c9755b7
Update version and NEWS
jthompson-arcus Oct 11, 2024
f48ebac
Remove duplicate type conversion
jthompson-arcus Oct 14, 2024
6087b0e
Create `apply_custom_functions()` to handle
jthompson-arcus Oct 14, 2024
a980920
Clean up metadata settings element generation
jthompson-arcus Oct 14, 2024
c90e778
Remove extraneous pipe
jthompson-arcus Oct 14, 2024
4d7fd45
Reset `%||%` to standard evaluation
jthompson-arcus Oct 14, 2024
6adecea
Display message if using user supplied data.
jthompson-arcus Oct 14, 2024
b001c9b
Remove `event_name` from required columns
jthompson-arcus Oct 14, 2024
568a05f
Resolve test errors
jthompson-arcus Oct 14, 2024
12b28f1
Specify the `parent.frame()` as the environment to check existence wi…
jthompson-arcus Oct 15, 2024
ea4714a
Add tests for `%|_|%`
jthompson-arcus Oct 15, 2024
3e9dc7b
Change `warning()` to `cat()`
jthompson-arcus Oct 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: clinsight
Title: ClinSight
Version: 0.1.0.9000
Version: 0.1.0.9001
Authors@R: c(
person("Leonard Daniël", "Samson", , "[email protected]", role = c("cre", "aut"),
comment = c(ORCID = "0000-0002-6252-7639")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# clinsight (development version)

- Generalized `merge_meta_with_data()` to allow user-defined processing functions.

# clinsight 0.1.0

## Changed
Expand Down
52 changes: 41 additions & 11 deletions R/fct_appdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,26 +75,23 @@ merge_meta_with_data <- function(
synch_time <- attr(data, "synch_time") %||% ""
merged_data <- data |>
rename_raw_data(column_names = meta$column_names) |>
readr::type_convert(clinsight_col_specs) |>
readr::type_convert(clinsight_col_specs) |>
apply_custom_functions(meta$settings$pre_merge_fns) |>
add_timevars_to_data() |>
# fix MC values before merging:
fix_multiple_choice_vars(expected_vars = meta$items_expanded$var) |>
dplyr::right_join(meta$items_expanded, by = "var") |>
dplyr::filter(!is.na(item_value)) |>
dplyr::filter(!is.na(item_value)) |>
apply_custom_functions(meta$settings$mid_merge_fns) |>
dplyr::mutate(
suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix),
suffix = ifelse(is.na(suffix), "VAL", suffix),
# TODO: improve code below to handle exceptions in a more general manner
suffix = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") |
item_group %in% c("Cytogenetics", "General"),
"VAL", suffix)
suffix_names = suffix_names %|_|% ifelse(is.na(suffix) | grepl("ORRES$", suffix) | item_group == "General", "VAL", suffix)
LDSamson marked this conversation as resolved.
Show resolved Hide resolved
) |>
dplyr::select(-var) |>
dplyr::select(-var, -suffix) |>
dplyr::mutate(
edit_date_time = max(edit_date_time, na.rm = TRUE),
.by = c(subject_id, item_name, event_name, event_repeat)
) |>
tidyr::pivot_wider(names_from = suffix, values_from = item_value) |>
tidyr::pivot_wider(names_from = suffix_names, values_from = item_value) |>
add_missing_columns(expected_columns) |>
dplyr::mutate(
LBORNR_Lower = as.numeric(ifelse(!is.na(lower_limit), lower_limit, LBORNR_Lower)),
Expand All @@ -112,12 +109,32 @@ merge_meta_with_data <- function(
"item_value" = VAL,
"reason_notdone" = LBREASND
) |>
apply_study_specific_fixes()
dplyr::mutate(region = region %|_|% "Missing") |>
apply_custom_functions(meta$settings$post_merge_fns)
attr(merged_data, "synch_time") <- synch_time
merged_data
}


#' Apply study-specific suffix fixes
#'
#' These changes are study/EDC-specific and part of the legacy code for ClinSight.
#'
#' @param data A data frame
#'
#' @return A data frame.
apply_study_specific_suffix_fixes <- function(data) {
dplyr::mutate(data,
suffix = ifelse(item_name == "ECG interpretation", "LBCLSIG", suffix),
suffix = ifelse(is.na(suffix), "VAL", suffix),
# TODO: improve code below to handle exceptions in a more general manner
suffix_names = ifelse(suffix %in% c("LBORRES", "VSORRES", "EGORRES") |
item_group %in% c("Cytogenetics", "General"),
"VAL", suffix)
)
}


jthompson-arcus marked this conversation as resolved.
Show resolved Hide resolved
#' Apply study-specific fixes
#'
#' These changes are probably study-specific and need to be changed accordingly.
Expand Down Expand Up @@ -186,6 +203,19 @@ apply_study_specific_fixes <- function(
)
}

#' Apply custom modification functions
#'
#' @param data A data frame (for example, raw data merged).
#' @param functions A character vector containing the names of the functions to
#' apply to the data. Default is NULL.
#' @param .default A character vector containing the names of the functions to
#' apply if none are provided. Default is "identity".
apply_custom_functions <- function(data, functions = NULL, .default = "identity") {
Reduce(\(x1, x2) do.call(x2, list(x1)), # Apply next function to output of previous
functions %||% .default, # Apply default functions if no additional functions provided
init = data) # Initialize with the data object
}

#' Get appdata
#'
#' Converts data to a list of data frames in the expected format to be used by the
Expand Down
35 changes: 20 additions & 15 deletions R/fct_data_helpers.R
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what you guys think about this change. Basic idea is that there is a sheet in the metadata file where settings can be stored. Since settings could have more than one values, the default data frame is simplified to a list with NA's removed.

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ get_metadata <- function(
meta <- lapply(sheets, function(x){
readxl::read_excel(filepath, sheet = x, col_types = "text")
})

meta$settings <- meta$settings |>
lapply(\(x) as.character(na.omit(x))) |>
Filter(f = length)
jthompson-arcus marked this conversation as resolved.
Show resolved Hide resolved

if(length(expand_tab_items[nchar(expand_tab_items) > 0 ] ) == 0) return(meta)
if("items_expanded" %in% names(meta)) warning({
"Table 'items_expanded' already present. The old table will be overwritten."
Expand Down Expand Up @@ -144,31 +149,31 @@ add_timevars_to_data <- function(
dplyr::mutate(
edit_date_time = as.POSIXct(edit_date_time, tz = "UTC"),
event_date = as.Date(event_date),
day = event_date - min(event_date, na.rm = TRUE),
vis_day = ifelse(event_id %in% c("SCR", "VIS", "VISEXT", "VISVAR", "FU1", "FU2"), day, NA),
day = day %|_|% event_date - min(event_date, na.rm = TRUE),
vis_day = ifelse(grepl("^SCR|^VIS|^FU", event_id, ignore.case = TRUE), day, NA),
vis_num = as.numeric(factor(vis_day))-1,
event_name = dplyr::case_when(
event_id == "SCR" ~ "Screening",
event_id %in% c("VIS", "VISEXT", "VISVAR") ~ paste0("Visit ", vis_num),
grepl("^FU[[:digit:]]+", event_id) ~ paste0("Visit ", vis_num, "(FU)"),
event_id == "UN" ~ paste0("Unscheduled visit ", event_repeat),
event_id == "EOT" ~ "EoT",
event_id == "EXIT" ~ "Exit",
form_id %in% c("AE", "CM", "CP", "MH", "MH", "MHTR", "PR", "ST", "CMTR", "CMHMA") ~ "Any visit",
TRUE ~ paste0("Other (", event_name, ")")
event_name = event_name %|_|% dplyr::case_when(
grepl("^SCR", event_id, ignore.case = TRUE) ~ "Screening",
grepl("^VIS", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num),
grepl("^FU[[:digit:]]+", event_id, ignore.case = TRUE) ~ paste0("Visit ", vis_num, "(FU)"),
grepl("^UN", event_id, ignore.case = TRUE) ~ paste0("Unscheduled visit ", event_repeat),
toupper(event_id) == "EOT" ~ "EoT",
toupper(event_id) == "EXIT" ~ "Exit",
grepl("^AE|^CM|^CP|^MH|^PR|^ST", form_id) ~ "Any visit",
.default = paste0("Other (", event_id, ")")
),
event_label = dplyr::case_when(
event_label = event_label %|_|% dplyr::case_when(
!is.na(vis_num) ~ paste0("V", vis_num),
event_id == "UN" ~ paste0("UV", event_repeat),
TRUE ~ event_name
grepl("^UN", event_id, ignore.case = TRUE) ~ paste0("UV", event_repeat),
.default = event_name
),
.by = subject_id
) |>
dplyr::arrange(
factor(site_code, levels = order_string(site_code)),
factor(subject_id, levels = order_string(subject_id))
)
if(any(grepl("^Other ", df$event_name))) warning(
if(any(is.na(df$event_name) | grepl("^Other ", df$event_name))) warning(
"Undefined Events detected. Please verify data before proceeding."
)
df
Expand Down
18 changes: 18 additions & 0 deletions R/golem_utils_server.R
jthompson-arcus marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,21 @@ drop_nulls <- function(x) {
x
}
}

#' If x does not exist, return y, otherwise return x
#'
#' @param x,y two elements to test, one potentially not existent
#' @param verbose logical, indicating whether warning message should be displayed.
#'
#' @noRd
#'
#' @examples
#' mtcars2 %|_|% mtcars
"%|_|%" <- function(x, y, verbose = TRUE) {
if (exists(deparse1(substitute(x)), envir = parent.frame())) {
if (verbose) cat("Using user supplied", deparse(deparse1(substitute(x))), "instead of deriving.\n")
x
} else {
y
}
}
Binary file modified R/sysdata.rda
Binary file not shown.
1 change: 0 additions & 1 deletion data-raw/internal_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ clinsight_col_specs <- c(
"subject_id" = "c",
"event_id" = "c",
"event_date" = "D",
"event_name" = "c",
"event_repeat" = "i",
"form_id" = "c",
"form_repeat" = "i",
Expand Down
Binary file modified data-raw/metadata.xlsx
Binary file not shown.
Binary file modified data/metadata.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion inst/golem-config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
default:
golem_name: clinsight
golem_version: 0.1.0
golem_version: 0.1.0.9001
app_prod: no
user_identification: test_user
study_data: !expr clinsight::clinsightful_data
Expand Down
48 changes: 24 additions & 24 deletions tests/testthat/_snaps/fct_appdata.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@
df[c(1, 1000, 2000, 3000, 4000, 5000), ]
Output
# A tibble: 6 x 23
site_code subject_id event_id event_date event_name event_repeat form_id
<chr> <chr> <chr> <date> <chr> <int> <chr>
1 9600 9600-001 SCR 2022-11-09 Screening 1 DM
2 <NA> <NA> <NA> NA <NA> NA <NA>
3 <NA> <NA> <NA> NA <NA> NA <NA>
4 <NA> <NA> <NA> NA <NA> NA <NA>
5 <NA> <NA> <NA> NA <NA> NA <NA>
6 <NA> <NA> <NA> NA <NA> NA <NA>
# i 16 more variables: form_repeat <int>, edit_date_time <dttm>, day <drtn>,
# vis_day <dbl>, vis_num <dbl>, event_label <chr>, item_name <chr>,
site_code subject_id event_id event_date event_repeat form_id form_repeat
<chr> <chr> <chr> <date> <int> <chr> <int>
1 9600 9600-001 SCR 2022-11-09 1 DM 1
2 <NA> <NA> <NA> NA NA <NA> NA
3 <NA> <NA> <NA> NA NA <NA> NA
4 <NA> <NA> <NA> NA NA <NA> NA
5 <NA> <NA> <NA> NA NA <NA> NA
6 <NA> <NA> <NA> NA NA <NA> NA
# i 16 more variables: edit_date_time <dttm>, day <drtn>, vis_day <dbl>,
# vis_num <dbl>, event_name <chr>, event_label <chr>, item_name <chr>,
# item_type <chr>, item_group <chr>, item_value <chr>, item_unit <chr>,
# lower_lim <dbl>, upper_lim <dbl>, significance <chr>, reason_notdone <chr>,
# region <chr>
Expand All @@ -51,21 +51,21 @@
df
Output
# A tibble: 543 x 23
site_code subject_id event_id event_date event_name event_repeat form_id
<chr> <chr> <chr> <date> <chr> <int> <chr>
1 9600 9600-001 SCR 2022-11-09 Screening 1 DM
2 9600 9600-001 SCR 2022-11-09 Screening 1 DM
3 9600 9600-002 SCR 2022-01-01 Screening 1 DM
4 9600 9600-002 SCR 2022-01-01 Screening 1 DM
5 9600 9600-002 SCR 2022-01-01 Screening 1 DM
6 9600 9600-002 SCR 2022-01-01 Screening 1 STE
7 9600 9600-002 SCR 2022-01-01 Screening 1 STE
8 9600 9600-002 SCR 2022-01-01 Screening 1 VS
9 9600 9600-002 SCR 2022-01-01 Screening 1 VS
10 9600 9600-002 SCR 2022-01-01 Screening 1 VS
site_code subject_id event_id event_date event_repeat form_id form_repeat
<chr> <chr> <chr> <date> <int> <chr> <int>
1 9600 9600-001 SCR 2022-11-09 1 DM 1
2 9600 9600-001 SCR 2022-11-09 1 DM 1
3 9600 9600-002 SCR 2022-01-01 1 DM 1
4 9600 9600-002 SCR 2022-01-01 1 DM 1
5 9600 9600-002 SCR 2022-01-01 1 DM 1
6 9600 9600-002 SCR 2022-01-01 1 STE 1
7 9600 9600-002 SCR 2022-01-01 1 STE 1
8 9600 9600-002 SCR 2022-01-01 1 VS 1
9 9600 9600-002 SCR 2022-01-01 1 VS 1
10 9600 9600-002 SCR 2022-01-01 1 VS 1
# i 533 more rows
# i 16 more variables: form_repeat <int>, edit_date_time <dttm>, day <drtn>,
# vis_day <dbl>, vis_num <dbl>, event_label <chr>, item_name <chr>,
# i 16 more variables: edit_date_time <dttm>, day <drtn>, vis_day <dbl>,
# vis_num <dbl>, event_name <chr>, event_label <chr>, item_name <chr>,
# item_type <chr>, item_group <chr>, item_value <chr>, item_unit <chr>,
# lower_lim <dbl>, upper_lim <dbl>, significance <chr>, reason_notdone <chr>,
# region <chr>
Expand Down
Binary file modified tests/testthat/fixtures/testapp-raw/altered_metadata.xlsx
Binary file not shown.
33 changes: 33 additions & 0 deletions tests/testthat/test-golem_utils_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,36 @@ test_that("%|NA|% works", {
)
})

test_that("%|_|% works", {
expect_equal(
mtcarrs %|_|% iris,
iris
)
test_wrn <- capture_output_lines(
test_out <- mtcars %|_|% iris
)
expect_equal(
test_wrn,
'Using user supplied "mtcars" instead of deriving.'
)
expect_equal(
test_out,
mtcars
)
test_wrn <- capture_output_lines(
test_out <- dplyr::mutate(mtcars, test = mpg %|_|% "RHS")
)
expect_equal(
test_wrn,
'Using user supplied "mpg" instead of deriving.'
)
expect_equal(
test_out,
cbind(mtcars, list(test = mtcars$mpg))
)
expect_equal(
dplyr::mutate(mtcars, test = mpg2 %|_|% "RHS"),
cbind(mtcars, list(test = "RHS"))
)
})

4 changes: 2 additions & 2 deletions tests/testthat/test-rename_raw_data.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
describe("rename_raw_data() renames raw study data and throws informative errors if needed.", {

it("renames a data frame as expected", {
col_names <- metadata$column_names
col_names <- rbind(metadata$column_names, list("Placeholder", "pl"))
testdata <- lapply(mtcars, as.character) |> as.data.frame()
col_names$name_raw <- names(testdata)

Expand All @@ -11,7 +11,7 @@ describe("rename_raw_data() renames raw study data and throws informative errors
})

it("errors with incorrect input", {
col_names <- metadata$column_names
col_names <- rbind(metadata$column_names, list("Placeholder", "pl"))
col_names$name_raw <- names(mtcars)
expect_error(
rename_raw_data(mtcars, column_names = "incorrect input"),
Expand Down
Loading