Skip to content

Commit

Permalink
Merge branch 'openpharma:master' into upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
fcollinbi authored Feb 26, 2024
2 parents e6ca6a2 + 4f8752a commit aa0c8a7
Show file tree
Hide file tree
Showing 87 changed files with 3,284 additions and 988 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: simaerep
Title: Find Clinical Trial Sites Under-Reporting Adverse Events
Version: 0.4.3
Version: 0.4.4
Authors@R: c(
person(given = "Bjoern",
family = "Koneswarakantha",
Expand Down Expand Up @@ -44,6 +44,6 @@ Suggests:
vdiffr,
lintr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Language: en-US
Config/testthat/edition: 3
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ importFrom(cowplot,get_legend)
importFrom(cowplot,ggdraw)
importFrom(cowplot,plot_grid)
importFrom(dplyr,across)
importFrom(dplyr,all_equal)
importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,between)
Expand Down Expand Up @@ -102,6 +101,7 @@ importFrom(purrr,possibly)
importFrom(purrr,safely)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,enexpr)
importFrom(rlang,env_has)
importFrom(stats,ecdf)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# simaerep 0.4.4
- allow flexible AE rates in data simulations
- add vignette comparing simaerep to gsm performance
- fix dplyr warnings

# simaerep 0.4.3
- delete performance unit tests (poisson faster than bootstrap) to accommodate CRAN request

Expand Down
13 changes: 2 additions & 11 deletions R/0_imports.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
# satisfy lintr
# lintr falsely flags possibly_ecdf as unused variable
# using rlang::.data here causes error with furrr in sim_test_data_portfolio
# patnum, n_ae, visit

if (getRversion() >= "2.15.1") {
utils::globalVariables(c("possibly_ecdf", "patnum", "n_ae", "visit"))
}

#' @importFrom progressr progressor
#' @importFrom cowplot get_legend plot_grid ggdraw draw_label plot_grid plot_grid
#' @importFrom cowplot ggdraw draw_label
Expand All @@ -19,13 +10,13 @@ if (getRversion() >= "2.15.1") {
#' @importFrom furrr future_map future_pmap furrr_options
#' @importFrom progressr with_progress
#' @importFrom stringr str_count str_pad str_length
#' @importFrom rlang := .data enexpr env_has
#' @importFrom rlang := .data enexpr env_has .env
#' @importFrom dplyr select mutate filter summarise group_by summarise_all summarise_at
#' @importFrom dplyr mutate_all mutate_at ungroup vars bind_cols bind_rows pull
#' @importFrom dplyr n_distinct distinct arrange right_join left_join inner_join
#' @importFrom dplyr rename sample_n between row_number dense_rank desc case_when
#' @importFrom dplyr group_by_at n is_grouped_df everything one_of lag any_of across
#' @importFrom dplyr lead all_equal
#' @importFrom dplyr lead
#' @importFrom tidyr tibble unnest nest fill
#' @importFrom knitr kable
#' @importFrom tibble tibble
Expand Down
2 changes: 1 addition & 1 deletion R/S3_orivisit.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ as.data.frame.orivisit <- function(x, ..., env = parent.frame()) {
dim <- dim(df)
df_summary <- summarise_df_visit(df)

if (! all_equal(df_summary, x$df_summary)) stop.orivisit()
if (! all.equal(df_summary, x$df_summary, tolerance = 1e-4)) stop.orivisit()
if (! all(dim == x$dim)) stop.orivisit()

return(df)
Expand Down
3 changes: 2 additions & 1 deletion R/lint.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ lint_package <- function(path = ".", ...) {
linters = lintr::linters_with_defaults(
line_length_linter = lintr::line_length_linter(120),
trailing_whitespace_linter = NULL,
cyclocomp_linter = lintr::cyclocomp_linter(25)
cyclocomp_linter = lintr::cyclocomp_linter(25),
indentation_linter = NULL
),
exclusions = list("inst/logo/logo.R", "tests/spelling.R", "vignettes"),
...)
Expand Down
163 changes: 81 additions & 82 deletions R/simaerep.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ check_df_visit <- function(df_visit) {

cols_na <- df_visit %>%
summarise_at(
vars(
.data$study_id,
.data$site_number,
.data$patnum,
.data$n_ae,
.data$visit
),
vars(c(
"study_id",
"site_number",
"patnum",
"n_ae",
"visit"
)),
anyNA
) %>%
unlist()
Expand All @@ -60,10 +60,10 @@ check_df_visit <- function(df_visit) {

df_visit %>%
summarise_at(
vars(
.data$n_ae,
.data$visit
),
vars(c(
"n_ae",
"visit"
)),
~ is.numeric(.)
) %>%
unlist() %>%
Expand Down Expand Up @@ -129,13 +129,13 @@ exp_implicit_missing_visits <- function(df_visit) {
group_by(.data$study_id) %>%
mutate(min_study_visit = min(.data$visit),
max_study_visit = max(.data$visit)) %>%
select(
.data$study_id,
.data$site_number,
.data$patnum,
.data$min_study_visit,
.data$max_study_visit
) %>%
select(c(
"study_id",
"site_number",
"patnum",
"min_study_visit",
"max_study_visit"
)) %>%
distinct() %>%
mutate(
visit = map2(
Expand All @@ -144,14 +144,14 @@ exp_implicit_missing_visits <- function(df_visit) {
function(x, y) seq(x, y, 1)
)
) %>%
unnest(.data$visit) %>%
select(
.data$study_id,
.data$site_number,
.data$patnum,
.data$min_study_visit,
.data$visit
)
unnest("visit") %>%
select(c(
"study_id",
"site_number",
"patnum",
"min_study_visit",
"visit"
))

df_visit_out <- df_visit %>%
group_by(.data$study_id, .data$site_number, .data$patnum) %>%
Expand All @@ -164,7 +164,7 @@ exp_implicit_missing_visits <- function(df_visit) {
) %>%
group_by(.data$study_id, .data$site_number, .data$patnum) %>%
arrange(.data$visit) %>%
fill(.data$n_ae, .direction = "down") %>%
fill("n_ae", .direction = "down") %>%
mutate(
min_visit_pat = min(.data$min_visit_pat, na.rm = TRUE),
max_visit_pat = max(.data$max_visit_pat, na.rm = TRUE)
Expand All @@ -177,13 +177,13 @@ exp_implicit_missing_visits <- function(df_visit) {
.data$visit <= .data$max_visit_pat
) %>%
mutate(n_ae = ifelse(is.na(.data$n_ae), 0, .data$n_ae)) %>%
select(
.data$study_id,
.data$site_number,
.data$patnum,
.data$n_ae,
.data$visit
) %>%
select(c(
"study_id",
"site_number",
"patnum",
"n_ae",
"visit"
)) %>%
arrange(.data$study_id, .data$site_number, .data$patnum, .data$visit)

if (nrow(df_visit_out) > nrow(df_visit)) {
Expand Down Expand Up @@ -291,15 +291,15 @@ get_visit_med75 <- function(df_pat,
.data$visit_med75
)
) %>%
select(- .data$study_qup8_max_visit)
select(- "study_qup8_max_visit")
}

df_site <- df_site %>%
select(.data$study_id,
.data$site_number,
.data$n_pat,
.data$n_pat_with_med75,
.data$visit_med75)
select(c("study_id",
"site_number",
"n_pat",
"n_pat_with_med75",
"visit_med75"))

return(df_site)
}
Expand Down Expand Up @@ -486,7 +486,7 @@ eval_sites_deprecated <- function(df_sim_sites,
pval_p_vs_fp_ratio = ifelse(.data$pval_p_vs_fp_ratio < 1, 1, .data$pval_p_vs_fp_ratio),
pval_prob_ur = 1 - 1 / .data$pval_p_vs_fp_ratio
) %>%
select(- .data$min_pval)
select(- "min_pval")
}

if ("prob_low" %in% names(df_out)) {
Expand Down Expand Up @@ -575,20 +575,20 @@ get_ecd_values <- function(df_sim_studies, df_sim_sites, val_str) {

df_ecd <- df_sim_studies %>%
rename(val = !!as.symbol(val_str)) %>%
select(.data$study_id, .data$val) %>%
nest(data = c(.data$val)) %>%
select("study_id", "val") %>%
nest(data = "val") %>%
mutate(.ecdf = map(.data$data, ~ possibly_ecdf(.$val))) %>%
select(- .data$data)
select(- "data")

df_out <- df_sim_sites %>%
rename(val = !!as.symbol(val_str)) %>%
left_join(df_ecd, "study_id") %>%
mutate(ecd_val = map2_dbl(.data$`.ecdf`, .data$val, apply_ecdf)) %>%
rename(
!!as.symbol(val_str) := .data$val,
!!as.symbol(paste0(val_str, "_ecd")) := .data$ecd_val #nolint
!!as.symbol(val_str) := "val",
!!as.symbol(paste0(val_str, "_ecd")) := "ecd_val" #nolint
) %>%
select(- .data$`.ecdf`)
select(- ".ecdf")

return(ungroup(df_out))
}
Expand Down Expand Up @@ -628,11 +628,11 @@ pat_pool <- function(df_visit, df_site) {
df_visit %>%
left_join(df_site, by = c("study_id", "site_number")) %>%
filter(.data$visit <= .data$max_visit_med75_study) %>%
select(.data$study_id,
.data$patnum,
.data$visit,
.data$n_ae) %>%
nest(pat_pool = c(.data$patnum, .data$visit, .data$n_ae))
select(c("study_id",
"patnum",
"visit",
"n_ae")) %>%
nest(pat_pool = c("patnum", "visit", "n_ae"))
}


Expand Down Expand Up @@ -835,8 +835,7 @@ prep_for_sim <- function(df_site, df_visit) {
n_ae_site = map(.data$n_ae_site, "n_ae"),
n_ae_study = map(.data$n_ae_study, "n_ae")
) %>%
select(- .data$patients,
- .data$pat_pool)
select(- c("patients", "pat_pool"))

return(df_sim_prep)

Expand Down Expand Up @@ -914,15 +913,15 @@ sim_after_prep <- function(df_sim_prep,
~ ifelse(.data$n_pat_with_med75_study == 0, NA, .)
)
) %>%
select(- .data$n_ae_site, - .data$n_ae_study) %>%
select(.data$study_id,
.data$site_number,
.data$n_pat,
.data$n_pat_with_med75,
.data$visit_med75,
.data$mean_ae_site_med75,
.data$mean_ae_study_med75,
.data$n_pat_with_med75_study,
select(- c("n_ae_site", "n_ae_study")) %>%
select(c("study_id",
"site_number",
"n_pat",
"n_pat_with_med75",
"visit_med75",
"mean_ae_site_med75",
"mean_ae_study_med75",
"n_pat_with_med75_study"),
dplyr::everything()) %>%
ungroup()

Expand Down Expand Up @@ -982,11 +981,11 @@ sim_after_prep <- function(df_sim_prep,
get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {

# site_config are the number of sites with their individual visit_med75 and n_pat_with_med75
df_site_config <- select(df_site,
.data$study_id,
.data$site_number,
.data$visit_med75,
.data$n_pat_with_med75) %>%
df_site_config <- select(df_site, c(
"study_id",
"site_number",
"visit_med75",
"n_pat_with_med75")) %>%
filter(.data$n_pat_with_med75 >= min_n_pat_with_med75)

# pat_pool gives the patient pool for study from which we sample
Expand All @@ -1000,12 +999,12 @@ get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {
pat_pool = map2(.data$pat_pool, .data$visit_med75, function(x, y) filter(x, visit == y)),
n_pat_study = map2_dbl(.data$pat_pool, .data$n_pat_with_med75, function(x, y) nrow(x) - y)
) %>%
select(.data$study_id,
.data$site_number,
.data$visit_med75,
.data$n_pat_with_med75,
.data$n_pat_study,
.data$pat_pool)
select(c("study_id",
"site_number",
"visit_med75",
"n_pat_with_med75",
"n_pat_study",
"pat_pool"))

return(ungroup(df_site_config))
}
Expand Down Expand Up @@ -1122,7 +1121,7 @@ sim_studies <- function(df_visit,
function(x, y) sample_n(x, y, replace = TRUE)),
n_ae_study = map(.data$n_ae_study, "n_ae")
) %>%
select(- .data$pat_pool)
select(- "pat_pool")

if (poisson_test) {
df_config <- df_config %>%
Expand All @@ -1142,18 +1141,18 @@ sim_studies <- function(df_visit,

if (!keep_ae) {
df_config <- df_config %>%
select(- .data$n_ae_site, - .data$n_ae_study)
select(- c("n_ae_site", "n_ae_study"))
} else {
df_config <- df_config %>%
mutate_at(vars(n_ae_site, n_ae_study), ~ map_chr(., paste, collapse = ","))
mutate_at(vars(c("n_ae_site", "n_ae_study")), ~ map_chr(., paste, collapse = ","))
}

return(ungroup(df_config))
}

df_sim <- tibble(r = seq.int(1, r, 1)) %>%
mutate(n_ae_set = .f_map(.data$r, sim)) %>%
unnest(.data$n_ae_set)
unnest("n_ae_set")

return(ungroup(df_sim))
}
Expand Down Expand Up @@ -1230,10 +1229,10 @@ site_aggr <- function(df_visit,

df_mean_ae_med75 <- df_mean_ae_dev %>%
filter(.data$visit == .data$visit_med75) %>%
rename(mean_ae_site_med75 = .data$mean_ae_site) %>%
select(.data$study_id,
.data$site_number,
.data$mean_ae_site_med75)
rename(mean_ae_site_med75 = "mean_ae_site") %>%
select(c("study_id",
"site_number",
"mean_ae_site_med75"))

# Add mean cumulative AE to site aggregate ----------------------

Expand Down
Loading

0 comments on commit aa0c8a7

Please sign in to comment.