diff --git a/DESCRIPTION b/DESCRIPTION
index 4ec6071..4969410 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: simaerep
Title: Find Clinical Trial Sites Under-Reporting Adverse Events
-Version: 0.4.3
+Version: 0.4.4
Authors@R: c(
person(given = "Bjoern",
family = "Koneswarakantha",
@@ -44,6 +44,6 @@ Suggests:
vdiffr,
lintr
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.3
Language: en-US
Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 4433ce6..12b80b4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,7 +46,6 @@ importFrom(cowplot,get_legend)
importFrom(cowplot,ggdraw)
importFrom(cowplot,plot_grid)
importFrom(dplyr,across)
-importFrom(dplyr,all_equal)
importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,between)
@@ -102,6 +101,7 @@ importFrom(purrr,possibly)
importFrom(purrr,safely)
importFrom(rlang,":=")
importFrom(rlang,.data)
+importFrom(rlang,.env)
importFrom(rlang,enexpr)
importFrom(rlang,env_has)
importFrom(stats,ecdf)
diff --git a/NEWS.md b/NEWS.md
index 85249c9..19c05cf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# simaerep 0.4.4
+- allow flexible AE rates in data simulations
+- add vignette comparing simaerep to gsm performance
+- fix dplyr warnings
+
# simaerep 0.4.3
- delete performance unit tests (poisson faster than bootstrap) to accommodate CRAN request
diff --git a/R/0_imports.R b/R/0_imports.R
index af83d86..6a3cf13 100644
--- a/R/0_imports.R
+++ b/R/0_imports.R
@@ -1,12 +1,3 @@
-# satisfy lintr
-# lintr falsely flags possibly_ecdf as unused variable
-# using rlang::.data here causes error with furrr in sim_test_data_portfolio
-# patnum, n_ae, visit
-
-if (getRversion() >= "2.15.1") {
- utils::globalVariables(c("possibly_ecdf", "patnum", "n_ae", "visit"))
-}
-
#' @importFrom progressr progressor
#' @importFrom cowplot get_legend plot_grid ggdraw draw_label plot_grid plot_grid
#' @importFrom cowplot ggdraw draw_label
@@ -19,13 +10,13 @@ if (getRversion() >= "2.15.1") {
#' @importFrom furrr future_map future_pmap furrr_options
#' @importFrom progressr with_progress
#' @importFrom stringr str_count str_pad str_length
-#' @importFrom rlang := .data enexpr env_has
+#' @importFrom rlang := .data enexpr env_has .env
#' @importFrom dplyr select mutate filter summarise group_by summarise_all summarise_at
#' @importFrom dplyr mutate_all mutate_at ungroup vars bind_cols bind_rows pull
#' @importFrom dplyr n_distinct distinct arrange right_join left_join inner_join
#' @importFrom dplyr rename sample_n between row_number dense_rank desc case_when
#' @importFrom dplyr group_by_at n is_grouped_df everything one_of lag any_of across
-#' @importFrom dplyr lead all_equal
+#' @importFrom dplyr lead
#' @importFrom tidyr tibble unnest nest fill
#' @importFrom knitr kable
#' @importFrom tibble tibble
diff --git a/R/S3_orivisit.R b/R/S3_orivisit.R
index a92575f..9fb0b0b 100644
--- a/R/S3_orivisit.R
+++ b/R/S3_orivisit.R
@@ -120,7 +120,7 @@ as.data.frame.orivisit <- function(x, ..., env = parent.frame()) {
dim <- dim(df)
df_summary <- summarise_df_visit(df)
- if (! all_equal(df_summary, x$df_summary)) stop.orivisit()
+ if (! all.equal(df_summary, x$df_summary, tolerance = 1e-4)) stop.orivisit()
if (! all(dim == x$dim)) stop.orivisit()
return(df)
diff --git a/R/lint.R b/R/lint.R
index d46482d..889ddf9 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -16,7 +16,8 @@ lint_package <- function(path = ".", ...) {
linters = lintr::linters_with_defaults(
line_length_linter = lintr::line_length_linter(120),
trailing_whitespace_linter = NULL,
- cyclocomp_linter = lintr::cyclocomp_linter(25)
+ cyclocomp_linter = lintr::cyclocomp_linter(25),
+ indentation_linter = NULL
),
exclusions = list("inst/logo/logo.R", "tests/spelling.R", "vignettes"),
...)
diff --git a/R/simaerep.R b/R/simaerep.R
index 7159176..a2d1271 100644
--- a/R/simaerep.R
+++ b/R/simaerep.R
@@ -43,13 +43,13 @@ check_df_visit <- function(df_visit) {
cols_na <- df_visit %>%
summarise_at(
- vars(
- .data$study_id,
- .data$site_number,
- .data$patnum,
- .data$n_ae,
- .data$visit
- ),
+ vars(c(
+ "study_id",
+ "site_number",
+ "patnum",
+ "n_ae",
+ "visit"
+ )),
anyNA
) %>%
unlist()
@@ -60,10 +60,10 @@ check_df_visit <- function(df_visit) {
df_visit %>%
summarise_at(
- vars(
- .data$n_ae,
- .data$visit
- ),
+ vars(c(
+ "n_ae",
+ "visit"
+ )),
~ is.numeric(.)
) %>%
unlist() %>%
@@ -129,13 +129,13 @@ exp_implicit_missing_visits <- function(df_visit) {
group_by(.data$study_id) %>%
mutate(min_study_visit = min(.data$visit),
max_study_visit = max(.data$visit)) %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$patnum,
- .data$min_study_visit,
- .data$max_study_visit
- ) %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "patnum",
+ "min_study_visit",
+ "max_study_visit"
+ )) %>%
distinct() %>%
mutate(
visit = map2(
@@ -144,14 +144,14 @@ exp_implicit_missing_visits <- function(df_visit) {
function(x, y) seq(x, y, 1)
)
) %>%
- unnest(.data$visit) %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$patnum,
- .data$min_study_visit,
- .data$visit
- )
+ unnest("visit") %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "patnum",
+ "min_study_visit",
+ "visit"
+ ))
df_visit_out <- df_visit %>%
group_by(.data$study_id, .data$site_number, .data$patnum) %>%
@@ -164,7 +164,7 @@ exp_implicit_missing_visits <- function(df_visit) {
) %>%
group_by(.data$study_id, .data$site_number, .data$patnum) %>%
arrange(.data$visit) %>%
- fill(.data$n_ae, .direction = "down") %>%
+ fill("n_ae", .direction = "down") %>%
mutate(
min_visit_pat = min(.data$min_visit_pat, na.rm = TRUE),
max_visit_pat = max(.data$max_visit_pat, na.rm = TRUE)
@@ -177,13 +177,13 @@ exp_implicit_missing_visits <- function(df_visit) {
.data$visit <= .data$max_visit_pat
) %>%
mutate(n_ae = ifelse(is.na(.data$n_ae), 0, .data$n_ae)) %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$patnum,
- .data$n_ae,
- .data$visit
- ) %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "patnum",
+ "n_ae",
+ "visit"
+ )) %>%
arrange(.data$study_id, .data$site_number, .data$patnum, .data$visit)
if (nrow(df_visit_out) > nrow(df_visit)) {
@@ -291,15 +291,15 @@ get_visit_med75 <- function(df_pat,
.data$visit_med75
)
) %>%
- select(- .data$study_qup8_max_visit)
+ select(- "study_qup8_max_visit")
}
df_site <- df_site %>%
- select(.data$study_id,
- .data$site_number,
- .data$n_pat,
- .data$n_pat_with_med75,
- .data$visit_med75)
+ select(c("study_id",
+ "site_number",
+ "n_pat",
+ "n_pat_with_med75",
+ "visit_med75"))
return(df_site)
}
@@ -486,7 +486,7 @@ eval_sites_deprecated <- function(df_sim_sites,
pval_p_vs_fp_ratio = ifelse(.data$pval_p_vs_fp_ratio < 1, 1, .data$pval_p_vs_fp_ratio),
pval_prob_ur = 1 - 1 / .data$pval_p_vs_fp_ratio
) %>%
- select(- .data$min_pval)
+ select(- "min_pval")
}
if ("prob_low" %in% names(df_out)) {
@@ -575,20 +575,20 @@ get_ecd_values <- function(df_sim_studies, df_sim_sites, val_str) {
df_ecd <- df_sim_studies %>%
rename(val = !!as.symbol(val_str)) %>%
- select(.data$study_id, .data$val) %>%
- nest(data = c(.data$val)) %>%
+ select("study_id", "val") %>%
+ nest(data = "val") %>%
mutate(.ecdf = map(.data$data, ~ possibly_ecdf(.$val))) %>%
- select(- .data$data)
+ select(- "data")
df_out <- df_sim_sites %>%
rename(val = !!as.symbol(val_str)) %>%
left_join(df_ecd, "study_id") %>%
mutate(ecd_val = map2_dbl(.data$`.ecdf`, .data$val, apply_ecdf)) %>%
rename(
- !!as.symbol(val_str) := .data$val,
- !!as.symbol(paste0(val_str, "_ecd")) := .data$ecd_val #nolint
+ !!as.symbol(val_str) := "val",
+ !!as.symbol(paste0(val_str, "_ecd")) := "ecd_val" #nolint
) %>%
- select(- .data$`.ecdf`)
+ select(- ".ecdf")
return(ungroup(df_out))
}
@@ -628,11 +628,11 @@ pat_pool <- function(df_visit, df_site) {
df_visit %>%
left_join(df_site, by = c("study_id", "site_number")) %>%
filter(.data$visit <= .data$max_visit_med75_study) %>%
- select(.data$study_id,
- .data$patnum,
- .data$visit,
- .data$n_ae) %>%
- nest(pat_pool = c(.data$patnum, .data$visit, .data$n_ae))
+ select(c("study_id",
+ "patnum",
+ "visit",
+ "n_ae")) %>%
+ nest(pat_pool = c("patnum", "visit", "n_ae"))
}
@@ -829,8 +829,7 @@ prep_for_sim <- function(df_site, df_visit) {
n_ae_site = map(.data$n_ae_site, "n_ae"),
n_ae_study = map(.data$n_ae_study, "n_ae")
) %>%
- select(- .data$patients,
- - .data$pat_pool)
+ select(- c("patients", "pat_pool"))
return(df_sim_prep)
@@ -908,15 +907,15 @@ sim_after_prep <- function(df_sim_prep,
~ ifelse(.data$n_pat_with_med75_study == 0, NA, .)
)
) %>%
- select(- .data$n_ae_site, - .data$n_ae_study) %>%
- select(.data$study_id,
- .data$site_number,
- .data$n_pat,
- .data$n_pat_with_med75,
- .data$visit_med75,
- .data$mean_ae_site_med75,
- .data$mean_ae_study_med75,
- .data$n_pat_with_med75_study,
+ select(- c("n_ae_site", "n_ae_study")) %>%
+ select(c("study_id",
+ "site_number",
+ "n_pat",
+ "n_pat_with_med75",
+ "visit_med75",
+ "mean_ae_site_med75",
+ "mean_ae_study_med75",
+ "n_pat_with_med75_study"),
dplyr::everything()) %>%
ungroup()
@@ -976,11 +975,11 @@ sim_after_prep <- function(df_sim_prep,
get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {
# site_config are the number of sites with their individual visit_med75 and n_pat_with_med75
- df_site_config <- select(df_site,
- .data$study_id,
- .data$site_number,
- .data$visit_med75,
- .data$n_pat_with_med75) %>%
+ df_site_config <- select(df_site, c(
+ "study_id",
+ "site_number",
+ "visit_med75",
+ "n_pat_with_med75")) %>%
filter(.data$n_pat_with_med75 >= min_n_pat_with_med75)
# pat_pool gives the patient pool for study from which we sample
@@ -994,12 +993,12 @@ get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {
pat_pool = map2(.data$pat_pool, .data$visit_med75, function(x, y) filter(x, visit == y)),
n_pat_study = map2_dbl(.data$pat_pool, .data$n_pat_with_med75, function(x, y) nrow(x) - y)
) %>%
- select(.data$study_id,
- .data$site_number,
- .data$visit_med75,
- .data$n_pat_with_med75,
- .data$n_pat_study,
- .data$pat_pool)
+ select(c("study_id",
+ "site_number",
+ "visit_med75",
+ "n_pat_with_med75",
+ "n_pat_study",
+ "pat_pool"))
return(ungroup(df_site_config))
}
@@ -1113,7 +1112,7 @@ sim_studies <- function(df_visit,
function(x, y) sample_n(x, y, replace = TRUE)),
n_ae_study = map(.data$n_ae_study, "n_ae")
) %>%
- select(- .data$pat_pool)
+ select(- "pat_pool")
if (poisson_test) {
df_config <- df_config %>%
@@ -1132,10 +1131,10 @@ sim_studies <- function(df_visit,
if (!keep_ae) {
df_config <- df_config %>%
- select(- .data$n_ae_site, - .data$n_ae_study)
+ select(- c("n_ae_site", "n_ae_study"))
} else {
df_config <- df_config %>%
- mutate_at(vars(n_ae_site, n_ae_study), ~ map_chr(., paste, collapse = ","))
+ mutate_at(vars(c("n_ae_site", "n_ae_study")), ~ map_chr(., paste, collapse = ","))
}
return(ungroup(df_config))
@@ -1143,7 +1142,7 @@ sim_studies <- function(df_visit,
df_sim <- tibble(r = seq.int(1, r, 1)) %>%
mutate(n_ae_set = .f_map(.data$r, sim)) %>%
- unnest(.data$n_ae_set)
+ unnest("n_ae_set")
return(ungroup(df_sim))
}
@@ -1220,10 +1219,10 @@ site_aggr <- function(df_visit,
df_mean_ae_med75 <- df_mean_ae_dev %>%
filter(.data$visit == .data$visit_med75) %>%
- rename(mean_ae_site_med75 = .data$mean_ae_site) %>%
- select(.data$study_id,
- .data$site_number,
- .data$mean_ae_site_med75)
+ rename(mean_ae_site_med75 = "mean_ae_site") %>%
+ select(c("study_id",
+ "site_number",
+ "mean_ae_site_med75"))
# Add mean cumulative AE to site aggregate ----------------------
diff --git a/R/simaerep_plot.R b/R/simaerep_plot.R
index 8f796ca..9c35e90 100644
--- a/R/simaerep_plot.R
+++ b/R/simaerep_plot.R
@@ -86,9 +86,9 @@ plot_dots <- function(df,
}
p <- df %>%
- ggplot(aes_string("x", "y", color = "color")) +
+ ggplot(aes(x, y, color = .data$color)) +
geom_point(size = size_dots) +
- geom_text(aes_string(label = "n_ae"),
+ geom_text(aes(label = n_ae),
color = "black"
) +
geom_rect(aes(
@@ -167,10 +167,10 @@ plot_sim_example <- function(substract_ae_per_pat = 0,
site = LETTERS[1:3],
patients = c(list(seq(1, 50, 1)), list(seq(1, 40, 1)), list(seq(1, 10, 1)))
) %>%
- tidyr::unnest(.data$patients) %>%
+ tidyr::unnest("patients") %>%
mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.)))) %>%
mutate(
- n_ae = ifelse(.data$site == "C", .data$n_ae - substract_ae_per_pat, .data$n_ae),
+ n_ae = ifelse(.data$site == "C", .data$n_ae - .env$substract_ae_per_pat, .data$n_ae),
n_ae = ifelse(.data$n_ae < 0, 0, .data$n_ae)
)
@@ -226,7 +226,7 @@ plot_sim_example <- function(substract_ae_per_pat = 0,
group_by(.data$x) %>%
mutate(y = row_number()) %>%
filter(.data$rep <= 1000) %>%
- ggplot(aes_string("x", "y", fill = "color")) +
+ ggplot(aes(x, y, fill = .data$color)) +
geom_raster() +
scale_fill_identity() +
theme_minimal() +
@@ -331,7 +331,7 @@ plot_sim_examples <- function(substract_ae_per_pat = c(0, 1, 3), ...) {
title_add = map(.data$title_add, make_title, angle = 90),
p = pmap(
list(
- substract_ae_per_pat = .data$substract_ae_per_pat,
+ substract_ae_per_pat = substract_ae_per_pat,
title = .data$title,
legend = .data$legend
),
@@ -348,7 +348,7 @@ plot_sim_examples <- function(substract_ae_per_pat = c(0, 1, 3), ...) {
site = LETTERS[1:3],
patients = c(list(seq(1, 50, 1)), list(seq(1, 40, 1)), list(seq(1, 10, 1)))
) %>%
- tidyr::unnest(.data$patients) %>%
+ tidyr::unnest("patients") %>%
mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.))))
p_legend <- plot_dots(study) +
@@ -445,11 +445,11 @@ plot_study <- function(df_visit,
alert_level_study = NA)
} else {
df_visit <- df_visit %>%
- left_join(select(df_al,
- .data$study_id,
- .data$site_number,
- .data$alert_level_site,
- .data$alert_level_study))
+ left_join(select(df_al, c(
+ "study_id",
+ "site_number",
+ "alert_level_site",
+ "alert_level_study")))
}
# fill in pvalues when missing --------------------------------------------
@@ -462,13 +462,13 @@ plot_study <- function(df_visit,
# make sure ids are character columns
df_visit <- df_visit %>%
- mutate_at(vars(.data$study_id, .data$patnum, .data$site_number), as.character)
+ mutate_at(vars(c("study_id", "patnum", "site_number")), as.character)
df_eval <- df_eval %>%
- mutate_at(vars(.data$study_id, .data$site_number), as.character)
+ mutate_at(vars(c("study_id", "site_number")), as.character)
df_site <- df_site %>%
- mutate_at(vars(.data$study_id, .data$site_number), as.character)
+ mutate_at(vars(c("study_id", "site_number")), as.character)
# filter studies -----------------------------------------------------------
@@ -539,11 +539,11 @@ plot_study <- function(df_visit,
ungroup()
df_ae_dev_patient <- df_visit %>%
- select(.data$study_id,
- .data$site_number,
- .data$patnum,
- .data$visit,
- .data$n_ae) %>%
+ select(c("study_id",
+ "site_number",
+ "patnum",
+ "visit",
+ "n_ae")) %>%
mutate(max_visit_per_pat = max(.data$visit)) %>%
filter(.data$site_number %in% sites_ordered) %>%
ungroup() %>%
@@ -566,7 +566,7 @@ plot_study <- function(df_visit,
)
df_mean_ae_dev_site <- df_mean_ae_dev_site %>%
- select(- .data$visit_med75) %>%
+ select(- "visit_med75") %>%
left_join(df_eval, by = c("study_id", "site_number"))
# we have to split site ae dev up because alert sites get plotted
@@ -579,45 +579,45 @@ plot_study <- function(df_visit,
filter(.data$prob_cut != levels(.data$prob_cut)[1])
df_alert <- df_visit %>%
- select(.data$study_id,
- .data$site_number,
- .data$alert_level_site,
- .data$alert_level_study) %>%
+ select(c("study_id",
+ "site_number",
+ "alert_level_site",
+ "alert_level_study")) %>%
distinct()
df_label <- df_mean_ae_dev_site %>%
filter(.data$visit == .data$visit_med75, .data$site_number %in% sites_ordered) %>%
- select(.data$study_id,
- .data$site_number,
- .data$visit,
- .data$mean_ae) %>%
+ select(c("study_id",
+ "site_number",
+ "visit",
+ "mean_ae")) %>%
left_join(df_site, by = c("study_id", "site_number")) %>%
- select(.data$study_id,
- .data$site_number,
- .data$visit,
- .data$mean_ae,
- .data$n_pat,
- .data$n_pat_with_med75) %>%
+ select(c("study_id",
+ "site_number",
+ "visit",
+ "mean_ae",
+ "n_pat",
+ "n_pat_with_med75")) %>%
left_join(
- select(df_eval,
- - .data$n_pat,
- - .data$n_pat_with_med75)
+ select(df_eval, - c(
+ "n_pat",
+ "n_pat_with_med75"))
, by = c("study_id", "site_number")
) %>%
left_join(df_alert, by = c("study_id", "site_number")) %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$visit,
- .data$mean_ae,
- .data$n_pat,
- .data$n_pat_with_med75,
- .data$prob_low_prob_ur,
- .data$prob_cut,
- .data$color_prob_cut,
- .data$pval_prob_ur,
- .data$alert_level_site
- ) %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "visit",
+ "mean_ae",
+ "n_pat",
+ "n_pat_with_med75",
+ "prob_low_prob_ur",
+ "prob_cut",
+ "color_prob_cut",
+ "pval_prob_ur",
+ "alert_level_site"
+ )) %>%
mutate(
site_number = fct_relevel(.data$site_number, sites_ordered),
color_alert_level = case_when(
@@ -643,22 +643,22 @@ plot_study <- function(df_visit,
)
p_study <- df_mean_ae_dev_site_no_alert %>%
- ggplot(aes_string("visit", "mean_ae"), na.rm = TRUE) +
- geom_line(aes_string(
- group = "site_number",
- color = "color_prob_cut"
+ ggplot(aes(visit, mean_ae), na.rm = TRUE) +
+ geom_line(aes(
+ group = .data$site_number,
+ color = color_prob_cut
)) +
- geom_line(aes_string(
- group = "site_number",
- color = "color_prob_cut"
- ),
- data = df_mean_ae_dev_site_alert,
- size = 1
+ geom_line(aes(
+ group = .data$site_number,
+ color = color_prob_cut
+ ),
+ data = df_mean_ae_dev_site_alert,
+ linewidth = 1
) +
- geom_line(aes_string(group = "study_id"),
+ geom_line(aes(group = .data$study_id),
data = df_mean_ae_dev_study,
color = "gold3",
- size = 1,
+ linewidth = 1,
alpha = 0.5
) +
geom_point(
@@ -704,23 +704,23 @@ plot_study <- function(df_visit,
)
p_site <- df_ae_dev_patient %>%
- ggplot(aes_string("visit", "n_ae"), na.rm = TRUE) +
- geom_line(aes_string(group = "patnum"),
+ ggplot(aes(visit, n_ae), na.rm = TRUE) +
+ geom_line(aes(group = patnum),
color = "grey",
alpha = 0.5
) +
- geom_line(aes_string(
- y = "mean_ae",
- color = "color_prob_cut",
- alpha = 0.5
- ),
- data = df_mean_ae_dev_site,
- size = 1
+ geom_line(aes(
+ y = mean_ae,
+ color = color_prob_cut,
+ alpha = 0.5
+ ),
+ data = df_mean_ae_dev_site,
+ linewidth = 1
) +
geom_line(aes(y = mean_ae),
data = df_mean_ae_dev_study,
color = "gold3",
- size = 1,
+ linewidth = 1,
alpha = 0.5) +
geom_text(aes(label = paste0(n_pat_with_med75, "/", n_pat)),
data = df_label,
@@ -733,8 +733,8 @@ plot_study <- function(df_visit,
x = 0.8 * max_visit,
y = 0.9 * max_ae,
na.rm = TRUE) +
- geom_label(aes_string(label = "alert_level_site",
- color = "color_alert_level"),
+ geom_label(aes(label = .data$alert_level_site,
+ color = .data$color_alert_level),
data = df_label,
x = 0.5 * max_visit,
y = 0.9 * max_ae,
@@ -827,14 +827,14 @@ plot_visit_med75 <- function(df_visit,
df_mean_ae_dev <- get_site_mean_ae_dev(df_visit, df_pat, df_site_max_med75)
df_plot <- df_site_min_med75 %>%
- rename(visit_med75_min = .data$visit_med75) %>%
+ rename(visit_med75_min = "visit_med75") %>%
left_join(
select(
- df_site_max_med75,
- .data$study_id,
- .data$site_number,
- visit_med75_max = .data$visit_med75
- ),
+ df_site_max_med75, c(
+ "study_id",
+ "site_number",
+ visit_med75_max = "visit_med75"
+ )),
by = c(
"study_id",
"site_number"
@@ -843,7 +843,7 @@ plot_visit_med75 <- function(df_visit,
mutate(rnk_sites = rank(desc(.data$n_pat), ties.method = "first")) %>%
filter(.data$rnk_sites <= n_sites) %>%
left_join(df_visit, by = c("study_id", "site_number")) %>%
- left_join(select(df_mean_ae_dev, - .data$visit_med75),
+ left_join(select(df_mean_ae_dev, - "visit_med75"),
by = c("study_id", "site_number", "visit")
) %>%
group_by(.data$study_id, .data$site_number, .data$patnum) %>%
@@ -854,12 +854,12 @@ plot_visit_med75 <- function(df_visit,
ungroup()
df_label <- df_plot %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$n_pat,
- .data$n_pat_with_med75
- ) %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "n_pat",
+ "n_pat_with_med75"
+ )) %>%
distinct() %>%
mutate(label = paste0(.data$n_pat_with_med75, "/", .data$n_pat))
@@ -875,30 +875,30 @@ plot_visit_med75 <- function(df_visit,
filter(.data$site_number %in% unique(df_plot$site_number))
p <- df_plot %>%
- ggplot(aes_string("visit", "n_ae")) +
+ ggplot(aes(visit, n_ae)) +
geom_line(
- aes_string(group = "patnum"),
+ aes(group = patnum),
data = filter(df_plot, .data$has_med75 == "yes"),
color = "grey"
) +
geom_line(
- aes_string(y = "mean_ae_site", group = "site_number"),
+ aes(y = .data$mean_ae_site, group = .data$site_number),
data = df_mean_ae_dev,
color = "slateblue3",
alpha = 0.5,
- size = 2
+ linewidth = 2
) +
- geom_line(aes_string(group = "patnum"),
+ geom_line(aes(group = patnum),
filter(df_plot, .data$has_med75 == "no"),
color = "black",
linetype = 2
) +
- geom_vline(aes_string(xintercept = "visit_med75_max"),
+ geom_vline(aes(xintercept = .data$visit_med75_max),
linetype = 1,
color = "slateblue3"
) +
geom_vline(
- aes_string(xintercept = "visit_med75_min"),
+ aes(xintercept = .data$visit_med75_min),
linetype = 3,
color = "slateblue3"
) +
@@ -907,7 +907,7 @@ plot_visit_med75 <- function(df_visit,
linetype = 2,
color = "slateblue3"
) +
- geom_text(aes_string(label = "label"),
+ geom_text(aes(label = label),
df_label,
x = 0.15 * visit_max,
y = 0.75 * ae_max,
diff --git a/R/simulate_test_data.R b/R/simulate_test_data.R
index b4f39ef..e6dfcfc 100644
--- a/R/simulate_test_data.R
+++ b/R/simulate_test_data.R
@@ -12,6 +12,7 @@
#' @param max_visit_sd standard deviation of maximum number of visits of each
#' patient, Default: 4
#' @param ae_per_visit_mean mean ae per visit per patient, Default: 0.5
+#' @param ae_rates vector with visit-specific ae rates, Default: Null
#' @return tibble with columns site_number, patnum, is_ur, max_visit_mean,
#' max_visit_sd, ae_per_visit_mean, visit, n_ae
#' @details maximum visit number will be sampled from normal distribution with
@@ -25,6 +26,8 @@
#' df_visit <- sim_test_data_study(n_pat = 100, n_sites = 5,
#' frac_site_with_ur = 0.2, ur_rate = 0.5)
#' df_visit[which(df_visit$patnum == "P000001"),]
+#' ae_rates <- c(0.7, rep(0.5, 8), rep(0.3, 5))
+#' sim_test_data_study(n_pat = 100, n_sites = 5, ae_rates = ae_rates)
#' @rdname sim_test_data_study
#' @export
sim_test_data_study <- function(n_pat = 1000,
@@ -33,8 +36,71 @@ sim_test_data_study <- function(n_pat = 1000,
ur_rate = 0,
max_visit_mean = 20,
max_visit_sd = 4,
- ae_per_visit_mean = 0.5
+ ae_per_visit_mean = 0.5,
+ ae_rates = NULL
) {
+
+ # construct patient ae sample function
+ # supports constant and non-constant ae rates
+
+ f_sim_pat <- function(vs_max, vs_sd, is_ur) {
+
+ if (! any(c(is.null(ae_rates), is.na(ae_rates)))) {
+
+ if (is_ur) {
+ ae_rates <- ae_rates * (1-ur_rate) # nolint
+ }
+
+ f_sample_ae <- function(max_visit) {
+
+ # extrapolate missing ae rates by extending last rate
+ fill <- rep(ae_rates[length(ae_rates)], max_visit)
+ fill[seq_along(ae_rates)] <- ae_rates
+ ae_rates <- fill
+
+ aes <- integer(0)
+
+ for (i in seq(1, max_visit)) {
+ ae <- rpois(1, ae_rates[i])
+ aes <- c(aes, ae)
+ }
+
+ return(aes)
+
+ }
+
+ ae_per_visit_mean <- mean(ae_rates)
+
+ } else {
+
+ if (is_ur) {
+ ae_per_visit_mean <- ae_per_visit_mean * (1-ur_rate) # nolint
+ }
+
+ f_sample_ae <- function(max_visit) {
+
+ rpois(max_visit, ae_per_visit_mean)
+
+ }
+
+ }
+
+ aes <- sim_test_data_patient(
+ .f_sample_max_visit = function(x) rnorm(1, mean = vs_max, sd = vs_sd),
+ .f_sample_ae_per_visit = f_sample_ae
+ )
+
+ tibble(
+ ae_per_visit_mean = ae_per_visit_mean,
+ visit = seq(1, length(aes)),
+ n_ae = aes,
+ )
+
+ }
+
+
+
+
tibble(patnum = seq(1, n_pat)) %>%
mutate(patnum = str_pad(patnum, width = 6, side = "left", pad = "0"),
patnum = paste0("P", patnum),
@@ -45,19 +111,13 @@ sim_test_data_study <- function(n_pat = 1000,
site_number = paste0("S", .data$site_number),
max_visit_mean = max_visit_mean,
max_visit_sd = max_visit_sd,
- ae_per_visit_mean = ifelse(.data$is_ur, ae_per_visit_mean * (1 - ur_rate), ae_per_visit_mean),
- aes = pmap(list(vm = max_visit_mean,
- vs = max_visit_sd,
- am = ae_per_visit_mean),
- function(vm, vs, am) {
- sim_test_data_patient(
- .f_sample_max_visit = function() rnorm(1, mean = vm, sd = vs),
- .f_sample_ae_per_visit = function(max_visit) rpois(max_visit, am)
- )
- }
- ),
- aes = map(aes, ~ tibble(visit = seq(1, length(.)), n_ae = .))) %>%
- unnest(aes)
+ aes = pmap(list(.data$max_visit_mean,
+ .data$max_visit_sd,
+ .data$is_ur),
+ f_sim_pat
+ )
+ ) %>%
+ unnest("aes")
}
@@ -284,26 +344,26 @@ sim_ur_scenarios <- function(df_portf,
ur_rate <- ur_rate[ur_rate > 0]
df_grid_gr0 <- df_site %>%
- select(.data$study_id, .data$site_number, .data$n_pat_with_med75, .data$visit_med75) %>%
+ select(c("study_id", "site_number", "n_pat_with_med75", "visit_med75")) %>%
left_join(df_mean_pat, by = c(study_id = "study_id", visit_med75 = "visit")) %>%
mutate(extra_ur_sites = list(0:extra_ur_sites)) %>%
- unnest(.data$extra_ur_sites) %>%
+ unnest("extra_ur_sites") %>%
mutate(
frac_pat_with_ur = (.data$n_pat_with_med75 + .data$extra_ur_sites * .data$mean_n_pat) /
.data$sum_n_pat,
ur_rate = list(ur_rate)
) %>%
- unnest(.data$ur_rate) %>%
- select(
- .data$study_id,
- .data$site_number,
- .data$extra_ur_sites,
- .data$frac_pat_with_ur,
- .data$ur_rate
- )
+ unnest("ur_rate") %>%
+ select(c(
+ "study_id",
+ "site_number",
+ "extra_ur_sites",
+ "frac_pat_with_ur",
+ "ur_rate"
+ ))
df_grid_0 <- df_grid_gr0 %>%
- select(.data$study_id, .data$site_number) %>%
+ select(c("study_id", "site_number")) %>%
distinct() %>%
mutate(extra_ur_sites = 0,
frac_pat_with_ur = 0,
@@ -323,10 +383,10 @@ sim_ur_scenarios <- function(df_portf,
sim_scenario
)
) %>%
- select(- .data$n_ae_site, - .data$n_ae_study) %>%
+ select(- c("n_ae_site", "n_ae_study")) %>%
mutate(n_ae_site = map(.data$scenarios, "n_ae_site"),
n_ae_study = map(.data$scenarios, "n_ae_study")) %>%
- select(- .data$scenarios)
+ select(- "scenarios")
if (progress) {
message("getting under-reporting stats")
@@ -346,7 +406,7 @@ sim_ur_scenarios <- function(df_portf,
group_by(.data$study_id_gr, .data$site_number_gr) %>%
nest() %>%
ungroup() %>%
- select(- .data$study_id_gr, - .data$site_number_gr)
+ select(- c("study_id_gr", "site_number_gr"))
with_progress_cnd(
ls_df_sim_sites <- purrr_bar(
@@ -389,6 +449,7 @@ sim_ur_scenarios <- function(df_portf,
#' @title Simulate Portfolio Test Data
#' @description Simulate visit level data from a portfolio configuration.
#' @param df_config dataframe as returned by \code{\link{get_config}}
+#' @param df_ae_rates dataframe with ae rates. Default: NULL
#' @param parallel logical activate parallel processing, see details, Default: FALSE
#' @param progress logical, Default: TRUE
#'@return dataframe with the following columns: \describe{
@@ -452,7 +513,7 @@ sim_ur_scenarios <- function(df_portf,
#' \code{\link{get_portf_perf}}
#' @rdname sim_test_data_portfolio
#' @export
-sim_test_data_portfolio <- function(df_config, parallel = FALSE, progress = TRUE) {
+sim_test_data_portfolio <- function(df_config, df_ae_rates = NULL, parallel = FALSE, progress = TRUE) {
# checks --------------------------
df_config <- ungroup(df_config)
@@ -478,6 +539,24 @@ sim_test_data_portfolio <- function(df_config, parallel = FALSE, progress = TRUE
)
)
+ # prep ae_rates -----------------
+
+ if (is.null(df_ae_rates)) {
+ df_config$ae_rates <- NA
+ } else {
+ df_ae_rates <- df_ae_rates %>%
+ select(c("study_id", "ae_rate")) %>%
+ group_by(.data$study_id) %>%
+ nest() %>%
+ mutate(
+ ae_rates = map(.data$data, "ae_rate")
+ ) %>%
+ select(- "data")
+
+ df_config <- df_config %>%
+ inner_join(df_ae_rates, by = "study_id")
+ }
+
# exec --------------------------
if (parallel) {
@@ -496,24 +575,26 @@ sim_test_data_portfolio <- function(df_config, parallel = FALSE, progress = TRUE
.data$ae_per_visit_mean,
.data$max_visit_sd,
.data$max_visit_mean,
- .data$n_pat
+ .data$n_pat,
+ .data$ae_rates
),
.purrr = .purrr,
.f = function(ae_per_visit_mean,
max_visit_sd,
max_visit_mean,
- n_pat) {
+ n_pat,
+ ae_rates) {
sim_test_data_study(
n_pat = n_pat,
n_sites = 1,
max_visit_mean = max_visit_mean,
max_visit_sd = max_visit_sd,
- ae_per_visit_mean = ae_per_visit_mean
+ ae_per_visit_mean = ae_per_visit_mean,
+ ae_rates = ae_rates
) %>%
- select(
- # using rlang::.data here causes error with furrr
- patnum, visit, n_ae
- )
+ select(c(
+ "patnum", "visit", "n_ae"
+ ))
},
.progress = progress,
.purrr_args = .purrr_args,
@@ -524,8 +605,8 @@ sim_test_data_portfolio <- function(df_config, parallel = FALSE, progress = TRUE
)
df_portf <- df_config_sim %>%
- unnest(.data$sim) %>%
- select(- .data$n_pat) %>%
+ unnest("sim") %>%
+ select(- c("n_pat", "ae_rates")) %>%
group_by(.data$study_id) %>%
mutate(
# patnums need to be made site exclusive
@@ -612,7 +693,7 @@ get_config <- function(df_site,
pad_width = 4) {
stopifnot(c("study_id", "site_number", "patnum", "max_visit", "max_ae") %in% colnames(df_site))
- stopifnot(nrow(df_site) == nrow(distinct(select(df_site, .data$study_id, .data$site_number, .data$patnum))))
+ stopifnot(nrow(df_site) == nrow(distinct(select(df_site, c("study_id", "site_number", "patnum")))))
df_site %>%
summarise_all(~ ! anyNA(.)) %>%
@@ -734,7 +815,7 @@ get_portf_perf <- function(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.
0,
.data$n_sites_total)
) %>%
- select(.data$extra_ur_sites, .data$ur_rate, .data$ratio_sites_with_na) %>%
+ select(c("extra_ur_sites", "ur_rate", "ratio_sites_with_na")) %>%
knitr::kable() %>%
paste(collapse = "\n")
@@ -743,7 +824,7 @@ get_portf_perf <- function(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.
}
- stat_at_0 <- df_scen %>% #nolint
+ stat_at_0 <- df_scen %>% # nolint
filter(.data$ur_rate == 0, .data$frac_pat_with_ur == 0) %>%
pull(.data[[stat]])
@@ -760,7 +841,7 @@ get_portf_perf <- function(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.
df_prep <- df_scen %>%
mutate(data = list(df_thresh)) %>%
- unnest(.data$data) %>%
+ unnest("data") %>%
mutate(stat = .data[[stat]]) %>%
group_by(.data$fpr, .data$thresh, .data$extra_ur_sites, .data$ur_rate) %>%
summarise(
@@ -771,7 +852,7 @@ get_portf_perf <- function(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.
df_prep_0 <- df_prep %>%
filter(.data$ur_rate == 0) %>%
mutate(extra_ur_sites = list(unique(df_prep$extra_ur_sites))) %>%
- unnest(.data$extra_ur_sites)
+ unnest("extra_ur_sites")
df_prep_gr0 <- df_prep %>%
filter(.data$ur_rate > 0)
diff --git a/README.md b/README.md
index 88f8d32..d3a2eed 100644
--- a/README.md
+++ b/README.md
@@ -151,11 +151,6 @@ df_visit %>%
aerep <- simaerep(df_visit)
plot(aerep, study = "A")
-#> Warning: `all_equal()` was deprecated in dplyr 1.1.0.
-#> ℹ Please use `all.equal()` instead.
-#> ℹ And manually order the rows/cols as needed
-#> ℹ The deprecated feature was likely used in the simaerep package.
-#> Please report the issue to the authors.
```
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 082351f..ecae09f 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -36,6 +36,7 @@ articles:
- usability_limits
- check_poisson
- visits_or_days
+ - gsm_perf
reference:
- title: S3 Interface
diff --git a/docs/404.html b/docs/404.html
index 72bd360..f31c8d5 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -45,7 +45,7 @@
@@ -91,6 +91,9 @@
## # A tibble: 59,221 × 9
-## patnum site_number is_ur max_visit_mean max_vi…¹ ae_pe…² visit n_ae study…³
-## <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int> <int> <chr>
-## 1 P000001 S0001 TRUE 20 4 0.25 1 0 ae_per…
-## 2 P000001 S0001 TRUE 20 4 0.25 2 1 ae_per…
-## 3 P000001 S0001 TRUE 20 4 0.25 3 2 ae_per…
-## 4 P000001 S0001 TRUE 20 4 0.25 4 2 ae_per…
-## 5 P000001 S0001 TRUE 20 4 0.25 5 2 ae_per…
-## 6 P000001 S0001 TRUE 20 4 0.25 6 2 ae_per…
-## 7 P000001 S0001 TRUE 20 4 0.25 7 2 ae_per…
-## 8 P000001 S0001 TRUE 20 4 0.25 8 4 ae_per…
-## 9 P000001 S0001 TRUE 20 4 0.25 9 4 ae_per…
-## 10 P000001 S0001 TRUE 20 4 0.25 10 4 ae_per…
-## # … with 59,211 more rows, and abbreviated variable names ¹max_visit_sd,
-## # ²ae_per_visit_mean, ³study_id
+## # A tibble: 58,367 × 9
+## patnum site_number is_ur max_visit_mean max_visit_sd ae_per_visit_mean visit
+## <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
+## 1 P000001 S0001 TRUE 20 4 0.25 1
+## 2 P000001 S0001 TRUE 20 4 0.25 2
+## 3 P000001 S0001 TRUE 20 4 0.25 3
+## 4 P000001 S0001 TRUE 20 4 0.25 4
+## 5 P000001 S0001 TRUE 20 4 0.25 5
+## 6 P000001 S0001 TRUE 20 4 0.25 6
+## 7 P000001 S0001 TRUE 20 4 0.25 7
+## 8 P000001 S0001 TRUE 20 4 0.25 8
+## 9 P000001 S0001 TRUE 20 4 0.25 9
+## 10 P000001 S0001 TRUE 20 4 0.25 10
+## # ℹ 58,357 more rows
+## # ℹ 2 more variables: n_ae <int>, study_id <chr>
df_site
## # A tibble: 300 × 6
-## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_s…¹
-## <chr> <chr> <int> <int> <dbl> <dbl>
-## 1 ae_per_visit: 0.05 S0001 10 8 16 0
-## 2 ae_per_visit: 0.05 S0002 10 9 16 0.333
-## 3 ae_per_visit: 0.05 S0003 10 10 15 0.3
-## 4 ae_per_visit: 0.05 S0004 10 9 16 0.111
-## 5 ae_per_visit: 0.05 S0005 10 9 18 0.556
-## 6 ae_per_visit: 0.05 S0006 10 8 17 0
-## 7 ae_per_visit: 0.05 S0007 10 10 16 0.6
-## 8 ae_per_visit: 0.05 S0008 10 10 15 0.5
-## 9 ae_per_visit: 0.05 S0009 10 9 17 0.111
-## 10 ae_per_visit: 0.05 S0010 10 10 16 0.5
-## # … with 290 more rows, and abbreviated variable name ¹mean_ae_site_med75
+## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75
+## <chr> <chr> <int> <int> <dbl> <dbl>
+## 1 ae_per_vis… S0001 10 9 16 0.556
+## 2 ae_per_vis… S0002 10 10 14 0.3
+## 3 ae_per_vis… S0003 10 8 15 0.375
+## 4 ae_per_vis… S0004 10 8 14 0.375
+## 5 ae_per_vis… S0005 10 9 17 0.333
+## 6 ae_per_vis… S0006 10 7 15 0.143
+## 7 ae_per_vis… S0007 10 8 16 0.375
+## 8 ae_per_vis… S0008 10 7 19 0.714
+## 9 ae_per_vis… S0009 10 9 15 0.333
+## 10 ae_per_vis… S0010 10 7 15 0.714
+## # ℹ 290 more rows
df_sim_sites
## # A tibble: 300 × 10
-## study…¹ site_…² n_pat n_pat…³ visit…⁴ mean_…⁵ mean_…⁶ n_pat…⁷ pval prob_…⁸
-## <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
-## 1 ae_per… S0001 10 8 16 0 0.699 860 0.00834 0.004
-## 2 ae_per… S0002 10 9 16 0.333 0.696 859 0.307 0.132
-## 3 ae_per… S0003 10 10 15 0.3 0.643 898 0.230 0.106
-## 4 ae_per… S0004 10 9 16 0.111 0.698 859 0.0252 0.016
-## 5 ae_per… S0005 10 9 18 0.556 0.791 704 0.570 0.28
-## 6 ae_per… S0006 10 8 17 0 0.742 787 0.00559 0.007
-## 7 ae_per… S0007 10 10 16 0.6 0.693 858 1 0.44
-## 8 ae_per… S0008 10 10 15 0.5 0.640 898 0.840 0.358
-## 9 ae_per… S0009 10 9 17 0.111 0.742 786 0.0178 0.013
-## 10 ae_per… S0010 10 10 16 0.5 0.695 858 0.699 0.304
-## # … with 290 more rows, and abbreviated variable names ¹study_id, ²site_number,
-## # ³n_pat_with_med75, ⁴visit_med75, ⁵mean_ae_site_med75, ⁶mean_ae_study_med75,
-## # ⁷n_pat_with_med75_study, ⁸prob_low
+## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75
+## <chr> <chr> <int> <int> <dbl> <dbl>
+## 1 ae_per_vis… S0001 10 9 16 0.556
+## 2 ae_per_vis… S0002 10 10 14 0.3
+## 3 ae_per_vis… S0003 10 8 15 0.375
+## 4 ae_per_vis… S0004 10 8 14 0.375
+## 5 ae_per_vis… S0005 10 9 17 0.333
+## 6 ae_per_vis… S0006 10 7 15 0.143
+## 7 ae_per_vis… S0007 10 8 16 0.375
+## 8 ae_per_vis… S0008 10 7 19 0.714
+## 9 ae_per_vis… S0009 10 9 15 0.333
+## 10 ae_per_vis… S0010 10 7 15 0.714
+## # ℹ 290 more rows
+## # ℹ 4 more variables: mean_ae_study_med75 <dbl>, n_pat_with_med75_study <int>,
+## # pval <dbl>, prob_low <dbl>
set.seed(1)
-future::plan(multiprocess)
## Warning: Strategy 'multiprocess' is deprecated in future (>= 1.20.0)
-## [2020-10-30]. Instead, explicitly specify either 'multisession' (recommended) or
-## 'multicore'. In the current R session, 'multiprocess' equals 'multisession'.
-## Warning in supportsMulticoreAndRStudio(...): [ONE-TIME WARNING] Forked
-## processing ('multicore') is not supported when running R from RStudio
-## because it is considered unstable. For more details, how to control forked
-## processing or not, and how to silence this warning in future R sessions,
-## see ?parallelly::supportsMulticore
-
-df_sim_studies <- sim_studies(df_visit, df_site,
+plan(multisession, workers = 6)
+
+df_sim_studies <- sim_studies(df_visit, df_site,
r = 100,
parallel = TRUE,
poisson_test = TRUE,
@@ -276,20 +271,20 @@ Simulate Studies
df_sim_studies
## # A tibble: 30,000 × 8
-## r study_id site_number visit_me…¹ n_pat…² n_pat…³ pval prob_…⁴
-## <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl> <dbl>
-## 1 1 ae_per_visit: 0.05 S0001 16 8 860 0.389 0.208
-## 2 1 ae_per_visit: 0.05 S0002 16 9 859 0.841 0.362
-## 3 1 ae_per_visit: 0.05 S0003 15 10 898 1 1
-## 4 1 ae_per_visit: 0.05 S0004 16 9 859 0.672 0.363
-## 5 1 ae_per_visit: 0.05 S0005 18 9 704 1 1
-## 6 1 ae_per_visit: 0.05 S0006 17 8 787 0.543 0.26
-## 7 1 ae_per_visit: 0.05 S0007 16 10 858 1 0.48
-## 8 1 ae_per_visit: 0.05 S0008 15 10 898 0.105 0.047
-## 9 1 ae_per_visit: 0.05 S0009 17 9 786 1 0.562
-## 10 1 ae_per_visit: 0.05 S0010 16 10 858 1 0.576
-## # … with 29,990 more rows, and abbreviated variable names ¹visit_med75,
-## # ²n_pat_with_med75, ³n_pat_study, ⁴prob_low
+## r study_id site_number visit_med75 n_pat_with_med75 n_pat_study pval
+## <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl>
+## 1 1 ae_per_vis… S0001 16 9 819 1
+## 2 1 ae_per_vis… S0002 14 10 911 1
+## 3 1 ae_per_vis… S0003 15 8 872 0.0511
+## 4 1 ae_per_vis… S0004 14 8 913 0.827
+## 5 1 ae_per_vis… S0005 17 9 750 1
+## 6 1 ae_per_vis… S0006 15 7 873 1
+## 7 1 ae_per_vis… S0007 16 8 820 0.391
+## 8 1 ae_per_vis… S0008 19 7 564 1
+## 9 1 ae_per_vis… S0009 15 9 871 1
+## 10 1 ae_per_vis… S0010 15 7 873 1
+## # ℹ 29,990 more rows
+## # ℹ 1 more variable: prob_low <dbl>
+df_check_pval <- get_ecd_values(df_sim_studies, df_sim_sites, val_str = "pval") df_check_pval
-## # A tibble: 300 × 11 -## study…¹ site_…² n_pat n_pat…³ visit…⁴ mean_…⁵ mean_…⁶ n_pat…⁷ pval prob_…⁸ -## <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl> -## 1 ae_per… S0001 10 8 16 0 0.699 860 0.00834 0.004 -## 2 ae_per… S0002 10 9 16 0.333 0.696 859 0.307 0.132 -## 3 ae_per… S0003 10 10 15 0.3 0.643 898 0.230 0.106 -## 4 ae_per… S0004 10 9 16 0.111 0.698 859 0.0252 0.016 -## 5 ae_per… S0005 10 9 18 0.556 0.791 704 0.570 0.28 -## 6 ae_per… S0006 10 8 17 0 0.742 787 0.00559 0.007 -## 7 ae_per… S0007 10 10 16 0.6 0.693 858 1 0.44 -## 8 ae_per… S0008 10 10 15 0.5 0.640 898 0.840 0.358 -## 9 ae_per… S0009 10 9 17 0.111 0.742 786 0.0178 0.013 -## 10 ae_per… S0010 10 10 16 0.5 0.695 858 0.699 0.304 -## # … with 290 more rows, 1 more variable: pval_ecd <dbl>, and abbreviated -## # variable names ¹study_id, ²site_number, ³n_pat_with_med75, ⁴visit_med75, -## # ⁵mean_ae_site_med75, ⁶mean_ae_study_med75, ⁷n_pat_with_med75_study, -## # ⁸prob_low
diff --git a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png index 4eb24a0..e25f87d 100644 Binary files a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png index deeab82..8a3a9bb 100644 Binary files a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/gsm_perf.html b/docs/articles/gsm_perf.html new file mode 100644 index 0000000..f8ae437 --- /dev/null +++ b/docs/articles/gsm_perf.html @@ -0,0 +1,1643 @@ + + + + + + + ++## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75 +## <chr> <chr> <int> <int> <dbl> <dbl> +## 1 ae_per_vis… S0001 10 9 16 0.556 +## 2 ae_per_vis… S0002 10 10 14 0.3 +## 3 ae_per_vis… S0003 10 8 15 0.375 +## 4 ae_per_vis… S0004 10 8 14 0.375 +## 5 ae_per_vis… S0005 10 9 17 0.333 +## 6 ae_per_vis… S0006 10 7 15 0.143 +## 7 ae_per_vis… S0007 10 8 16 0.375 +## 8 ae_per_vis… S0008 10 7 19 0.714 +## 9 ae_per_vis… S0009 10 9 15 0.333 +## 10 ae_per_vis… S0010 10 7 15 0.714 +## # ℹ 290 more rows +## # ℹ 5 more variables: mean_ae_study_med75 <dbl>, n_pat_with_med75_study <int>, +## # pval <dbl>, prob_low <dbl>, pval_ecd <dbl>+@@ -408,7 +403,7 @@df_check_pval %>% ggplot(aes(log(pval, base = 10), log(pval_ecd, base = 10))) + geom_point(alpha = 0.5, size = 2) + @@ -342,28 +336,27 @@
Perform Same Check on
Note: Dots on the edge of the graph that are cut off have zero value for the corresponding axis. For the tie simulations using 1000 repeats the smallest value greater zero we get for prob_low is 0.001 (1e-3).
-+df_check_prob <- get_ecd_values(df_sim_studies, df_sim_sites, val_str = "prob_low") df_check_prob
-## # A tibble: 300 × 11 -## study…¹ site_…² n_pat n_pat…³ visit…⁴ mean_…⁵ mean_…⁶ n_pat…⁷ pval prob_…⁸ -## <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl> -## 1 ae_per… S0001 10 8 16 0 0.699 860 0.00834 0.004 -## 2 ae_per… S0002 10 9 16 0.333 0.696 859 0.307 0.132 -## 3 ae_per… S0003 10 10 15 0.3 0.643 898 0.230 0.106 -## 4 ae_per… S0004 10 9 16 0.111 0.698 859 0.0252 0.016 -## 5 ae_per… S0005 10 9 18 0.556 0.791 704 0.570 0.28 -## 6 ae_per… S0006 10 8 17 0 0.742 787 0.00559 0.007 -## 7 ae_per… S0007 10 10 16 0.6 0.693 858 1 0.44 -## 8 ae_per… S0008 10 10 15 0.5 0.640 898 0.840 0.358 -## 9 ae_per… S0009 10 9 17 0.111 0.742 786 0.0178 0.013 -## 10 ae_per… S0010 10 10 16 0.5 0.695 858 0.699 0.304 -## # … with 290 more rows, 1 more variable: prob_low_ecd <dbl>, and abbreviated -## # variable names ¹study_id, ²site_number, ³n_pat_with_med75, ⁴visit_med75, -## # ⁵mean_ae_site_med75, ⁶mean_ae_study_med75, ⁷n_pat_with_med75_study, -## # ⁸prob_low
+## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75 +## <chr> <chr> <int> <int> <dbl> <dbl> +## 1 ae_per_vis… S0001 10 9 16 0.556 +## 2 ae_per_vis… S0002 10 10 14 0.3 +## 3 ae_per_vis… S0003 10 8 15 0.375 +## 4 ae_per_vis… S0004 10 8 14 0.375 +## 5 ae_per_vis… S0005 10 9 17 0.333 +## 6 ae_per_vis… S0006 10 7 15 0.143 +## 7 ae_per_vis… S0007 10 8 16 0.375 +## 8 ae_per_vis… S0008 10 7 19 0.714 +## 9 ae_per_vis… S0009 10 9 15 0.333 +## 10 ae_per_vis… S0010 10 7 15 0.714 +## # ℹ 290 more rows +## # ℹ 5 more variables: mean_ae_study_med75 <dbl>, n_pat_with_med75_study <int>, +## # pval <dbl>, prob_low <dbl>, prob_low_ecd <dbl>+df_check_prob %>% ggplot(aes(log(prob_low, base = 10), log(prob_low_ecd, base = 10))) + geom_point(alpha = 0.5, size = 2) + @@ -387,6 +380,8 @@
Conclusion
+plan(sequential)
Conclusion -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
Comparing {simaerep} and {gsm} Performance • simaerep + + + + + + + + + + + + + + + + + + + +++ + + + + + + + diff --git a/docs/articles/gsm_perf_files/figure-html/plot-1.png b/docs/articles/gsm_perf_files/figure-html/plot-1.png new file mode 100644 index 0000000..2feedb9 Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/plot-1.png differ diff --git a/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-10-1.png new file mode 100644 index 0000000..5098a97 Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..91a7a28 Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 0000000..e27fc8a Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..524a0f5 Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-9-1.png new file mode 100644 index 0000000..7289510 Binary files /dev/null and b/docs/articles/gsm_perf_files/figure-html/unnamed-chunk-9-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 8b6151d..d1f3485 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -23,7 +23,7 @@+ + + + + ++ + + + +++ + + +++ + + +Comparing {simaerep} and {gsm} Performance
+ + + Source:vignettes/gsm_perf.Rmd
++ +gsm_perf.Rmd
++Load +
+++suppressPackageStartupMessages(library(tidyverse)) +suppressPackageStartupMessages(library(knitr)) +suppressPackageStartupMessages(library(furrr)) +suppressPackageStartupMessages(library(future)) +suppressPackageStartupMessages(library(simaerep)) + +plan(multisession, workers = 6)
++Install {gsm} +
+++devtools::install_github("Gilead-BioStats/gsm", ref = "main")
++Introduction +
+The {gsm} R +package provides a standardized Risk Based Quality Monitoring (RBQM) +framework for clinical trials that pairs a flexible data pipeline with +robust reports. It also uses Funnel Plots to flag outliers which provide +broader tolerance limits for sites with low exposure and narrower limits +for sites with higher exposure. This method is different to the event +rate based limits we have used in previous heuristics to measure +{simaerep} performance. Funnel plots are discussed in greater detail by +Zink et +al. 2018
+One of the draw backs of using funnel plots for flagging is that they +assume that the AE rate remains constant over the course of the +study.
+++Prepare Data +
+++Load Portfolio Configurations +
+We have prepared a snapshot of the AE reporting configuration of our +current portfolio. For each study we have also measured a visit-specific +AE rate which allows us to generate a synthetic portfolio with flexible +AE rates across a study.
+++df_config <- readr::read_csv("ae_conf_20240220.csv") +df_ae_rates <- readr::read_csv("ae_rates_20240220.csv") + +df_config %>% + head(25) %>% + knitr::kable()
+
+ ++ ++ + + + + + + + +study_id +ae_per_visit_mean +site_number +max_visit_sd +max_visit_mean +n_pat ++ +0001 +0.2973806 +4746 +3.5355339 +16.50000 +2 ++ +0001 +0.2973806 +4747 +0.7071068 +27.50000 +2 ++ +0001 +0.2973806 +4750 +0.5000000 +19.25000 +4 ++ +0001 +0.2973806 +4815 +8.3765546 +22.16667 +6 ++ +0001 +0.2973806 +4816 +0.0000000 +27.00000 +1 ++ +0001 +0.2973806 +4817 +0.0000000 +31.00000 +1 ++ +0001 +0.2973806 +4818 +0.0000000 +30.00000 +1 ++ +0001 +0.2973806 +4891 +2.3804761 +22.00000 +7 ++ +0001 +0.2973806 +4893 +0.0000000 +20.00000 +2 ++ +0001 +0.2973806 +4932 +1.0954451 +26.80000 +5 ++ +0001 +0.2973806 +4941 +1.7320508 +27.00000 +3 ++ +0001 +0.2973806 +4942 +1.4142136 +25.00000 +2 ++ +0001 +0.2973806 +4943 +0.0000000 +25.00000 +1 ++ +0001 +0.2973806 +4966 +0.0000000 +18.00000 +1 ++ +0001 +0.2973806 +4967 +0.8164966 +18.00000 +4 ++ +0001 +0.2973806 +4968 +0.0000000 +24.00000 +1 ++ +0001 +0.2973806 +4969 +1.5275252 +21.33333 +3 ++ +0001 +0.2973806 +4984 +1.7078251 +30.75000 +4 ++ +0001 +0.2973806 +4985 +8.9087972 +21.16667 +6 ++ +0001 +0.2973806 +4986 +0.5773503 +20.33333 +3 ++ +0001 +0.2973806 +4988 +1.4142136 +19.00000 +2 ++ +0001 +0.2973806 +5079 +2.1602469 +23.00000 +4 ++ +0001 +0.2973806 +5080 +0.0000000 +23.00000 +1 ++ +0001 +0.2973806 +5081 +0.0000000 +23.00000 +1 ++ + +0001 +0.2973806 +5082 +1.4142136 +22.00000 +2 ++
++ + +study_id +cum_visit +ae_rate +n_pat ++ +0001 +1 +0.017699 +113 ++ +0001 +2 +0.088496 +113 ++ +0001 +3 +0.230088 +113 ++ +0001 +4 +0.223214 +112 ++ +0001 +5 +0.187500 +112 ++ +0001 +6 +0.196429 +112 ++ +0001 +7 +0.396396 +111 ++ +0001 +8 +0.207207 +111 ++ +0001 +9 +0.216216 +111 ++ +0001 +10 +0.315315 +111 ++ +0001 +11 +0.263636 +110 ++ +0001 +12 +0.318182 +110 ++ +0001 +13 +0.345455 +110 ++ +0001 +14 +0.379630 +108 ++ +0001 +15 +0.401869 +107 ++ +0001 +16 +0.485981 +107 ++ +0001 +17 +0.383178 +107 ++ +0001 +18 +0.396226 +106 ++ +0001 +19 +0.370000 +100 ++ +0001 +20 +0.439560 +91 ++ +0001 +21 +0.369863 +73 ++ +0001 +22 +0.370968 +62 ++ +0001 +23 +0.345455 +55 ++ +0001 +24 +0.446809 +47 ++ + +0001 +25 +0.292683 +41 +++Simulate Portfolio +
+We generate two synthetic portfolios with no AE under-reporting +sites. One portfolio with a fixed AE rate for all visits and another one +with a flexible visit-specific AE rate.
+++df_portf_fix <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE) + +df_portf_fix %>% + head(25) %>% + knitr::kable()
+
++ ++ + + + + + + + + + +study_id +ae_per_visit_mean +site_number +max_visit_sd +max_visit_mean +patnum +visit +n_ae ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +1 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +2 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +3 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +4 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +5 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +6 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +7 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +8 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +9 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +10 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0001 +11 +0 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +1 +1 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +2 +1 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +3 +1 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +4 +2 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +5 +2 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +6 +3 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +7 +4 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +8 +4 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +9 +5 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +10 +5 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +11 +6 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +12 +6 ++ +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +13 +6 ++ + +0001 +0.2973806 +4746 +3.535534 +16.5 +0002 +14 +6 +++df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)
++Compare AE rates +
+Next we confirm the different AE rates in our two synthetic +portfolios.
+++df_rate_fix <- df_portf_fix %>% + mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>% + summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>% + mutate(rate = "fix") + + +df_rate_flex <- df_portf_flex %>% + mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>% + summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>% + mutate(rate = "flex")
++bind_rows(df_rate_flex, df_rate_fix) %>% + ggplot(aes(visit, ae_rate)) + + geom_line(aes(group = study_id), alpha = 0.2) + + geom_smooth() + + facet_wrap(~ rate) + + labs(title = "Average AE rates per Study")
+
++bind_rows(df_rate_flex, df_rate_fix) %>% + filter(dense_rank(study_id) <= 16) %>% + ggplot(aes(visit, ae_rate)) + + geom_line(aes(group = rate, color = rate)) + + facet_wrap(~ study_id, scales = "free") + + labs(title = "Average AE rates for Selected Studies")
+
We can confirm that the AE rates in the “flexible” portfolio are not +constant. Moreover we see that the AE rate profile is very unique for +each study.
+++Apply {gsm} +
+++Example +
+Here we demonstrate how to use the {gsm} package on our simulated +portfolios.
+++get_SUBJ <- function(df_portf) { + df_portf %>% + select(study_id, siteid = site_number, subjid = patnum, timeonstudy = visit) %>% + summarise(timeonstudy = max(timeonstudy), .by = c(study_id, siteid, subjid)) %>% + group_by(study_id) %>% + nest() +} + + +get_AE <- function(df_portf) { + df_portf_fix %>% + select(study_id, subjid = patnum, n_ae) %>% + summarise(n_ae = max(n_ae), .by = c(study_id, subjid)) %>% + filter(n_ae > 0) %>% + mutate(n_ae = map(n_ae, ~ tibble(n = seq(1, .)), .progress = TRUE)) %>% + unnest(n_ae) %>% + select(- n) %>% + group_by(study_id) %>% + nest() +} + +dfSUBJ_fix <- get_SUBJ(df_portf_fix) +dfAE_fix <- get_AE(df_portf_fix)
++dfInput <- gsm::AE_Map_Raw(list(dfSUBJ = dfSUBJ_fix$data[[1]], dfAE = dfAE_fix$data[[1]])) +dfInput
+## # A tibble: 113 × 5 +## SubjectID SiteID Exposure Count Rate +## <chr> <chr> <int> <int> <dbl> +## 1 0001 4746 11 0 0 +## 2 0002 4746 17 6 0.353 +## 3 0003 4747 27 6 0.222 +## 4 0004 4747 27 5 0.185 +## 5 0005 4750 18 10 0.556 +## 6 0006 4750 19 8 0.421 +## 7 0007 4750 19 8 0.421 +## 8 0008 4750 18 7 0.389 +## 9 0009 4815 15 3 0.2 +## 10 0010 4815 31 2 0.0645 +## # ℹ 103 more rows
++dfTransformed <- gsm::Transform_Rate( + dfInput, + strNumeratorCol = "Count", + strDenominatorCol = "Exposure" + ) +dfTransformed
+## # A tibble: 44 × 4 +## GroupID Numerator Denominator Metric +## <chr> <int> <int> <dbl> +## 1 4746 6 28 0.214 +## 2 4747 11 54 0.204 +## 3 4750 33 74 0.446 +## 4 4815 33 145 0.228 +## 5 4816 8 27 0.296 +## 6 4817 5 31 0.161 +## 7 4818 5 30 0.167 +## 8 4891 45 144 0.312 +## 9 4893 13 40 0.325 +## 10 4932 36 132 0.273 +## # ℹ 34 more rows
++dfAnalyzed <- gsm::Analyze_NormalApprox(dfTransformed) +dfAnalyzed
+## # A tibble: 44 × 7 +## GroupID Numerator Denominator Metric OverallMetric Factor Score +## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> +## 1 4815 33 145 0.228 0.300 1.31 -1.65 +## 2 4941 17 80 0.212 0.300 1.31 -1.48 +## 3 4817 5 31 0.161 0.300 1.31 -1.47 +## 4 4818 5 30 0.167 0.300 1.31 -1.39 +## 5 5079 20 89 0.225 0.300 1.31 -1.35 +## 6 4747 11 54 0.204 0.300 1.31 -1.34 +## 7 5311 3 20 0.15 0.300 1.31 -1.27 +## 8 5084 4 21 0.190 0.300 1.31 -0.953 +## 9 4943 5 25 0.2 0.300 1.31 -0.949 +## 10 4746 6 28 0.214 0.300 1.31 -0.860 +## # ℹ 34 more rows
++dfFlagged <- gsm::Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) +dfFlagged
+## # A tibble: 44 × 8 +## GroupID Numerator Denominator Metric OverallMetric Factor Score Flag +## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> +## 1 5168 9 15 0.6 0.300 1.31 2.22 1 +## 2 5194 76 197 0.386 0.300 1.31 2.31 1 +## 3 4750 33 74 0.446 0.300 1.31 2.40 1 +## 4 4815 33 145 0.228 0.300 1.31 -1.65 0 +## 5 4941 17 80 0.212 0.300 1.31 -1.48 0 +## 6 4817 5 31 0.161 0.300 1.31 -1.47 0 +## 7 4818 5 30 0.167 0.300 1.31 -1.39 0 +## 8 5079 20 89 0.225 0.300 1.31 -1.35 0 +## 9 4747 11 54 0.204 0.300 1.31 -1.34 0 +## 10 5311 3 20 0.15 0.300 1.31 -1.27 0 +## # ℹ 34 more rows
++dfSummary <- gsm::Summarize(dfFlagged) +dfSummary
+## # A tibble: 44 × 6 +## GroupID Numerator Denominator Metric Score Flag +## <chr> <int> <int> <dbl> <dbl> <dbl> +## 1 5168 9 15 0.6 2.22 1 +## 2 4750 33 74 0.446 2.40 1 +## 3 5194 76 197 0.386 2.31 1 +## 4 5083 18 44 0.409 1.39 0 +## 5 5265 16 42 0.381 1.01 0 +## 6 5130 52 140 0.371 1.62 0 +## 7 4986 21 58 0.362 0.908 0 +## 8 5131 8 23 0.348 0.442 0 +## 9 4985 38 111 0.342 0.860 0 +## 10 5224 8 24 0.333 0.316 0 +## # ℹ 34 more rows
++dfBounds <- gsm::Analyze_NormalApprox_PredictBounds(dfTransformed, vThreshold = c(-3, -2, 2, 3)) +dfBounds
+## # A tibble: 1,246 × 5 +## Threshold Denominator LogDenominator Numerator Metric +## <dbl> <dbl> <dbl> <dbl> <dbl> +## 1 -3 28.1 3.34 0.0765 0.00272 +## 2 -3 28.8 3.36 0.187 0.00649 +## 3 -3 29.6 3.39 0.299 0.0101 +## 4 -3 30.3 3.41 0.413 0.0136 +## 5 -3 31.0 3.43 0.527 0.0170 +## 6 -3 31.7 3.46 0.643 0.0203 +## 7 -3 32.5 3.48 0.760 0.0234 +## 8 -3 33.2 3.50 0.878 0.0264 +## 9 -3 33.9 3.52 0.997 0.0294 +## 10 -3 34.7 3.55 1.12 0.0322 +## # ℹ 1,236 more rows
++chart <- gsm::Visualize_Scatter(dfFlagged, dfBounds) +chart
+
++Simulate UR +
+We write a function that removes a given ratio of AEs from one site +in the data set and returns its z-score.
+++sim_site_ur_gsm <- function(site, ur_rate, dfTransformed) { + dfTransformed <- dfTransformed %>% + mutate( + Numerator = ifelse(GroupID == site, Numerator * (1 - ur_rate), Numerator), + Metric = Numerator / Denominator + ) + + gsm::Analyze_NormalApprox(dfTransformed) %>% + filter(GroupID == site) %>% + pull(Score) +} + +sim_site_ur_gsm("4747", ur_rate = 0.75, dfTransformed)
+## [1] -3.095796
We write another function that systematically applies this +
+sim_site_ur_gsm
across all sites in all studies across a +range of under-reporting ratios.++sim_ur_gsm <- function(dfSUBJ, dfAE) { + dfSUBJ %>% + inner_join(dfAE, by = "study_id") %>% + ungroup() %>% + mutate( + trans = map2(data.x, data.y, ~ gsm::AE_Map_Raw(list(dfSUBJ = .x, dfAE = .y))), + trans = map(trans, ~ gsm::Transform_Rate(., strNumeratorCol = "Count", strDenominatorCol = "Exposure")), + sites = map(data.x, ~ distinct(., siteid)) + ) %>% + select(- starts_with("data.")) %>% + unnest(sites) %>% + mutate(ur = list(tibble(ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1)))) %>% + unnest(ur) %>% + mutate( + score = pmap_dbl(list(siteid, ur_rate, trans), sim_site_ur_gsm, .progress = TRUE) + ) +} + +df_sim_gsm_fix <- sim_ur_gsm(dfSUBJ_fix, dfAE_fix)
+## Warning: There were 3116 warnings in `mutate()`. +## The first warning was: +## ℹ In argument: `z_i = ifelse(...)`. +## Caused by warning: +## ! There was 1 warning in `mutate()`. +## ℹ In argument: `z_0 = ifelse(...)`. +## Caused by warning in `sqrt()`: +## ! NaNs produced +## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3115 remaining warnings.
++df_sim_gsm_fix
+## # A tibble: 129,966 × 5 +## study_id trans siteid ur_rate score +## <chr> <list> <chr> <dbl> <dbl> +## 1 0001 <tibble [44 × 4]> 4746 0 -0.860 +## 2 0001 <tibble [44 × 4]> 4746 0.1 -1.07 +## 3 0001 <tibble [44 × 4]> 4746 0.25 -1.38 +## 4 0001 <tibble [44 × 4]> 4746 0.5 -1.87 +## 5 0001 <tibble [44 × 4]> 4746 0.75 -2.33 +## 6 0001 <tibble [44 × 4]> 4746 1 -2.75 +## 7 0001 <tibble [44 × 4]> 4747 0 -1.34 +## 8 0001 <tibble [44 × 4]> 4747 0.1 -1.61 +## 9 0001 <tibble [44 × 4]> 4747 0.25 -1.99 +## 10 0001 <tibble [44 × 4]> 4747 0.5 -2.57 +## # ℹ 129,956 more rows
We repeat the same steps for the portfolio with the flexible AE +rates.
+++dfSUBJ_flex <- get_SUBJ(df_portf_flex) +dfAE_flex<- get_AE(df_portf_flex) + + +df_sim_gsm_flex <- sim_ur_gsm(dfSUBJ_flex, dfAE_flex)
+## Warning: There were 3085 warnings in `mutate()`. +## The first warning was: +## ℹ In argument: `z_i = ifelse(...)`. +## Caused by warning: +## ! There was 1 warning in `mutate()`. +## ℹ In argument: `z_0 = ifelse(...)`. +## Caused by warning in `sqrt()`: +## ! NaNs produced +## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3084 remaining warnings.
++UR {simaerep} +
+We simulate under-reporting for both portfolios using {simaerep}
+++df_sim_simaerep_fix <- sim_ur_scenarios( + df_portf_fix, + extra_ur_sites = 0, + ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1), + parallel = TRUE, + poisson = TRUE, + prob_lower = TRUE, + progress = TRUE +)
++df_sim_simaerep_flex <- sim_ur_scenarios( + df_portf_flex, + extra_ur_sites = 0, + ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1), + parallel = TRUE, + poisson = TRUE, + prob_lower = TRUE, + progress = TRUE +)
++Evaluate +
+++Combine Results +
+++df_sim_gsm_fix$ae_rate <- "AE rate: fix" +df_sim_gsm_flex$ae_rate <- "AE rate: flexible" +df_sim_simaerep_fix$ae_rate <- "AE rate: fix" +df_sim_simaerep_flex$ae_rate <- "AE rate: flexible" + +df_sim_thresh2 <- bind_rows(df_sim_gsm_fix, df_sim_gsm_flex) %>% + mutate( + is_ur = score <= -2, + type = "{gsm} - thresh: -2", + site_number = siteid + ) %>% + select(type, ae_rate, study_id, site_number, ur_rate, is_ur, score) + + +df_sim_simaerep_threshp95 <- bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>% + mutate( + is_ur = prob_low_prob_ur >= 0.95, + type = "{simaerep} - thresh: 0.95" + ) %>% + select(type, ae_rate, study_id, site_number, ur_rate, is_ur, score = prob_low_prob_ur) + + +df_eval <- bind_rows( + df_sim_thresh2, + df_sim_simaerep_threshp95, +)
++Aggregate +
+++get_prop_test_ci95 <- function(..., ix) { + + stopifnot(ix %in% c(1, 2)) + + tryCatch( + prop.test(...)$conf.int[ix], + error = function(cnd) c(NA, NA)[ix] + ) +} + +aggr_results <- function(df_eval) { + +df_perf <- df_eval %>% + summarise( + n = n(), + .by = c(type, ae_rate, ur_rate, is_ur) + ) %>% + pivot_wider( + names_from = is_ur, + values_from = n, + names_prefix = "is_ur_", + values_fill = 0 + ) %>% + mutate( + n_sites = is_ur_TRUE + is_ur_FALSE + is_ur_NA, + ratio = is_ur_TRUE / n_sites, + ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"), + ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)), + ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)) + ) +} + +df_perf <- aggr_results(df_eval)
++Results +
+++Table +
+ ++
++ ++ + + + + + + + + + + + + +type +ae_rate +ur_rate +is_ur_FALSE +is_ur_TRUE +is_ur_NA +n_sites +ratio +ratio_type +ci95_low +ci95_high ++ +{gsm} - thresh: -2 +AE rate: fix +0.00 +20830 +299 +532 +21661 +0.014 +fpr +0.012 +0.015 ++ +{gsm} - thresh: -2 +AE rate: fix +0.10 +19851 +1278 +532 +21661 +0.059 +tpr +0.056 +0.062 ++ +{gsm} - thresh: -2 +AE rate: fix +0.25 +15848 +5282 +531 +21661 +0.244 +tpr +0.238 +0.250 ++ +{gsm} - thresh: -2 +AE rate: fix +0.50 +9154 +11987 +520 +21661 +0.553 +tpr +0.547 +0.560 ++ +{gsm} - thresh: -2 +AE rate: fix +0.75 +5214 +15941 +506 +21661 +0.736 +tpr +0.730 +0.742 ++ +{gsm} - thresh: -2 +AE rate: fix +1.00 +2915 +18251 +495 +21661 +0.843 +tpr +0.838 +0.847 ++ +{gsm} - thresh: -2 +AE rate: flexible +0.00 +20899 +241 +521 +21661 +0.011 +fpr +0.010 +0.013 ++ +{gsm} - thresh: -2 +AE rate: flexible +0.10 +20669 +471 +521 +21661 +0.022 +tpr +0.020 +0.024 ++ +{gsm} - thresh: -2 +AE rate: flexible +0.25 +19742 +1399 +520 +21661 +0.065 +tpr +0.061 +0.068 ++ +{gsm} - thresh: -2 +AE rate: flexible +0.50 +16448 +4698 +515 +21661 +0.217 +tpr +0.211 +0.222 ++ +{gsm} - thresh: -2 +AE rate: flexible +0.75 +12320 +8830 +511 +21661 +0.408 +tpr +0.401 +0.414 ++ +{gsm} - thresh: -2 +AE rate: flexible +1.00 +8727 +12437 +497 +21661 +0.574 +tpr +0.568 +0.581 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +0.00 +21367 +294 +0 +21661 +0.014 +fpr +0.012 +0.015 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +0.10 +20366 +1295 +0 +21661 +0.060 +tpr +0.057 +0.063 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +0.25 +17155 +4506 +0 +21661 +0.208 +tpr +0.203 +0.214 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +0.50 +10635 +11026 +0 +21661 +0.509 +tpr +0.502 +0.516 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +0.75 +5845 +15816 +0 +21661 +0.730 +tpr +0.724 +0.736 ++ +{simaerep} - thresh: 0.95 +AE rate: fix +1.00 +4053 +17608 +0 +21661 +0.813 +tpr +0.808 +0.818 ++ +{simaerep} - thresh: 0.95 +AE rate: flexible +0.00 +21340 +321 +0 +21661 +0.015 +fpr +0.013 +0.017 ++ +{simaerep} - thresh: 0.95 +AE rate: flexible +0.10 +20234 +1427 +0 +21661 +0.066 +tpr +0.063 +0.069 ++ +{simaerep} - thresh: 0.95 +AE rate: flexible +0.25 +16661 +5000 +0 +21661 +0.231 +tpr +0.225 +0.237 ++ +{simaerep} - thresh: 0.95 +AE rate: flexible +0.50 +9878 +11783 +0 +21661 +0.544 +tpr +0.537 +0.551 ++ +{simaerep} - thresh: 0.95 +AE rate: flexible +0.75 +5216 +16445 +0 +21661 +0.759 +tpr +0.753 +0.765 ++ + +{simaerep} - thresh: 0.95 +AE rate: flexible +1.00 +3595 +18066 +0 +21661 +0.834 +tpr +0.829 +0.839 +++Plot Performance Metrics +
++
+- {gsm} has better performance than {simaerep} when the AE rate is +fixed, while {simaerep} greatly outperforms {gsm} when the AE rate is +flexible and mimics the AE rates encountered in real study data +sets.
+++plot_perf <- function(df_perf) { + + df_perf %>% + mutate(ur_rate = paste0("under-reporting rate: ", ur_rate, " - ", ratio_type), + ur_rate = ifelse(str_detect(ur_rate, "fpr"), "fpr", ur_rate)) %>% + group_by(ur_rate) %>% + ggplot(aes(type, ratio)) + + geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type), linewidth = 1) + + facet_grid(ur_rate ~ ae_rate) + + coord_flip() + + theme(legend.position = "bottom") + + labs( + x = "", + y = "CI95 Performance Ratio", + title = "{simaerep} vs {gsm} Performance" + ) + + scale_color_manual(values = c("#5491CC", "#F46626")) +} + +plot_perf(df_perf)
+
++plan(sequential)
plot(aerep)
## study = NULL, defaulting to study:A
-## Warning: `all_equal()` was deprecated in dplyr 1.1.0.
-## ℹ Please use `all.equal()` instead.
-## ℹ And manually order the rows/cols as needed
-## ℹ The deprecated feature was likely used in the simaerep package.
-## Please report the issue to the authors.
For modifying the default parameters please check out the
simaerep()
documentation.
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
diff --git a/docs/articles/portfolio_perf.html b/docs/articles/portfolio_perf.html index aac6361..b0dab2c 100644 --- a/docs/articles/portfolio_perf.html +++ b/docs/articles/portfolio_perf.html @@ -46,7 +46,7 @@ @@ -92,6 +92,9 @@
df_visit <- df_data_grid %>%
mutate(
@@ -4407,17 +4410,17 @@ Heuristic Rank## # A tibble: 15,674 × 6
## study_id site_number visit n_ae ae_per_visit ls_study_ae_per_visit
## <chr> <chr> <int> <int> <dbl> <list>
-## 1 0001 0001 44 8 0.182 <dbl [43]>
-## 2 0001 0002 49 18 0.367 <dbl [43]>
-## 3 0001 0003 74 17 0.230 <dbl [43]>
-## 4 0001 0004 191 49 0.257 <dbl [43]>
-## 5 0001 0005 30 9 0.3 <dbl [43]>
-## 6 0001 0006 35 17 0.486 <dbl [43]>
+## 1 0001 0001 42 10 0.238 <dbl [43]>
+## 2 0001 0002 60 14 0.233 <dbl [43]>
+## 3 0001 0003 78 25 0.321 <dbl [43]>
+## 4 0001 0004 189 43 0.228 <dbl [43]>
+## 5 0001 0005 30 7 0.233 <dbl [43]>
+## 6 0001 0006 35 9 0.257 <dbl [43]>
## 7 0001 0007 36 9 0.25 <dbl [43]>
-## 8 0001 0008 166 48 0.289 <dbl [43]>
-## 9 0001 0009 40 14 0.35 <dbl [43]>
-## 10 0001 0010 138 33 0.239 <dbl [43]>
-## # … with 15,664 more rows
+## 8 0001 0008 192 64 0.333 <dbl [43]>
+## 9 0001 0009 40 8 0.2 <dbl [43]>
+## 10 0001 0010 134 30 0.224 <dbl [43]>
+## # ℹ 15,664 more rows
We write a function that: - determines how many sites should be flagged in a study - pools ae_per_visit rates and ranks sites (using dense_rank()) - site gets flagged if the specific rank for a site is @@ -4472,11 +4475,11 @@
## # A tibble: 6 × 3
## ur_rate ae_per_visit is_ur
## <dbl> <dbl> <lgl>
-## 1 0 0.182 FALSE
-## 2 0.1 0.164 FALSE
-## 3 0.25 0.136 TRUE
-## 4 0.5 0.0909 TRUE
-## 5 0.75 0.0455 TRUE
+## 1 0 0.238 FALSE
+## 2 0.1 0.214 FALSE
+## 3 0.25 0.179 FALSE
+## 4 0.5 0.119 TRUE
+## 5 0.75 0.0595 TRUE
## 6 1 0 TRUE
We apply.
@@ -5095,7 +5098,7 @@Plot Performance Metrics -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
df_vs <- haven::read_sas('advs.sas7bdat') %>%
select(STUDYID, SUBJID, SITEID, ADY)
@@ -184,7 +187,7 @@ SAS files## 8 CO-101-001 01001001 001 29
## 9 CO-101-001 01001001 001 29
## 10 CO-101-001 01001001 001 38
-## # … with 45,166 more rows
+## # ℹ 45,166 more rows
In order to assign each AE to a visit we union both event tables and sort by date.
@@ -797,7 +800,7 @@+## # ℹ 5,422 more rows{simaerep}## 8 CO-101-001 001 01001001 3 8 ## 9 CO-101-001 001 01001001 3 9 ## 10 CO-101-001 001 01001001 6 10 -## # … with 5,422 more rows
aerep <- simaerep(df_visit)
@@ -835,7 +838,7 @@ {simaerep}
-Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
diff --git a/docs/articles/visits_or_days.html b/docs/articles/visits_or_days.html index c1b0568..0a3940c 100644 --- a/docs/articles/visits_or_days.html +++ b/docs/articles/visits_or_days.html @@ -46,7 +46,7 @@ @@ -92,6 +92,9 @@We do have gaps in between the days leading to implicitly missing
values. simaerep
will correct this automatically and throw
a warning.
Then we proceed as usual.
df_sim_sites <- sim_sites(df_site, df_visit = df_days)
@@ -530,7 +533,7 @@ Compare
diff --git a/docs/authors.html b/docs/authors.html
index f05bf46..3a3a274 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -23,7 +23,7 @@
Koneswarakantha B (2023). +
Koneswarakantha B (2024). simaerep: Find Clinical Trial Sites Under-Reporting Adverse Events. -https://openpharma.github.io/simaerep/, https://github.com/openpharma/simaerep. +R package version 0.4.4, https://github.com/openpharma/simaerep, https://openpharma.github.io/simaerep/.
@Manual{, title = {simaerep: Find Clinical Trial Sites Under-Reporting Adverse Events}, author = {Bjoern Koneswarakantha}, - year = {2023}, - note = {https://openpharma.github.io/simaerep/, https://github.com/openpharma/simaerep}, + year = {2024}, + note = {R package version 0.4.4, https://github.com/openpharma/simaerep}, + url = {https://openpharma.github.io/simaerep/}, }@@ -130,7 +134,7 @@
Left panel shows mean AE reporting per site (lightblue and darkblue lines) against mean AE reporting of the entire study (golden line). Single sites are plotted in descending order by AE under-reporting probability on the right panel in which grey lines denote cumulative AE count of single patients. Grey dots in the left panel plot indicate sites that were picked for single plotting. AE under-reporting probability of dark blue lines crossed threshold of 95%. Numbers in the upper left corner indicate the ratio of patients that have been used for the analysis against the total number of patients. Patients that have not been on the study long enough to reach the evaluation point (visit_med75, see introduction) will be ignored.
@@ -459,7 +457,7 @@Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.7.
diff --git a/docs/reference/is_orivisit.html b/docs/reference/is_orivisit.html index 8d7235d..e84811d 100644 --- a/docs/reference/is_orivisit.html +++ b/docs/reference/is_orivisit.html @@ -23,7 +23,7 @@ @@ -67,6 +67,9 @@sim_test_data_portfolio(df_config, parallel = FALSE, progress = TRUE)
sim_test_data_portfolio(
+ df_config,
+ df_ae_rates = NULL,
+ parallel = FALSE,
+ progress = TRUE
+)
dataframe as returned by get_config
dataframe with ae rates. Default: NULL
logical activate parallel processing, see details, Default: FALSE
mean ae per visit per patient, Default: 0.5
vector with visit-specific ae rates, Default: Null