diff --git a/R/simaerep.R b/R/simaerep.R index e77f76a..a6cf91d 100644 --- a/R/simaerep.R +++ b/R/simaerep.R @@ -448,15 +448,20 @@ eval_sites <- function(df_sim_sites, #'@keywords internal p_adjust <- function(df, col, suffix, method = "BH") { + col_adj <- paste0(col, "_adj") + col_suffix <- paste0(col, suffix) + if (is.na(method) || is.null(method) || method %in% c("None", "none")) { - return(df) + df_out <- df %>% + mutate( + !! as.name(col_suffix) := 1 - .data[[col]] + ) + + return(df_out) } if (inherits(df, "data.frame")) { - col_adj <- paste0(col, "_adj") - col_suffix <- paste0(col, suffix) - df_out <- df %>% mutate( !! as.name(col_adj) := p.adjust(.data[[col]], method = method), diff --git a/R/simaerep_plot.R b/R/simaerep_plot.R index 9b8da05..2a1f335 100644 --- a/R/simaerep_plot.R +++ b/R/simaerep_plot.R @@ -539,14 +539,26 @@ plot_study <- function(df_visit, # ordered sites ------------------------------------------------------------- + if (any(stringr::str_detect(colnames(df_eval), "_adj$"))) { + thresh <- 0.5 + breaks <- c(0, 0.5, 0.75, 0.95, ifelse(max(df_eval[[prob_col]], na.rm = TRUE) > 0.95, + max(df_eval[[prob_col]], na.rm = TRUE) + 0.1, + NA)) + } else { + thresh <- 0.9 + breaks <- c(0, 0.9, 0.95, 0.99, ifelse(max(df_eval[[prob_col]], na.rm = TRUE) > 0.95, + max(df_eval[[prob_col]], na.rm = TRUE) + 0.1, + NA)) + } + n_site_ur_gr_0p5 <- df_eval %>% - filter(.data[[prob_col]] > 0.5) %>% + filter(.data[[prob_col]] > thresh) %>% nrow() if (n_site_ur_gr_0p5 > 0) { sites_ordered <- df_eval %>% arrange(.data$study_id, desc(.data[[prob_col]]), .data[[col_mean_site]]) %>% - filter(.data[[prob_col]] > 0.5) %>% + filter(.data[[prob_col]] > thresh) %>% head(n_sites) %>% .$site_number } else { @@ -604,10 +616,7 @@ plot_study <- function(df_visit, # define score cut-offs + labels---------------------------------------------- palette <- RColorBrewer::brewer.pal(9, "Blues")[c(3, 5, 7, 9)] - breaks <- c(0, 0.5, 0.75, 0.95, ifelse(max(df_eval[[prob_col]], na.rm = 
TRUE) > 0.95, - max(df_eval[[prob_col]], na.rm = TRUE) + 0.1, - NA) - ) + breaks <- breaks[! is.na(breaks)] diff --git a/README.Rmd b/README.Rmd index ac7736f..1b327a9 100644 --- a/README.Rmd +++ b/README.Rmd @@ -71,6 +71,9 @@ Download as pdf in the [release section](https://github.com/openpharma/simaerep/ ## Application +Recommended Threshold: `aerep$dfeval$prob_low_prob_ur: 0.95` + + ```{r fig.width=10} suppressPackageStartupMessages(library(simaerep)) @@ -96,9 +99,57 @@ df_visit %>% aerep <- simaerep(df_visit) -plot(aerep, study = "A") +plot(aerep, study = "A") ``` *Left panel shows mean AE reporting per site (lightblue and darkblue lines) against mean AE reporting of the entire study (golden line). Single sites are plotted in descending order by AE under-reporting probability on the right panel in which grey lines denote cumulative AE count of single patients. Grey dots in the left panel plot indicate sites that were picked for single plotting. AE under-reporting probability of dark blue lines crossed threshold of 95%. Numbers in the upper left corner indicate the ratio of patients that have been used for the analysis against the total number of patients. Patients that have not been on the study long enough to reach the evaluation point (visit_med75, see introduction) will be ignored.* +## Optimized Statistical Performance + +Following the recommendation of our latest [performance benchmark](https://openpharma.github.io/simaerep/articles/performance.html) statistical performance can be increased by using the [inframe](https://openpharma.github.io/simaerep/articles/inframe.html) algorithm without multiplicity correction. 
+ +**Note that the plot is noisier because no patients are excluded and only a few patients contribute to the event count at higher visits** + +Recommended Threshold: `aerep$dfeval$prob_low_prob_ur: 0.99` + +```{r} +aerep <- simaerep( + df_visit, + inframe = TRUE, + visit_med75 = FALSE, + mult_corr = FALSE +) + +plot(aerep, study = "A") +``` + +## In Database Calculation + +The [inframe](https://openpharma.github.io/simaerep/articles/inframe.html) algorithm uses only `dbplyr` compatible table operations and can be executed within a database backend as we demonstrate here using `duckdb`. + +However, we need to provide an in-database table that has as many rows as the desired replications in our simulation, instead of providing an integer for the `r` parameter. + +```{r} +con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") +df_r <- tibble(rep = seq(1, 1000)) + +dplyr::copy_to(con, df_visit, "visit") +dplyr::copy_to(con, df_r, "r") + +tbl_visit <- tbl(con, "visit") +tbl_r <- tbl(con, "r") + + +aerep <- simaerep( + tbl_visit, + r = tbl_r, + inframe = TRUE, + visit_med75 = FALSE, + mult_corr = FALSE +) + +plot(aerep, df_visit = tbl_visit) + +DBI::dbDisconnect(con) +``` diff --git a/README.md b/README.md index b419a18..425fbbf 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,8 @@ using ## Application +Recommended Threshold: `aerep$dfeval$prob_low_prob_ur: 0.95` + ``` r suppressPackageStartupMessages(library(simaerep)) @@ -143,7 +145,7 @@ df_visit %>% aerep <- simaerep(df_visit) -plot(aerep, study = "A") +plot(aerep, study = "A") ``` @@ -159,3 +161,71 @@ upper left corner indicate the ratio of patients that have been used for the analysis against the total number of patients. 
Patients that have not been on the study long enough to reach the evaluation point (visit_med75, see introduction) will be ignored.* + +## Optimized Statistical Performance + +Following the recommendation of our latest [performance +benchmark](https://openpharma.github.io/simaerep/articles/performance.html), +statistical performance can be increased by using the +[inframe](https://openpharma.github.io/simaerep/articles/inframe.html) +algorithm without multiplicity correction. + +**Note that the plot is noisier because no patients are excluded and +only a few patients contribute to the event count at higher visits** + +Recommended Threshold: `aerep$dfeval$prob_low_prob_ur: 0.99` + +``` r +aerep <- simaerep( + df_visit, + inframe = TRUE, + visit_med75 = FALSE, + mult_corr = FALSE +) + +plot(aerep, study = "A") +``` + + + +## In Database Calculation + +The +[inframe](https://openpharma.github.io/simaerep/articles/inframe.html) +algorithm uses only `dbplyr` compatible table operations and can be +executed within a database backend as we demonstrate here using +`duckdb`. + +However, we need to provide an in-database table that has as many rows as +the desired replications in our simulation, instead of providing an +integer for the `r` parameter. 
+ +``` r +con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") +df_r <- tibble(rep = seq(1, 1000)) + +dplyr::copy_to(con, df_visit, "visit") +dplyr::copy_to(con, df_r, "r") + +tbl_visit <- tbl(con, "visit") +tbl_r <- tbl(con, "r") + + +aerep <- simaerep( + tbl_visit, + r = tbl_r, + inframe = TRUE, + visit_med75 = FALSE, + mult_corr = FALSE +) + +plot(aerep, df_visit = tbl_visit) +#> study = NULL, defaulting to study:A +``` + + + +``` r + +DBI::dbDisconnect(con) +``` diff --git a/cran-comments.md b/cran-comments.md index 7faabc9..c42e918 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,4 @@ -# submission simaerep v0.5.0 +# submission simaerep v0.6.0 ## Test Results @@ -6,7 +6,7 @@ No notes, warnings or errors ## Test Environments -- Rhub, debian +- Rhub - linux, macos, macos-arm64, windows devel - GitHub CI/CD macOS-latest diff --git a/docs/404.html b/docs/404.html index e474e67..7790f5b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -34,7 +34,7 @@ simaerep - 0.5.0.900 + 0.6.0