Merge pull request #244 from oxford-pharmacoepi/min_cell_count

#205
OHDSI · May 28, 2024 · 032a4ca · 032a4ca
2 parents 4420482 + 651175c
commit 032a4ca
Show file tree

Hide file tree

Showing 15 changed files with 415 additions and 56 deletions.
diff --git a/R/inputValidation.R b/R/inputValidation.R
@@ -27,7 +27,7 @@ checkInputgenerateSequenceCohortSet <- function(cdm,
   checkmate::assertCharacter(name, len = 1, any.missing = FALSE, add = errorMessage)
 
   ## Check date
-  checkcohortDateRange(cohortDateRange, errorMessage)
+  checkCohortDateRange(cohortDateRange, errorMessage)
 
   ## Checks that Index and Marker ids exist in Index and Marker tables
   checkCohortIds(cohort = cdm[[indexTable]],
@@ -58,9 +58,10 @@ checkInputgenerateSequenceCohortSet <- function(cdm,
 }
 
 checkInputSummariseSequenceRatios <- function(cohort,
-                                             cohortId,
-                                             confidenceInterval,
-                                             movingAverageRestriction) {
+                                              cohortId,
+                                              confidenceInterval,
+                                              movingAverageRestriction,
+                                              minCellCount) {
 
   # Check cdm objects, writing schema and index/marker tables
   cdm <- omopgenerics::cdmReference(cohort)
@@ -78,19 +79,23 @@ checkInputSummariseSequenceRatios <- function(cohort,
   # Check the rest of inputs
   errorMessage <- checkmate::makeAssertCollection()
 
+  # Check minCellCount
+  checkMinCellCount(minCellCount, errorMessage)
+
   ## Check confidenceInterval
   checkConfidenceInterval(confidenceInterval, errorMessage)
 
   ## Check movingAverageRestriction
-  checkmovingAverageRestriction(movingAverageRestriction, errorMessage)
+  checkMovingAverageRestriction(movingAverageRestriction, errorMessage)
 
   # Report errors
   checkmate::reportAssertions(collection = errorMessage)
 }
 
 checkInputSummariseTemporalSymmetry <- function(cohort,
                                                 cohortId,
-                                                timescale) {
+                                                timescale,
+                                                minCellCount) {
 
   # Check cdm objects, writing schema and index/marker tables
   cdm <- omopgenerics::cdmReference(cohort)
@@ -108,8 +113,11 @@ checkInputSummariseTemporalSymmetry <- function(cohort,
   # Check the rest of inputs
   errorMessage <- checkmate::makeAssertCollection()
 
+  # Check minCellCount
+  checkMinCellCount(minCellCount, errorMessage)
+
   # Check timescale
-  checktimeScale(timescale, errorMessage)
+  checkTimeScale(timescale, errorMessage)
 
   # Report errors
   checkmate::reportAssertions(collection = errorMessage)
@@ -142,6 +150,14 @@ checkInputPlotTemporalSymmetry <- function(result,
                                            colours,
                                            scales) {
 
+  result_check <- result %>%
+    dplyr::pull("estimate_value")
+
+  if (all(is.na(result_check))){
+    cli::cli_abort("Aborted! All the temporal symmetry results are NAs, no plots
+    could be produced")
+  }
+
   # Check the rest of inputs
   errorMessage <- checkmate::makeAssertCollection()
 
@@ -170,6 +186,15 @@ checkInputPlotSequenceRatios <- function(result,
                                         labs,
                                         colours) {
 
+  result_check <- result %>%
+    dplyr::filter(.data$estimate_name == "point_estimate") %>%
+    dplyr::pull("estimate_value")
+
+  if (all(is.na(result_check))){
+    cli::cli_abort("Aborted! All the sequence ratios are NAs, no plots could be
+                   produced")
+  }
+
   # Check the rest of inputs
   errorMessage <- checkmate::makeAssertCollection()
 
@@ -255,7 +280,7 @@ checkWashoutWindow <- function(washoutWindow, errorMessage) {
 }
 
 # Check movingAverageRestriction (Inf or numeric)
-checkmovingAverageRestriction <- function(movingAverageRestriction, errorMessage){
+checkMovingAverageRestriction <- function(movingAverageRestriction, errorMessage){
   if (movingAverageRestriction != Inf) {
     checkmate::assertIntegerish(
       movingAverageRestriction,
@@ -277,6 +302,19 @@ checkDaysPriorObservation <- function(daysPriorObservation, errorMessage){
   }
 }
 
+# Check minCellCount (has to be numeric)
+checkMinCellCount <- function(minCellCount, errorMessage){
+  if (minCellCount != Inf) {
+    checkmate::assertIntegerish(
+      minCellCount,
+      lower = 0, any.missing = FALSE, max.len = 4, add = errorMessage
+    )
+  }
+  if(!(is.finite(minCellCount))){
+    errorMessage$push("minCellCount has to be finite.")
+  }
+}
+
 # Check combinationWindow (a numeric of length 2)
 checkCombinationWindow <- function(combinationWindow, errorMessage){
   checkmate::assert_numeric(combinationWindow, len = 2, any.missing = FALSE, add = errorMessage)
@@ -298,7 +336,7 @@ checkCombinationWindow <- function(combinationWindow, errorMessage){
   }
 }
 
-checkcohortDateRange <- function(cohortDateRange, errorMessage) {
+checkCohortDateRange <- function(cohortDateRange, errorMessage) {
   checkmate::assertDate(cohortDateRange, len = 2, add = errorMessage)
   if (all(!is.na(cohortDateRange))) {
     if (cohortDateRange[1] >= cohortDateRange[2]) {
@@ -397,7 +435,7 @@ checkScales <- function(scales, errorMessage) {
   }
 }
 
-checktimeScale <- function(timescale, errorMessage){
+checkTimeScale <- function(timescale, errorMessage){
   checkmate::assert_character(timescale,
                               len = 1,
                               add = errorMessage)

diff --git a/R/plotSequenceRatios.R b/R/plotSequenceRatios.R
@@ -22,7 +22,8 @@
 #'                                  indexTable = "cohort_1",
 #'                                  markerTable = "cohort_2",
 #'                                  name = "joined_cohort")
-#' sequence_ratio <- summariseSequenceRatios(cohort = cdm$joined_cohort)
+#' sequence_ratio <- summariseSequenceRatios(cohort = cdm$joined_cohort,
+#'                                           minCellCount = 0)
 #' plotSequenceRatios(result = sequence_ratio)
 #' CDMConnector::cdmDisconnect(cdm = cdm)
 #' }

diff --git a/R/plotTemporalSymmetry.R b/R/plotTemporalSymmetry.R
@@ -24,7 +24,8 @@
 #'                                  indexTable = "cohort_1",
 #'                                  markerTable = "cohort_2",
 #'                                  name = "joined_cohort")
-#' temporal_symmetry <- summariseTemporalSymmetry(cohort = cdm$joined_cohort)
+#' temporal_symmetry <- summariseTemporalSymmetry(cohort = cdm$joined_cohort,
+#'                                                minCellCount = 0)
 #' plotTemporalSymmetry(result = temporal_symmetry)
 #' CDMConnector::cdmDisconnect(cdm = cdm)
 #' }

diff --git a/R/summariseSequenceRatios.R b/R/summariseSequenceRatios.R
@@ -7,6 +7,8 @@
 #' @param cohortId The Ids in the cohort that are to be included in the analyses.
 #' @param confidenceInterval Default is 95, indicating the central 95% confidence interval.
 #' @param movingAverageRestriction The moving window when calculating nSR, default is 548.
+#' @param minCellCount The minimum number of events to reported, below which
+#' results will be obscured. If 0, all results will be reported.
 #'
 #' @return
 #' A local table with all the analyses.
@@ -26,15 +28,17 @@
 #' }
 #'
 summariseSequenceRatios <- function(cohort,
-                                   cohortId = NULL,
-                                   confidenceInterval = 95,
-                                   movingAverageRestriction = 548) {
+                                    cohortId = NULL,
+                                    confidenceInterval = 95,
+                                    movingAverageRestriction = 548,
+                                    minCellCount = 5) {
 
   # checks
   checkInputSummariseSequenceRatios(cohort = cohort,
-                                   cohortId = cohortId,
-                                   confidenceInterval = confidenceInterval,
-                                   movingAverageRestriction = movingAverageRestriction)
+                                    cohortId = cohortId,
+                                    confidenceInterval = confidenceInterval,
+                                    movingAverageRestriction = movingAverageRestriction,
+                                    minCellCount = minCellCount)
 
   if (is.null(cohortId)){
     cohortId <- cohort %>%
@@ -130,5 +134,68 @@ summariseSequenceRatios <- function(cohort,
     PatientProfiles::addCdmName(cdm = omopgenerics::cdmReference(cohort)) %>%
     getSummarisedResult()
 
-  return(output)
+ setting <- omopgenerics::settings(output)
+
+  output <- output |>
+    omopgenerics::suppress(minCellCount = minCellCount)
+
+  counts <- output %>%
+    dplyr::filter(.data$estimate_name == "count" | .data$estimate_name == "percentage")
+
+  output_sr <- output %>% dplyr::filter(.data$variable_level == "sequence_ratio")
+
+  index_count <- output %>%
+    dplyr::filter(!.data$variable_level == "sequence_ratio",
+                  .data$variable_name == "index",
+                  .data$estimate_name == "count") %>%
+    dplyr::rename("variable_name_index" = "variable_name",
+                  "variable_level_index" = "variable_level",
+                  "estimate_name_index" = "estimate_name",
+                  "estimate_type_index" = "estimate_type",
+                  "estimate_value_index" = "estimate_value")
+
+  output_suppressed <- output_sr %>%
+    dplyr::left_join(index_count,
+                     by = c("result_id", "cdm_name", "group_name",
+                            "group_level", "strata_name", "strata_level",
+                            "additional_name", "additional_level"),
+                     relationship = "many-to-many") %>%
+    dplyr::mutate(estimate_value =
+                    dplyr::case_when(
+                      is.na(.data$estimate_value_index) ~ NA,
+                      T ~ .data$estimate_value
+                    )
+    ) %>%
+    dplyr::select(dplyr::all_of(omopgenerics::resultColumns()))
+
+  marker_count <- output %>%
+    dplyr::filter(!.data$variable_level == "sequence_ratio",
+                  .data$variable_name == "marker",
+                  .data$estimate_name == "count") %>%
+    dplyr::rename("variable_name_index" = "variable_name",
+                  "variable_level_index" = "variable_level",
+                  "estimate_name_index" = "estimate_name",
+                  "estimate_type_index" = "estimate_type",
+                  "estimate_value_index" = "estimate_value")
+
+  output_suppressed <- output_suppressed %>%
+    dplyr::left_join(marker_count,
+                     by = c("result_id", "cdm_name", "group_name",
+                            "group_level", "strata_name", "strata_level",
+                            "additional_name", "additional_level"),
+                     relationship = "many-to-many") %>%
+    dplyr::mutate(estimate_value =
+                    dplyr::case_when(
+                      is.na(.data$estimate_value_index) ~ NA,
+                      T ~ .data$estimate_value
+                    )
+    ) %>%
+    dplyr::select(dplyr::all_of(omopgenerics::resultColumns()))|>
+    rbind(counts) %>%
+    dplyr::arrange(.data$group_level) |>
+    omopgenerics::newSummarisedResult(
+      settings = setting
+    )
+
+  return(output_suppressed)
 }
diff --git a/R/summariseTemporalSymmetry.R b/R/summariseTemporalSymmetry.R
@@ -1,14 +1,18 @@
 #' Summarise temporal symmetry
 #'
 #' @description
-#' Using generateSequenceCohortSet to obtain temporal symmetry (aggregated counts) of two cohorts.
+#' Using generateSequenceCohortSet to obtain temporal symmetry (aggregated
+#' counts) of two cohorts.
 #'
 #' @param cohort A cohort table in the cdm.
 #' @param cohortId The Ids in the cohort that are to be included in the analyses.
 #' @param timescale Timescale for the x axis of the plot (month, day, year).
+#' @param minCellCount The minimum number of events to reported, below which
+#' results will be obscured. If 0, all results will be reported.
 #'
 #' @return
-#' An aggregated table with difference in time (marker - index) and the relevant counts.
+#' An aggregated table with difference in time (marker - index) and the relevant
+#'  counts.
 #' @export
 #'
 #' @examples
@@ -24,12 +28,14 @@
 #'
 summariseTemporalSymmetry <- function(cohort,
                                       cohortId = NULL,
-                                      timescale = "month") {
+                                      timescale = "month",
+                                      minCellCount = 5) {
 
   # checks
   checkInputSummariseTemporalSymmetry(cohort = cohort,
                                       cohortId = cohortId,
-                                      timescale = timescale)
+                                      timescale = timescale,
+                                      minCellCount = minCellCount)
 
   index_names <- attr(cohort, "cohort_set") %>%
     dplyr::select("cohort_definition_id", "index_name", "index_id", "marker_id")
@@ -104,6 +110,7 @@ summariseTemporalSymmetry <- function(cohort,
     dplyr::select(dplyr::all_of(omopgenerics::resultColumns())) |>
     omopgenerics::newSummarisedResult(
       settings = setting
-    )
+    ) |>
+    omopgenerics::suppress(minCellCount = minCellCount)
   return(output_sum)
 }
diff --git a/README.md b/README.md
@@ -94,7 +94,7 @@ cdm$aspirin_amoxicillin %>%
   dplyr::glimpse()
 #> Rows: ??
 #> Columns: 6
-#> Database: DuckDB v0.10.1 [xihangc@Windows 10 x64:R 4.3.1/C:\Users\xihangc\AppData\Local\Temp\RtmpiEXmDQ\file644034f02d4d.duckdb]
+#> Database: DuckDB v0.10.1 [xihangc@Windows 10 x64:R 4.3.1/C:\Users\xihangc\AppData\Local\Temp\RtmpItoTy4\file480c3f693d88.duckdb]
 #> $ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
 #> $ subject_id           <int> 65, 119, 185, 144, 235, 197, 310, 280, 316, 331, …
 #> $ cohort_start_date    <date> 1968-07-29, 1967-05-28, 1947-04-07, 1978-10-30, …
@@ -128,11 +128,11 @@ res %>% glimpse()
 #> $ group_level      <chr> "aspirin &&& amoxicillin", "aspirin &&& amoxicillin",…
 #> $ strata_name      <chr> "overall", "overall", "overall", "overall", "overall"…
 #> $ strata_level     <chr> "overall", "overall", "overall", "overall", "overall"…
-#> $ variable_name    <chr> "index", "index", "marker", "marker", "crude", "adjus…
-#> $ variable_level   <chr> "first_pharmac", "first_pharmac", "first_pharmac", "f…
-#> $ estimate_name    <chr> "count", "percentage", "count", "percentage", "point_…
-#> $ estimate_type    <chr> "integer", "numeric", "integer", "numeric", "numeric"…
-#> $ estimate_value   <chr> "56", "58.9", "39", "41.1", "1.43589743589744", "1.35…
+#> $ variable_name    <chr> "crude", "adjusted", "crude", "crude", "adjusted", "a…
+#> $ variable_level   <chr> "sequence_ratio", "sequence_ratio", "sequence_ratio",…
+#> $ estimate_name    <chr> "point_estimate", "point_estimate", "lower_CI", "uppe…
+#> $ estimate_type    <chr> "numeric", "numeric", "numeric", "numeric", "numeric"…
+#> $ estimate_value   <chr> "1.43589743589744", "1.35265700483092", "0.9573119756…
 #> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
 #> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…
 ```

diff --git a/man/plotSequenceRatios.Rd b/man/plotSequenceRatios.Rd
diff --git a/man/plotTemporalSymmetry.Rd b/man/plotTemporalSymmetry.Rd
diff --git a/man/summariseSequenceRatios.Rd b/man/summariseSequenceRatios.Rd