Skip to content

Commit

Permalink
Merge pull request #244 from oxford-pharmacoepi/min_cell_count
Browse files Browse the repository at this point in the history
  • Loading branch information
xihang-chen authored May 28, 2024
2 parents 4420482 + 651175c commit 032a4ca
Show file tree
Hide file tree
Showing 15 changed files with 415 additions and 56 deletions.
58 changes: 48 additions & 10 deletions R/inputValidation.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ checkInputgenerateSequenceCohortSet <- function(cdm,
checkmate::assertCharacter(name, len = 1, any.missing = FALSE, add = errorMessage)

## Check date
checkcohortDateRange(cohortDateRange, errorMessage)
checkCohortDateRange(cohortDateRange, errorMessage)

## Checks that Index and Marker ids exist in Index and Marker tables
checkCohortIds(cohort = cdm[[indexTable]],
Expand Down Expand Up @@ -58,9 +58,10 @@ checkInputgenerateSequenceCohortSet <- function(cdm,
}

checkInputSummariseSequenceRatios <- function(cohort,
cohortId,
confidenceInterval,
movingAverageRestriction) {
cohortId,
confidenceInterval,
movingAverageRestriction,
minCellCount) {

# Check cdm objects, writing schema and index/marker tables
cdm <- omopgenerics::cdmReference(cohort)
Expand All @@ -78,19 +79,23 @@ checkInputSummariseSequenceRatios <- function(cohort,
# Check the rest of inputs
errorMessage <- checkmate::makeAssertCollection()

# Check minCellCount
checkMinCellCount(minCellCount, errorMessage)

## Check confidenceInterval
checkConfidenceInterval(confidenceInterval, errorMessage)

## Check movingAverageRestriction
checkmovingAverageRestriction(movingAverageRestriction, errorMessage)
checkMovingAverageRestriction(movingAverageRestriction, errorMessage)

# Report errors
checkmate::reportAssertions(collection = errorMessage)
}

checkInputSummariseTemporalSymmetry <- function(cohort,
cohortId,
timescale) {
timescale,
minCellCount) {

# Check cdm objects, writing schema and index/marker tables
cdm <- omopgenerics::cdmReference(cohort)
Expand All @@ -108,8 +113,11 @@ checkInputSummariseTemporalSymmetry <- function(cohort,
# Check the rest of inputs
errorMessage <- checkmate::makeAssertCollection()

# Check minCellCount
checkMinCellCount(minCellCount, errorMessage)

# Check timescale
checktimeScale(timescale, errorMessage)
checkTimeScale(timescale, errorMessage)

# Report errors
checkmate::reportAssertions(collection = errorMessage)
Expand Down Expand Up @@ -142,6 +150,14 @@ checkInputPlotTemporalSymmetry <- function(result,
colours,
scales) {

result_check <- result %>%
dplyr::pull("estimate_value")

if (all(is.na(result_check))){
cli::cli_abort("Aborted! All the temporal symmetry results are NAs, no plots
could be produced")
}

# Check the rest of inputs
errorMessage <- checkmate::makeAssertCollection()

Expand Down Expand Up @@ -170,6 +186,15 @@ checkInputPlotSequenceRatios <- function(result,
labs,
colours) {

result_check <- result %>%
dplyr::filter(.data$estimate_name == "point_estimate") %>%
dplyr::pull("estimate_value")

if (all(is.na(result_check))){
cli::cli_abort("Aborted! All the sequence ratios are NAs, no plots could be
produced")
}

# Check the rest of inputs
errorMessage <- checkmate::makeAssertCollection()

Expand Down Expand Up @@ -255,7 +280,7 @@ checkWashoutWindow <- function(washoutWindow, errorMessage) {
}

# Check movingAverageRestriction (Inf or numeric)
checkmovingAverageRestriction <- function(movingAverageRestriction, errorMessage){
checkMovingAverageRestriction <- function(movingAverageRestriction, errorMessage){
if (movingAverageRestriction != Inf) {
checkmate::assertIntegerish(
movingAverageRestriction,
Expand All @@ -277,6 +302,19 @@ checkDaysPriorObservation <- function(daysPriorObservation, errorMessage){
}
}

# Check minCellCount (has to be numeric)
checkMinCellCount <- function(minCellCount, errorMessage){
if (minCellCount != Inf) {
checkmate::assertIntegerish(
minCellCount,
lower = 0, any.missing = FALSE, max.len = 4, add = errorMessage
)
}
if(!(is.finite(minCellCount))){
errorMessage$push("minCellCount has to be finite.")
}
}

# Check combinationWindow (a numeric of length 2)
checkCombinationWindow <- function(combinationWindow, errorMessage){
checkmate::assert_numeric(combinationWindow, len = 2, any.missing = FALSE, add = errorMessage)
Expand All @@ -298,7 +336,7 @@ checkCombinationWindow <- function(combinationWindow, errorMessage){
}
}

checkcohortDateRange <- function(cohortDateRange, errorMessage) {
checkCohortDateRange <- function(cohortDateRange, errorMessage) {
checkmate::assertDate(cohortDateRange, len = 2, add = errorMessage)
if (all(!is.na(cohortDateRange))) {
if (cohortDateRange[1] >= cohortDateRange[2]) {
Expand Down Expand Up @@ -397,7 +435,7 @@ checkScales <- function(scales, errorMessage) {
}
}

checktimeScale <- function(timescale, errorMessage){
checkTimeScale <- function(timescale, errorMessage){
checkmate::assert_character(timescale,
len = 1,
add = errorMessage)
Expand Down
3 changes: 2 additions & 1 deletion R/plotSequenceRatios.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
#' indexTable = "cohort_1",
#' markerTable = "cohort_2",
#' name = "joined_cohort")
#' sequence_ratio <- summariseSequenceRatios(cohort = cdm$joined_cohort)
#' sequence_ratio <- summariseSequenceRatios(cohort = cdm$joined_cohort,
#' minCellCount = 0)
#' plotSequenceRatios(result = sequence_ratio)
#' CDMConnector::cdmDisconnect(cdm = cdm)
#' }
Expand Down
3 changes: 2 additions & 1 deletion R/plotTemporalSymmetry.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
#' indexTable = "cohort_1",
#' markerTable = "cohort_2",
#' name = "joined_cohort")
#' temporal_symmetry <- summariseTemporalSymmetry(cohort = cdm$joined_cohort)
#' temporal_symmetry <- summariseTemporalSymmetry(cohort = cdm$joined_cohort,
#' minCellCount = 0)
#' plotTemporalSymmetry(result = temporal_symmetry)
#' CDMConnector::cdmDisconnect(cdm = cdm)
#' }
Expand Down
81 changes: 74 additions & 7 deletions R/summariseSequenceRatios.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#' @param cohortId The Ids in the cohort that are to be included in the analyses.
#' @param confidenceInterval Default is 95, indicating the central 95% confidence interval.
#' @param movingAverageRestriction The moving window when calculating nSR, default is 548.
#' @param minCellCount The minimum number of events to reported, below which
#' results will be obscured. If 0, all results will be reported.
#'
#' @return
#' A local table with all the analyses.
Expand All @@ -26,15 +28,17 @@
#' }
#'
summariseSequenceRatios <- function(cohort,
cohortId = NULL,
confidenceInterval = 95,
movingAverageRestriction = 548) {
cohortId = NULL,
confidenceInterval = 95,
movingAverageRestriction = 548,
minCellCount = 5) {

# checks
checkInputSummariseSequenceRatios(cohort = cohort,
cohortId = cohortId,
confidenceInterval = confidenceInterval,
movingAverageRestriction = movingAverageRestriction)
cohortId = cohortId,
confidenceInterval = confidenceInterval,
movingAverageRestriction = movingAverageRestriction,
minCellCount = minCellCount)

if (is.null(cohortId)){
cohortId <- cohort %>%
Expand Down Expand Up @@ -130,5 +134,68 @@ summariseSequenceRatios <- function(cohort,
PatientProfiles::addCdmName(cdm = omopgenerics::cdmReference(cohort)) %>%
getSummarisedResult()

return(output)
setting <- omopgenerics::settings(output)

output <- output |>
omopgenerics::suppress(minCellCount = minCellCount)

counts <- output %>%
dplyr::filter(.data$estimate_name == "count" | .data$estimate_name == "percentage")

output_sr <- output %>% dplyr::filter(.data$variable_level == "sequence_ratio")

index_count <- output %>%
dplyr::filter(!.data$variable_level == "sequence_ratio",
.data$variable_name == "index",
.data$estimate_name == "count") %>%
dplyr::rename("variable_name_index" = "variable_name",
"variable_level_index" = "variable_level",
"estimate_name_index" = "estimate_name",
"estimate_type_index" = "estimate_type",
"estimate_value_index" = "estimate_value")

output_suppressed <- output_sr %>%
dplyr::left_join(index_count,
by = c("result_id", "cdm_name", "group_name",
"group_level", "strata_name", "strata_level",
"additional_name", "additional_level"),
relationship = "many-to-many") %>%
dplyr::mutate(estimate_value =
dplyr::case_when(
is.na(.data$estimate_value_index) ~ NA,
T ~ .data$estimate_value
)
) %>%
dplyr::select(dplyr::all_of(omopgenerics::resultColumns()))

marker_count <- output %>%
dplyr::filter(!.data$variable_level == "sequence_ratio",
.data$variable_name == "marker",
.data$estimate_name == "count") %>%
dplyr::rename("variable_name_index" = "variable_name",
"variable_level_index" = "variable_level",
"estimate_name_index" = "estimate_name",
"estimate_type_index" = "estimate_type",
"estimate_value_index" = "estimate_value")

output_suppressed <- output_suppressed %>%
dplyr::left_join(marker_count,
by = c("result_id", "cdm_name", "group_name",
"group_level", "strata_name", "strata_level",
"additional_name", "additional_level"),
relationship = "many-to-many") %>%
dplyr::mutate(estimate_value =
dplyr::case_when(
is.na(.data$estimate_value_index) ~ NA,
T ~ .data$estimate_value
)
) %>%
dplyr::select(dplyr::all_of(omopgenerics::resultColumns()))|>
rbind(counts) %>%
dplyr::arrange(.data$group_level) |>
omopgenerics::newSummarisedResult(
settings = setting
)

return(output_suppressed)
}
17 changes: 12 additions & 5 deletions R/summariseTemporalSymmetry.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#' Summarise temporal symmetry
#'
#' @description
#' Using generateSequenceCohortSet to obtain temporal symmetry (aggregated counts) of two cohorts.
#' Using generateSequenceCohortSet to obtain temporal symmetry (aggregated
#' counts) of two cohorts.
#'
#' @param cohort A cohort table in the cdm.
#' @param cohortId The Ids in the cohort that are to be included in the analyses.
#' @param timescale Timescale for the x axis of the plot (month, day, year).
#' @param minCellCount The minimum number of events to reported, below which
#' results will be obscured. If 0, all results will be reported.
#'
#' @return
#' An aggregated table with difference in time (marker - index) and the relevant counts.
#' An aggregated table with difference in time (marker - index) and the relevant
#' counts.
#' @export
#'
#' @examples
Expand All @@ -24,12 +28,14 @@
#'
summariseTemporalSymmetry <- function(cohort,
cohortId = NULL,
timescale = "month") {
timescale = "month",
minCellCount = 5) {

# checks
checkInputSummariseTemporalSymmetry(cohort = cohort,
cohortId = cohortId,
timescale = timescale)
timescale = timescale,
minCellCount = minCellCount)

index_names <- attr(cohort, "cohort_set") %>%
dplyr::select("cohort_definition_id", "index_name", "index_id", "marker_id")
Expand Down Expand Up @@ -104,6 +110,7 @@ summariseTemporalSymmetry <- function(cohort,
dplyr::select(dplyr::all_of(omopgenerics::resultColumns())) |>
omopgenerics::newSummarisedResult(
settings = setting
)
) |>
omopgenerics::suppress(minCellCount = minCellCount)
return(output_sum)
}
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ cdm$aspirin_amoxicillin %>%
dplyr::glimpse()
#> Rows: ??
#> Columns: 6
#> Database: DuckDB v0.10.1 [xihangc@Windows 10 x64:R 4.3.1/C:\Users\xihangc\AppData\Local\Temp\RtmpiEXmDQ\file644034f02d4d.duckdb]
#> Database: DuckDB v0.10.1 [xihangc@Windows 10 x64:R 4.3.1/C:\Users\xihangc\AppData\Local\Temp\RtmpItoTy4\file480c3f693d88.duckdb]
#> $ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
#> $ subject_id <int> 65, 119, 185, 144, 235, 197, 310, 280, 316, 331, …
#> $ cohort_start_date <date> 1968-07-29, 1967-05-28, 1947-04-07, 1978-10-30, …
Expand Down Expand Up @@ -128,11 +128,11 @@ res %>% glimpse()
#> $ group_level <chr> "aspirin &&& amoxicillin", "aspirin &&& amoxicillin",…
#> $ strata_name <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ strata_level <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ variable_name <chr> "index", "index", "marker", "marker", "crude", "adjus
#> $ variable_level <chr> "first_pharmac", "first_pharmac", "first_pharmac", "f
#> $ estimate_name <chr> "count", "percentage", "count", "percentage", "point_
#> $ estimate_type <chr> "integer", "numeric", "integer", "numeric", "numeric"…
#> $ estimate_value <chr> "56", "58.9", "39", "41.1", "1.43589743589744", "1.35
#> $ variable_name <chr> "crude", "adjusted", "crude", "crude", "adjusted", "a
#> $ variable_level <chr> "sequence_ratio", "sequence_ratio", "sequence_ratio",
#> $ estimate_name <chr> "point_estimate", "point_estimate", "lower_CI", "uppe
#> $ estimate_type <chr> "numeric", "numeric", "numeric", "numeric", "numeric"…
#> $ estimate_value <chr> "1.43589743589744", "1.35265700483092", "0.9573119756
#> $ additional_name <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…
```
Expand Down
3 changes: 2 additions & 1 deletion man/plotSequenceRatios.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/plotTemporalSymmetry.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/summariseSequenceRatios.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 032a4ca

Please sign in to comment.