Skip to content

Commit

Permalink
Merge branch 'develop' into validate_external_improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
egillax committed Nov 12, 2024
2 parents 1ba12be + 5f53965 commit 2ab8d7c
Show file tree
Hide file tree
Showing 31 changed files with 1,219 additions and 956 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -28,9 +28,14 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache: always
extra-packages: any::pkgdown, ohdsi/OhdsiRTools
needs: website

- uses: lycheeverse/lychee-action@v2
with:
args: --base . --verbose --no-progress --accept '100..=103, 200..=299, 403' './**/*.md' './**/*.Rmd'

- name: Build site
run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)'

Expand All @@ -39,7 +44,7 @@ jobs:

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@4.1.4
uses: JamesIves/github-pages-deploy-action@v4
with:
clean: false
branch: gh-pages
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ export(plotDemographicSummary)
export(plotF1Measure)
export(plotGeneralizability)
export(plotLearningCurve)
export(plotNetBenefit)
export(plotPlp)
export(plotPrecisionRecall)
export(plotPredictedPDF)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
PatientLevelPrediction develop
======================
- Fixed linting errors and R codestyle in docs to conform to HADES style
- Remove links to pdf's, point to website instead.
- Fix broken links in Readme and BuildingPredictiveModels vignette
- Added an action to detect broken links in repo
- Official maintainer updated to Egill Fridgeirsson


Expand Down
9 changes: 6 additions & 3 deletions R/EvaluationSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -451,11 +451,14 @@ averagePrecision <- function(prediction){
}



calibrationInLarge <- function(prediction){
#' Calculate the calibration in large
#' @param prediction A prediction dataframe
#' @return data.frame with meanPredictionRisk, observedRisk, and N
#' @keywords internal
calibrationInLarge <- function(prediction) {

result <- data.frame(meanPredictionRisk = mean(prediction$value),
observedRisk = sum(prediction$outcomeCount)/nrow(prediction),
observedRisk = sum(prediction$outcomeCount) / nrow(prediction),
N = nrow(prediction)
)

Expand Down
119 changes: 119 additions & 0 deletions R/Plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,125 @@ plotSmoothCalibration <- function(plpResult,
return(plots)
}

#' Plot the net benefit
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.
#' @param evalType Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted
#' @param ylim The y limits for the plot, if NULL the limits are calculated from the data
#' @param xlim The x limits for the plot, if NULL the limits are calculated from the data
#' @return A list of ggplot objects
#' @export
plotNetBenefit <- function(plpResult,
typeColumn = "evaluation",
saveLocation = NULL,
fileName = "netBenefit.png",
evalType = NULL,
ylim = NULL,
xlim = NULL
) {
if (is.null(evalType)) {
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[, typeColumn])
} else {
evalTypes <- evalType
}

plots <- list()
length(plots) <- length(evalTypes)
for (i in 1:length(evalTypes)) {
evalType <- evalTypes[i]
# calculate net benefit straight from predictions instead of thresholdSummary.
nbData <- getNetBenefit(plpResult, evalType)
if (is.null(ylim)) {
ylim <- c(min(nbData$netBenefit),
max(nbData$netBenefit))
}
if (is.null(xlim)) {
# match limit in Smooth Calibration by default
xlim <- c(min(nbData$threshold),
max(max(plpResult$performanceEvaluation$calibrationSummary$averagePredictedProbability),
max(plpResult$performanceEvaluation$calibrationSummary$observedIncidence)))

}

plots[[i]] <- ggplot2::ggplot(data = nbData, ggplot2::aes(x = .data$threshold)) +
ggplot2::geom_line(ggplot2::aes(y = .data$treatAll, color = "Treat All", linetype = "Treat All")) +
ggplot2::geom_line(ggplot2::aes(y = .data$treatNone, color = "Treat None", linetype = "Treat None")) +
ggplot2::geom_line(ggplot2::aes(y = .data$netBenefit, color = "Net Benefit", linetype = "Net Benefit")) +
ggplot2::scale_color_manual(
name = "Strategies",
values = c(
"Model" = "blue",
"Treat all" = "red",
"Treat None" = "brown"
)
) +
ggplot2::scale_linetype_manual(
name = "Strategies",
values = c(
"Net Benefit" = "solid",
"Treat All" = "dashed",
"Treat None" = "dashed"
)
) +
ggplot2::labs(
x = "Prediction Threshold",
y = "Net Benefit"
) +
ggplot2::ggtitle(evalType) +
ggplot2::coord_cartesian(xlim = xlim, ylim = ylim)
}

plot <- gridExtra::marrangeGrob(plots, nrow = length(plots), ncol = 1)

if (!is.null(saveLocation)) {
if (!dir.exists(saveLocation)) {
dir.create(saveLocation, recursive = TRUE)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}

#' Calculate net benefit in an efficient way
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param evalType The evaluation type column
#' @return A data frame with the net benefit, treat all and treat none values
#' @keywords internal
getNetBenefit <- function(plpResult, evalType) {
prediction <- plpResult$prediction %>% dplyr::filter(.data$evaluationType == evalType)
if (nrow(prediction) == 0) {
stop("No prediction data found for evaluation type ", evalType)
}
prediction <- prediction %>% dplyr::arrange(dplyr::desc(.data$value)) %>%
dplyr::mutate(
cumsumTrue = cumsum(.data$outcomeCount),
cumsumFalse = cumsum(1 - .data$outcomeCount)
)
trueCount <- sum(prediction$outcomeCount)
n <- nrow(prediction)
falseCount <- n - trueCount
outcomeRate <- trueCount / n

nbData <- prediction %>%
dplyr::group_by(.data$value) %>%
dplyr::summarise(
threshold = unique(.data$value),
TP = max(.data$cumsumTrue),
FP = max(.data$cumsumFalse),
) %>%
dplyr::ungroup()

nbData <- nbData %>%
dplyr::mutate(
netBenefit = (.data$TP / n) - (.data$FP / n) * (.data$threshold / (1 - .data$threshold)),
treatAll = outcomeRate - (1 - outcomeRate) * .data$threshold / (1 - .data$threshold),
treatNone = 0
)
return(nbData)
}

plotSmoothCalibrationLoess <- function(data, span = 0.75) {
fit <- stats::loess(y ~ p, data = data, degree = 2, span = span)
predictedFit <- stats::predict(fit, se = TRUE)
Expand Down
Loading

0 comments on commit 2ab8d7c

Please sign in to comment.