diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index d4c3cf9..0f2fe08 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -43,7 +43,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::rcmdcheck, margins=?ignore + extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index b6e0377..c9f0165 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -34,14 +34,9 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::pkgdown, local::., margins=?ignore + extra-packages: any::pkgdown, local::. needs: website - - name: Install archived dependencies (margins) - run: | - R -e 'install.packages("https://cran.r-project.org/src/contrib/Archive/prediction/prediction_0.3.17.tar.gz")' - R -e 'install.packages("https://cran.r-project.org/src/contrib/Archive/margins/margins_0.3.26.tar.gz")' - - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml index e16437f..a66ce37 100644 --- a/.github/workflows/rhub.yaml +++ b/.github/workflows/rhub.yaml @@ -60,7 +60,7 @@ jobs: with: token: ${{ secrets.RHUB_TOKEN }} job-config: ${{ matrix.config.job-config }} - extra-packages: any::rcmdcheck, margins=?ignore + extra-packages: any::rcmdcheck - uses: r-hub/actions/run-check@v1 with: token: ${{ secrets.RHUB_TOKEN }} @@ -90,7 +90,7 @@ jobs: with: job-config: ${{ matrix.config.job-config }} token: ${{ secrets.RHUB_TOKEN }} - extra-packages: any::rcmdcheck, margins=?ignore + extra-packages: any::rcmdcheck - uses: r-hub/actions/run-check@v1 with: job-config: ${{ matrix.config.job-config }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 6365cf6..e9e4c6f 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -23,7 +23,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr, margins=?ignore + extra-packages: any::covr needs: coverage - name: Test coverage diff --git a/DESCRIPTION b/DESCRIPTION index 9969e12..f7800dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: beeca Title: Binary Endpoint Estimation with Covariate Adjustment -Version: 0.1.3.9000 +Version: 0.2.0 Authors@R: c(person(given = "Alex", family = "Przybylski", @@ -33,7 +33,7 @@ Maintainer: Alex Przybylski License: LGPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: knitr, rmarkdown, @@ -41,7 +41,7 @@ Suggests: tidyr, marginaleffects, margins, - RobinCar (== 0.3.0) + RobinCar (>= 0.3.0) Config/testthat/edition: 3 Depends: R (>= 2.10) diff --git a/NAMESPACE b/NAMESPACE index 0d6a44d..0ce768e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,4 +17,5 @@ importFrom(stats,predict) importFrom(stats,terms) importFrom(stats,var) importFrom(stats,vcov) +importFrom(utils,combn) importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md index b165148..93fc6ee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# beeca (development version) +# beeca 0.2.0 + +- Extensions to allow for more than two treatment arms in the model fit. # beeca 0.1.3 diff --git a/R/apply_contrast.R b/R/apply_contrast.R index d4b042e..d6d6b7c 100644 --- a/R/apply_contrast.R +++ b/R/apply_contrast.R @@ -27,9 +27,10 @@ #' The choice of contrast affects how treatment effects are calculated and #' interpreted. Default is `diff`. #' -#' @param reference a string indicating which treatment group should be considered as -#' the reference level. Accepted values are one of the levels in the treatment -#' variable. Default to the first level used in the `glm` object. +#' @param reference a string or list of strings indicating which treatment +#' group(s) to use as reference level for pairwise comparisons. Accepted values +#' must be a subset of the levels in the treatment variable. Default to the +#' first n-1 treatment levels used in the `glm` object. #' #' This parameter influences the calculation of treatment effects #' relative to the chosen reference group. @@ -68,10 +69,11 @@ #' fit3$marginal_est #' fit3$marginal_se #' +#' @importFrom utils combn #' @export #' apply_contrast <- function(object, contrast = c("diff", "rr", "or", "logrr", "logor"), reference) { - # assert means are available in object + # Assert means are available in object if (!"counterfactual.means" %in% names(object)) { msg <- sprintf( "Missing counterfactual means. First run `%1$s <- average_predictions(%1$s)`.", @@ -79,7 +81,7 @@ apply_contrast <- function(object, contrast = c("diff", "rr", "or", "logrr", "lo ) stop(msg, call. = FALSE) } - # assert varcov is available in object + # Assert varcov is available in object if (!"robust_varcov" %in% names(object)) { msg <- sprintf( "Missing robust varcov. First run `%1$s <- get_varcov(%1$s, ...)`.", @@ -92,53 +94,81 @@ apply_contrast <- function(object, contrast = c("diff", "rr", "or", "logrr", "lo object <- .assert_sanitized(object, trt, warn = T) data <- .get_data(object) - # assert non-missing reference + + # Assert non-missing reference or set default if (missing(reference)) { - reference <- object$xlevels[[trt]][1] - warning(sprintf("No reference argument was provided, using %s as the reference level", reference), call. = FALSE) + reference <- object$xlevels[[trt]][-nlevels(data[[trt]])] + warning(sprintf("No reference argument was provided, using {%s} as the reference level(s)", paste(reference, collapse = ", ")), call. = FALSE) + } + + # Assert reference to be subset of the treatment levels + if (!all(reference %in% levels(data[[trt]]))) { + stop("Reference levels must be a subset of treatment levels : ", paste(levels(data[[trt]]), collapse = ", "), ".") } - # assert reference to be one of the treatment levels - if (!reference %in% levels(data[[trt]])) { - stop("Reference must be one of : ", paste(levels(data[[trt]]), collapse = ", "), ".") + # Only accept at most nlevels(trt)-1 reference levels + if (length(reference) > (nlevels(data[[trt]])-1)) { + stop(sprintf("Too many reference levels provided. Expecting at most %s value(s) from the possible treatment levels: {%s}", + nlevels(data[[trt]]) - 1, + paste(levels(data[[trt]]), collapse = ", "))) } - # match contrast argument and retrieve relevant functions + # Match contrast argument and retrieve relevant helper functions contrast <- match.arg(contrast) c_est <- get(contrast) c_str <- get(paste0(contrast, "_str")) c_grad <- get(paste0("grad_", contrast)) - # set reference to 1 if it is the first level - reference_n <- which(levels(data[[trt]]) == reference) - cf_mean_ref <- object$counterfactual.means[reference_n] - cf_mean_inv <- object$counterfactual.means[3 - reference_n] - - trt_ref <- reference - trt_inv <- levels(data[[trt]])[-reference_n] + # Extract counterfactual means + cf_means <- object$counterfactual.means - # apply contrast to point estimate - marginal_est <- c_est(cf_mean_inv, cf_mean_ref) + # Get all pairwise arm comparisons for contrasts + # Format according to specified reference levels + l <- levels(data[[trt]]) + combos <- combn(l, 2, \(x) { + if (any(reference %in% x)) { + ref <- intersect(reference, x)[[1]] + comp <- x[x!=ref] + c(which(ref == l), which(comp==l)) + } else { + c(NA, NA) + } + }) + combos <- combos[, is.finite(colSums(combos)), drop=F] + if (length(reference) > 1) { + combos <- combos[,order(combos[1,])] + } + idxs <- matrix(F, nrow=length(l), ncol=length(l)) + rownames(idxs) <- colnames(idxs) <- l + idxs[cbind(combos[2,], combos[1,])] <- T - object$marginal_est <- marginal_est - names(object$marginal_est) <- "marginal_est" - attr(object$marginal_est, "reference") <- trt_ref - attr(object$marginal_est, "contrast") <- paste0(contrast, ": ", c_str(trt_inv, trt_ref)) - # apply contrast to varcov - gr <- c_grad(cf_mean_inv, cf_mean_ref) + # Get marginal estimates for the contrasts of interest, using correct ref levels + marginal_est <- outer(cf_means, cf_means, c_est) + marginal_est <- marginal_est[idxs] - robust_varcov <- object$robust_varcov + # Define contrast jacobian + gr <- matrix(0, nrow=ncol(combos), ncol=length(l)) + # Apply the grad function to all combinations + contrast_values <- t(apply(combos, 2, function(idx) c_grad(cf_means[idx[2]], cf_means[idx[1]]))) + gr[cbind(1:ncol(combos), combos[1,])] <- contrast_values[, 1] + gr[cbind(1:ncol(combos), combos[2,])] <- contrast_values[, 2] + # Get marginal standard error for contrasts of interest + marginal_se <- sqrt(diag(gr %*% object$robust_varcov %*% t(gr))) - # correct varcov order based on reference - if (reference_n == 2) diag(robust_varcov) <- rev(diag(robust_varcov)) + # Add string description for each comparison + contrast_desc <- paste0(contrast, ": ", + apply(combos, 2, \(x) c_str(l[x[2]], l[x[1]]))) + names(marginal_est) <- contrast_desc + names(marginal_se) <- contrast_desc - marginal_se <- sqrt(t(gr) %*% robust_varcov %*% gr) + object$marginal_est <- marginal_est + attr(object$marginal_est, "reference") <- reference + attr(object$marginal_est, "contrast") <- contrast_desc - object$marginal_se <- marginal_se[[1]] - names(object$marginal_se) <- "marginal_se" - attr(object$marginal_se, "reference") <- trt_ref - attr(object$marginal_se, "contrast") <- paste0(contrast, ": ", c_str(trt_inv, trt_ref)) + object$marginal_se <- marginal_se + attr(object$marginal_se, "reference") <- reference + attr(object$marginal_se, "contrast") <- contrast_desc attr(object$marginal_se, "type") <- attr(object$robust_varcov, "type") return(object) diff --git a/R/estimate_varcov.R b/R/estimate_varcov.R index ed8c3a6..435682c 100644 --- a/R/estimate_varcov.R +++ b/R/estimate_varcov.R @@ -98,7 +98,7 @@ estimate_varcov <- function(object, strata = NULL, method = c("Ge", "Ye"), type = c("HC0", "model-based", "HC3", "HC", "HC1", "HC2", "HC4", "HC4m", "HC5"), mod = FALSE) { - # assert means are available in object + # Assert means are available in object if (!"counterfactual.means" %in% names(object)) { msg <- sprintf( "Missing counterfactual means. First run `%1$s <- average_predictions(%1$s)`.", @@ -115,14 +115,18 @@ estimate_varcov <- function(object, strata = NULL, method = c("Ge", "Ye"), varcov <- varcov_ye(object, trt, strata, mod) } else if (method == "Ge") { type <- match.arg(type) - # throw warning is strata supplied but method == "Ge" is specified + # Throw warning is strata supplied but method == "Ge" is specified if (!is.null(strata)) { warning("Strata is supplied but will be ignored when using method='Ge'", call. = FALSE) } varcov <- varcov_ge(object, trt, type) } - rownames(varcov) <- colnames(varcov) <- levels(.get_data(object)[[trt]]) + # Assert varcov is symmetric + if (isFALSE(all.equal(varcov[lower.tri(varcov)], varcov[upper.tri(varcov)]))) { + stop("Error in calculation of variance covariance matrix: `varcov` is not symmetric.", + call. = FALSE) + } object$robust_varcov <- varcov attr(object$robust_varcov, "type") <- ifelse(method == "Ge", paste0(method, " - ", type), method) @@ -133,58 +137,86 @@ estimate_varcov <- function(object, strata = NULL, method = c("Ge", "Ye"), ## Re-implementation based on Ye et al. (https://doi.org/10.1080/24754269.2023.2205802) varcov_ye <- function(object, trt, strata, mod) { - ## calculate quantities - # get data + data <- .get_data(object) - # counterfactual predictions + # Extract allocation information + pi_all <- prop.table(table(data[, trt])) + n <- length(data[, trt]) + + # Extract observed responses + y_observed <- lapply(levels(data[[trt]]), \(x) object$y[data[, trt] == x]) + names(y_observed) <- levels(data[[trt]]) + + # Extract counterfactual predictions cf_pred <- object$counterfactual.predictions + # Append actual treatment assignment to cf predictions + cf_pred[[trt]] <- data[,trt] - pred_0 <- cf_pred$cf_pred_0 - pred_1 <- cf_pred$cf_pred_1 + # Estimate varcov using Ye et al (2022) - ## Extract allocation information - pi_0 <- mean(data[, trt] == levels(data[[trt]])[1]) - pi_1 <- 1 - pi_0 - n <- length(data[, trt]) + # diag_vars stores variances per treatment arm + diag_vars <- list() + if (mod) { - ## Extract responses on observed arm i and arm j - y_0 <- object$y[data[, trt] == levels(data[[trt]])[1]] - y_1 <- object$y[data[, trt] == levels(data[[trt]])[2]] + for (trtlvl in levels(data[[trt]])) { + var_resid <- var(y_observed[[trtlvl]] - cf_pred[cf_pred[[trt]] == trtlvl, trtlvl]) + cov_obs_pred <- cov(y_observed[[trtlvl]], cf_pred[cf_pred[[trt]] == trtlvl, trtlvl]) + var_pred <- var(cf_pred[, trtlvl]) - ## Predictions on arm i from patients in arm i only - pred_0_0 <- pred_0[data[, trt] == levels(data[[trt]])[1]] - pred_1_1 <- pred_1[data[, trt] == levels(data[[trt]])[2]] + diag_vars[[trtlvl]] <- var_resid / pi_all[[trtlvl]] + 2 * cov_obs_pred - var_pred - ## Predictions on arm i from patients in arm j only - pred_0_1 <- pred_0[data[, trt] == levels(data[[trt]])[2]] - pred_1_0 <- pred_1[data[, trt] == levels(data[[trt]])[1]] + } - ## estimate varcov using Ye et al (2022) - if (mod) { - v_0_0 <- var(y_0 - pred_0_0) / pi_0 + 2 * cov(y_0, pred_0_0) - var(pred_0) - v_1_1 <- var(y_1 - pred_1_1) / pi_1 + 2 * cov(y_1, pred_1_1) - var(pred_1) } else { - # use variance decomposition to match RobinCar implementation - var_y0_pred00 <- var(y_0) - 2 * cov(y_0, pred_0_0) + var(pred_0) - var_y1_pred11 <- var(y_1) - 2 * cov(y_1, pred_1_1) + var(pred_1) + # Use variance decomposition to match RobinCar implementation + + for (trtlvl in levels(data[[trt]])) { + var_obs <- var(y_observed[[trtlvl]]) + cov_obs_pred <- cov(y_observed[[trtlvl]], cf_pred[cf_pred[[trt]] == trtlvl, trtlvl]) + var_pred <- var(cf_pred[, trtlvl]) + + var_obs_pred <- var_obs - 2 * cov_obs_pred + var_pred + + diag_vars[[trtlvl]] <- var_obs_pred / pi_all[[trtlvl]] + 2 * cov_obs_pred - var_pred + + } - v_0_0 <- var_y0_pred00 / pi_0 + 2 * cov(y_0, pred_0_0) - var(pred_0) - v_1_1 <- var_y1_pred11 / pi_1 + 2 * cov(y_1, pred_1_1) - var(pred_1) } - v_0_1 <- v_1_0 <- cov(y_0, pred_1_0) + cov(y_1, pred_0_1) - cov(pred_0, pred_1) + # tri_vars stores trt arm covariances + tri_vars <- list() + for (trtlvl in levels(data[[trt]])) { + + # Covar of observed arm i and corresponding counterfactual predictions for those on arm j + cov_obsi_predj <- sapply(levels(data[[trt]])[levels(data[[trt]]) != trtlvl], + \(x) cov(y_observed[[trtlvl]], cf_pred[cf_pred[[trt]] == trtlvl, x])) - ## Variance of theta_i = p_i(Y=1) on arm i = 0,1 - varcov <- matrix(c( - v_0_0, v_0_1, - v_1_0, v_1_1 - ), nrow = 2, ncol = 2) + # Covar of observed arm j and corresponding counterfactual predictions for those on arm i + cov_obsj_predi <- sapply(levels(data[[trt]])[levels(data[[trt]]) != trtlvl], + \(x) cov(y_observed[[x]], cf_pred[cf_pred[[trt]] == x, trtlvl])) - ## Variance adjustment based on stratified CAR scheme, formula 18: https://doi.org/10.1080/01621459.2022.2049278 + # Covar of counterfactual predictions for arm i and arm j for all subjects + cov_predi_predj <- sapply(levels(data[[trt]])[levels(data[[trt]]) != trtlvl], + \(x) cov(cf_pred[, trtlvl], cf_pred[, x])) + + tri_vars[[trtlvl]] <- cov_obsi_predj + cov_obsj_predi - cov_predi_predj + + } + + # Construct variance covariance matrix + varcov <- matrix(nrow = nlevels(data[[trt]]), + ncol = nlevels(data[[trt]])) + rownames(varcov) <- colnames(varcov) <- levels(data[[trt]]) + + for (trtlvl in rownames(varcov)) { + varcov[trtlvl, c(trtlvl, names(tri_vars[[trtlvl]]))] <- c(diag_vars[[trtlvl]], tri_vars[[trtlvl]]) + } + + # Variance adjustment based on stratified CAR scheme, formula 18: https://doi.org/10.1080/01621459.2022.2049278 if (!is.null(strata)) { - # warn if any single stratification variable has suspiciously many unique values + # Warn if any single stratification variable has suspiciously many unique values n.strata <- sapply(strata, function(x) length(unique(.get_data(object)[[x]]))) if (any(n.strata > 3)) { msg <- sprintf( @@ -201,10 +233,10 @@ varcov_ye <- function(object, trt, strata, mod) { } ye_strata_adjustment <- function(object, trt, strata) { - # get data + data <- .get_data(object) - # assert strata is a column name found in model data + # Assert strata is a column name found in model data if (!all(strata %in% colnames(data))) { msg <- sprintf('Stratification variable(s) "%s" must be found in the model', paste(strata[!strata %in% colnames(data)], collapse = ", ")) stop(msg, call. = FALSE) @@ -213,27 +245,28 @@ ye_strata_adjustment <- function(object, trt, strata) { strata_all <- Reduce(function(...) paste(..., sep = "-"), data[strata]) data <- cbind(data, strata_all) - pi_0 <- mean(data[, trt] == levels(data[[trt]])[1]) - pi_1 <- 1 - pi_0 + # Extract allocation information + pi_all <- prop.table(table(data[,trt])) + # Get predictions for observed data and calculate residuals data$preds <- predict(object, type = "response") data$resid <- object$y - data$preds get_rb <- function(z) { - # assume always two treatment levels - resid_0_z <- data$resid[(data[, trt] == levels(data[[trt]])[1]) & (data$strata_all == z)] - resid_1_z <- data$resid[(data[, trt] == levels(data[[trt]])[2]) & (data$strata_all == z)] - rb_0 <- (1 / pi_0) * mean(resid_0_z) - rb_1 <- (1 / pi_1) * mean(resid_1_z) - return(diag(c(rb_0, rb_1))) + rb_list <- list() + for (trtlvl in levels(data[[trt]])) { + resid_i_z <- data$resid[(data[[trt]] == trtlvl) & (data$strata_all == z)] + rb_list[[trtlvl]] <- (1 / pi_all[[trtlvl]]) * mean(resid_i_z) + } + return(diag(rb_list)) } - strata_levels <- levels(as.factor(data$strata_all)) + omega_sr <- diag(pi_all) - pi_all %*% t(pi_all) - omega_sr <- diag(c(pi_0, pi_1)) - c(pi_0, pi_1) %*% t(c(pi_0, pi_1)) - # always assume permuted block stratified randomization - omega_z <- matrix(0, nrow = length(unique((data[[trt]]))), ncol = length(unique((data[[trt]])))) + # Always assume permuted block stratified randomization + omega_z <- matrix(0, nrow = nlevels(data[[trt]]), ncol = nlevels(data[[trt]])) - # compute outer expectation + # Compute outer expectation + strata_levels <- levels(as.factor(data$strata_all)) erb_s <- lapply(strata_levels, function(x) { erb_i <- get_rb(x) %*% (omega_sr - omega_z) %*% get_rb(x) erb_i * (sum(data$strata_all == x) / nrow(data)) @@ -258,36 +291,25 @@ varcov_ge <- function(object, trt, type) { V <- sandwich::vcovHC(object, type = type) } - # counterfactual predictions + # Counterfactual predictions cf_pred <- object$counterfactual.predictions - pred_0 <- cf_pred$cf_pred_0 - pred_1 <- cf_pred$cf_pred_1 - - # get order of treatment column and assign counterfactual treatments - X0 <- X1 <- data - X0[, trt] <- factor(levels(data[[trt]])[1], levels = levels(data[[trt]])) - X1[, trt] <- factor(levels(data[[trt]])[2], levels = levels(data[[trt]])) - X0 <- model.matrix(object$formula, X0) - X1 <- model.matrix(object$formula, X1) - # compute derivatives - pderiv0 <- pred_0 * (1 - pred_0) - pderiv1 <- pred_1 * (1 - pred_1) - d0 <- (t(pderiv0) %*% as.matrix(X0)) / nrow(X0) - d1 <- (t(pderiv1) %*% as.matrix(X1)) / nrow(X1) - - # variance components aligned with varcov_ye implementation - v_0_0 <- d0 %*% V %*% t(d0) - v_1_1 <- d1 %*% V %*% t(d1) - v_0_1 <- d0 %*% V %*% t(d1) - v_1_0 <- d1 %*% V %*% t(d0) - - varcov_ge <- matrix( - c( - v_0_0, v_0_1, - v_1_0, v_1_1 - ), - nrow = 2, ncol = 2 - ) + # Get derivatives per treatment level counterfactual outcomes + d_list <- list() + for (trtlvl in levels(data[[trt]])) { + X_i <- data + X_i[, trt] <- factor(trtlvl, levels = levels(data[[trt]])) + X_i <- model.matrix(object$formula, X_i) + + # Compute derivative + pderiv_i <- cf_pred[, trtlvl] * (1 - cf_pred[, trtlvl]) + d_i <- (t(pderiv_i) %*% as.matrix(X_i)) / nrow(X_i) + d_list[[trtlvl]] <- d_i + } + + all_d <- do.call(rbind, d_list) + varcov_ge <- all_d %*% V %*% t(all_d) + rownames(varcov_ge) <- colnames(varcov_ge) <- levels(data[[trt]]) + return(varcov_ge) } diff --git a/R/get_marginal_effect.R b/R/get_marginal_effect.R index 1e56ff0..89a785e 100644 --- a/R/get_marginal_effect.R +++ b/R/get_marginal_effect.R @@ -81,38 +81,46 @@ get_marginal_effect <- function(object, trt, strata = NULL, apply_contrast(contrast, reference) data <- .get_data(object) - - trt_ref <- attr(object$marginal_est, "reference") - trt_inv <- levels(data[[trt]])[-which(levels(data[[trt]]) == trt_ref)] + n_trt_levels <- nlevels(data[[trt]]) outcome <- paste(object$formula[[2]]) - marginal_results <- data.frame( - TRTVAR = c(rep(trt, 12)), - TRTVAL = c( - rep(trt_inv, 5), rep(trt_ref, 5), - rep(attributes(object$marginal_est)[["contrast"]], 2) - ), - PARAM = rep(outcome, 12), - ANALTYP1 = c(rep(c(rep("DESCRIPTIVE", 3), rep("INFERENTIAL", 2)), 2), rep("INFERENTIAL", 2)), - STAT = c("N", "n", "%", "risk", "risk_se", "N", "n", "%", "risk", "risk_se", contrast, paste0(contrast, "_se")), - STATVAL = c( - sum(data[trt] == trt_inv), sum(data[outcome][data[trt] == trt_inv] == "1"), - sum(data[outcome][data[trt] == trt_inv] == "1") / sum(data[trt] == trt_inv) * 100, - object$counterfactual.means[trt_inv], sqrt(object$robust_varcov[trt_inv, trt_inv]), - sum(data[trt] == trt_ref), sum(data[outcome][data[trt] == trt_ref] == "1"), - sum(data[outcome][data[trt] == trt_ref] == "1") / sum(data[trt] == trt_ref) * 100, - object$counterfactual.means[trt_ref], sqrt(object$robust_varcov[trt_ref, trt_ref]), - object$marginal_est, object$marginal_se - ), - ANALMETH = c( - rep(c("count", "count", "percentage", "g-computation", attributes(object$marginal_se)[["type"]]), 2), - "g-computation", attributes(object$marginal_se)[["type"]] + # Marginal results for each trt level + marginal_responses <- data.frame( + TRTVAR = rep(trt, 5*n_trt_levels), + TRTVAL = rep(levels(data[[trt]]), each=5), + PARAM = rep(outcome, 5), + ANALTYP1 = rep(c(rep("DESCRIPTIVE", 3), rep("INFERENTIAL", 2)), n_trt_levels), + STAT = rep(c("N", "n", "%", "risk", "risk_se"), n_trt_levels), + STATVAL = c(sapply(levels(data[[trt]]), \(x) { + c( + sum(data[trt] == x), + sum(data[outcome][data[trt] == x] == "1"), + sum(data[outcome][data[trt] == x] == "1") / sum(data[trt] == x) * 100, + object$counterfactual.means[x], + sqrt(object$robust_varcov[x, x]) + ) + }) ), - ANALDESC = paste0("Computed using beeca@", packageVersion("beeca")) - ) |> - dplyr::as_tibble() + ANALMETH = rep(c("count", "count", "percentage", "g-computation", + attributes(object$marginal_se)[["type"]]), n_trt_levels) + ) + + # Contrast results + n_contrasts <- length(object$marginal_est) + + marginal_contrasts <- data.frame(TRTVAR = rep(trt, 2*n_contrasts), + TRTVAL = c(rbind(attributes(object$marginal_est)[["contrast"]], + attributes(object$marginal_se)[["contrast"]])), + PARAM = rep(outcome, 2*n_contrasts), + ANALTYP1 = c(rep("INFERENTIAL", 2*n_contrasts)), + STAT = rep(c(contrast, paste0(contrast, "_se")), n_contrasts), + STATVAL = c(rbind(object$marginal_est, object$marginal_se)), + ANALMETH = rep(c("g-computation", attributes(object$marginal_se)[["type"]]), n_contrasts) + ) - object$marginal_results <- marginal_results + object$marginal_results <- rbind(marginal_responses, + marginal_contrasts) |> dplyr::as_tibble() + object$marginal_results$ANALDESC <- paste0("Computed using beeca@", packageVersion("beeca")) return(object) } diff --git a/R/predict_counterfactuals.R b/R/predict_counterfactuals.R index 9b6a488..64eca24 100644 --- a/R/predict_counterfactuals.R +++ b/R/predict_counterfactuals.R @@ -61,22 +61,21 @@ predict_counterfactuals <- function(object, trt) { data <- .get_data(object) - # calculate counterfactual predictions for each trt level - X0 <- X1 <- data - X0[, trt] <- levels(data[[trt]])[1] - X1[, trt] <- levels(data[[trt]])[2] - cf_pred_0 <- predict(object, newdata = X0, type = "response") - cf_pred_1 <- predict(object, newdata = X1, type = "response") + # Calculate counterfactual predictions for each trt level + cf_preds <- list() + for (trtlevel in levels(data[[trt]])) { + X_cf <- data + X_cf[, trt] <- trtlevel + cf_pred <- predict(object, newdata = X_cf, type = "response") + cf_preds[[trtlevel]] <- cf_pred + } - # store predictions in tidy dataframe - cf_predictions <- data.frame( - cf_pred_0 = cf_pred_0, - cf_pred_1 = cf_pred_1 - ) |> + # Store predictions in tidy dataframe + cf_predictions <- do.call(cbind, cf_preds) |> dplyr::as_tibble() cf_lbl <- \(trt, x) paste0("Counterfactual predictions setting ", trt, "=", x, " for all records") - attr(cf_predictions, "label") <- c(cf_lbl(trt, levels(data[[trt]])[1]), cf_lbl(trt, levels(data[[trt]])[2])) + attr(cf_predictions, "label") <- sapply(levels(data[[trt]]), \(x) cf_lbl(trt, x)) attr(cf_predictions, "treatment.variable") <- trt object$counterfactual.predictions <- cf_predictions diff --git a/R/sanitize.R b/R/sanitize.R index 018e853..75d867e 100644 --- a/R/sanitize.R +++ b/R/sanitize.R @@ -117,9 +117,9 @@ sanitize_variable <- function(model, trt) { msg <- sprintf('Treatment variable "%s" must be of type factor, not "%s".', trt, typeof(data[[trt]])) stop(msg, call. = FALSE) } - # assert trt has 2 levels - if (nlevels(data[[trt]]) != 2) { - msg <- sprintf('Treatment variable "%s" must have 2 levels. Found %s: {%s}.', trt, length(levels(data[[trt]])), paste(levels(data[[trt]]), collapse = ",")) + # assert trt has at least 2 levels + if (nlevels(data[[trt]]) < 2) { + msg <- sprintf('Treatment variable "%s" must have at least 2 levels. Found %s: {%s}.', trt, nlevels(data[[trt]]), paste(levels(data[[trt]]), collapse = ",")) stop(msg, call. = FALSE) } # assert response is coded 0/1 @@ -142,7 +142,7 @@ sanitize_variable <- function(model, trt) { if (is.null(object$sanitized) || object$sanitized == FALSE) { if (warn) { warning("Input object did not meet the expected format for beeca. Results may not be valid. Consider using ?get_marginal_effect", - call. = FALSE + call. = FALSE ) } object <- sanitize_model(object, trt) diff --git a/cran-comments.md b/cran-comments.md index e670830..a57d509 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,18 +1,6 @@ -## Resubmission -This is a resubmission. In this version I have: - -* Updated the description to avoid starting with "A lightweight package..." - -* Removed unnecessary \dontrun{} wrapper for an example ## R CMD check results -0 errors | 0 warnings | 1 note - -I am aware of one possible note: The suggested package 'margins' was very recently reinstated to CRAN and binaries may not be available for certain platforms / R versions. +0 errors | 0 warnings | 0 notes -``` -* checking package dependencies ... NOTE -Package suggested but not available for checking: ‘margins’ -``` diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R index 72154ee..566d5be 100644 --- a/data-raw/DATASET.R +++ b/data-raw/DATASET.R @@ -24,7 +24,6 @@ trial02_cdisc <- safetyData::adam_adtte |> TRTP == "Xanomeline High Dose" ~ 3 ) ) |> - dplyr::filter(TRTPN != 2) |> # restrict to 2 arms in the data dplyr::select( USUBJID, PARAM, AGE, AGEGR1, AGEGR1N, RACE, RACEN, SEX, TRTP, TRTPN, AVAL, AVALC, FASFL diff --git a/data/trial02_cdisc.rda b/data/trial02_cdisc.rda index 923af6a..6f64fd6 100644 Binary files a/data/trial02_cdisc.rda and b/data/trial02_cdisc.rda differ diff --git a/man/apply_contrast.Rd b/man/apply_contrast.Rd index e896b12..c40590f 100644 --- a/man/apply_contrast.Rd +++ b/man/apply_contrast.Rd @@ -22,9 +22,10 @@ and or when computing confidence intervals using normal approximation. The choice of contrast affects how treatment effects are calculated and interpreted. Default is \code{diff}.} -\item{reference}{a string indicating which treatment group should be considered as -the reference level. Accepted values are one of the levels in the treatment -variable. Default to the first level used in the \code{glm} object. +\item{reference}{a string or list of strings indicating which treatment +group(s) to use as reference level for pairwise comparisons. Accepted values +must be a subset of the levels in the treatment variable. Default to the +first n-1 treatment levels used in the \code{glm} object. This parameter influences the calculation of treatment effects relative to the chosen reference group.} diff --git a/man/beeca-package.Rd b/man/beeca-package.Rd index a137665..9661251 100644 --- a/man/beeca-package.Rd +++ b/man/beeca-package.Rd @@ -12,6 +12,7 @@ Performs estimation of marginal treatment effects for binary outcomes when using Useful links: \itemize{ \item \url{https://openpharma.github.io/beeca/} + \item Report bugs at \url{https://github.com/openpharma/beeca/issues} } } diff --git a/tests/testthat/test-apply_contrasts.R b/tests/testthat/test-apply_contrasts.R index 66328ff..5c5d5be 100644 --- a/tests/testthat/test-apply_contrasts.R +++ b/tests/testthat/test-apply_contrasts.R @@ -1,5 +1,6 @@ -# set up example ---------------------------------------------------------- +# set up examples ---------------------------------------------------------- +# 2 arm example data01 <- trial01 |> transform(trtp = as.factor(trtp)) |> dplyr::filter(!is.na(aval)) @@ -13,6 +14,14 @@ example_varcov <- fit1$robust_varcov example_theta_s <- fit1$counterfactual.means[["0"]] example_theta_t <- fit1$counterfactual.means[["1"]] +# 3 arm example +data02 <- trial02_cdisc |> + transform(TRTPN = as.factor(TRTPN)) + +fit_3arm <- glm(AVAL ~ TRTPN + SEX, family = "binomial", data = data02) |> + predict_counterfactuals(trt = "TRTPN") |> + average_predictions() |> + estimate_varcov(method="Ye") # test warnings/errors ------------------------------------------------------------ @@ -69,9 +78,7 @@ test_that("Test valid contrast types", { test_that("Correctly throwing errors on mismatched arguments", { expect_error( fit1 |> - apply_contrast(contrast = "diffs", reference = "0"), - "'arg' should be one of" - ) + apply_contrast(contrast = "diffs", reference = "0")) }) test_that("Test handling of reference levels", { @@ -91,15 +98,25 @@ test_that("Test handling of reference levels", { expect_error( fit |> apply_contrast(contrast = "diffs", reference = "C"), - "Reference must be one of : A, B", + "Reference levels must be a subset of treatment levels : A, B.", + fixed = TRUE + ) + + # Too many reference levels + expect_error( + fit |> + apply_contrast(contrast = "diffs", reference = c("A", "B")), + "Too many reference levels provided.", fixed = TRUE ) + }) test_that("Check model is sanitized", { fit1$sanitized <- NULL - expect_warning(apply_contrast(fit1, reference = "0"), + expect_warning( + apply_contrast(fit1, reference = "0"), "Input object did not meet the expected format for beeca. Results may not be valid. Consider using ?get_marginal_effect", fixed = TRUE ) @@ -272,3 +289,215 @@ test_that("logor contrast is correct for variance estimate", { ) ) }) + + +# tests for data with 3 arms ---------------------------------------- + +test_that("Reference levels correctly handled when >2 treatment levels", { + + expect_warning( + fit_ref12 <- fit_3arm |> apply_contrast() + ) + + # Default reference levels are first n-1 treatment levels + expect_equal( + attr(fit_ref12$marginal_est, "reference"), + fit_3arm$xlevels$TRTPN[-nlevels(fit_3arm$data$TRTPN)] + ) + + expect_equal( + attr(fit_ref12$marginal_est, "contrast"), + c("diff: 2-1", "diff: 3-1", "diff: 3-2") + ) + + # Custom order of reference levels is correctly handled + fit_ref32 <- fit_3arm |> + apply_contrast(contrast = "diff", reference = c("3", "2")) + + expect_equal( + attr(fit_ref32$marginal_est, "contrast"), + c("diff: 1-2", "diff: 1-3", "diff: 2-3") + ) + + expect_equal( + fit_ref32$marginal_est[["diff: 1-2"]], + -fit_ref12$marginal_est[["diff: 2-1"]] + ) + expect_equal( + fit_ref32$marginal_est[["diff: 1-3"]], + -fit_ref12$marginal_est[["diff: 3-1"]] + ) + expect_equal( + fit_ref32$marginal_est[["diff: 2-3"]], + -fit_ref12$marginal_est[["diff: 3-2"]] + ) + + expect_equal( + unname(fit_ref32$marginal_se)[1:3], + unname(fit_ref12$marginal_se)[1:3] + ) + + + gr <- matrix(c(-1,-1,0, + 1,0,-1, + 0,1,1), + nrow=3, ncol=3) + expect_equal( + unname(fit_ref12$marginal_se)[1:3], + sqrt(diag(gr %*% fit_ref12$robust_varcov %*% t(gr))) + ) + + + # Switched references + fit_ref23 <- fit_3arm |> + apply_contrast(contrast = "diff", reference = c("2", "3")) + + gr <- matrix(c(1,0,1, + -1,-1,0, + 0,1,-1), + nrow=3, ncol=3) + + expect_equal( + unname(fit_ref23$marginal_se)[1:3], + sqrt(diag(gr %*% fit_ref23$robust_varcov %*% t(gr))) + ) + + + # Single reference + fit_ref2 <- fit_3arm |> + apply_contrast(contrast = "diff", reference = "2") + + gr <- matrix(c(1,0,-1, + -1,0,1), + nrow=2, ncol=3) + expect_equal( + unname(fit_ref2$marginal_se)[1:2], + sqrt(diag(gr %*% fit_ref2$robust_varcov %*% t(gr))) + ) + + + # Check expected values for other contrast type + # (logor) + fit_ref12_lor <- fit_3arm |> + apply_contrast(contrast = "logor", reference = c("1", "2")) + + fit_ref23_lor <- fit_3arm |> + apply_contrast(contrast = "logor", reference = c("2", "3")) + + expect_equal( + unname(fit_ref12_lor$marginal_est)[1:3], + c(1.70521672372, 1.60865867380, -0.09655804992) + ) + expect_equal( + unname(fit_ref12_lor$marginal_se)[1:3], + c(0.3385774965, 0.3336867307, 0.3470551734) + ) + + expect_equal( + unname(fit_ref23_lor$marginal_est)[1:3], + c(-1.70521672372, -0.09655804992, -1.60865867380) + ) + expect_equal( + unname(fit_ref23_lor$marginal_se)[1:3], + c(0.3385774965, 0.3470551734, 0.3336867307) + ) + +}) + + + + + +# if RobinCar is available compare contrast results +robincar_available <- requireNamespace("RobinCar", quietly = T) + +if (robincar_available){ + + test_that("Contrast results match RobinCar: diff", { + rc_diff <- RobinCar::robincar_glm(data.frame(fit_3arm$data), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + g_family = fit_3arm$family, + contrast_h = "diff") + fit_ref1 <- fit_3arm |> + apply_contrast(contrast = "diff", reference = "1") + + expect_equal( + unname(fit_ref1$marginal_est[1:2]), + unname(rc_diff$contrast$result$estimate) + ) + + expect_equal( + unname(fit_ref1$marginal_se[1:2]), + unname(rc_diff$contrast$result$se) + ) + + # switch reference + rc_diff_switched <- RobinCar::robincar_glm(data.frame(fit_3arm$data) |> + transform(TRTPN = factor(TRTPN, levels=c("3", "1", "2"))), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + g_family = fit_3arm$family, + contrast_h = "diff") + fit_ref3 <- fit_3arm |> + apply_contrast(contrast = "diff", reference = "3") + + expect_equal( + unname(fit_ref3$marginal_est[1:2]), + unname(rc_diff_switched$contrast$result$estimate) + ) + + expect_equal( + unname(fit_ref3$marginal_se[1:2]), + unname(rc_diff_switched$contrast$result$se) + ) + + }) + + + test_that("Contrast results match RobinCar: risk ratio", { + rc_rr <- RobinCar::robincar_glm(data.frame(fit_3arm$data), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + g_family = fit_3arm$family, + contrast_h = "ratio") + fit_ref1 <- fit_3arm |> + apply_contrast(contrast = "rr", reference = "1") + + expect_equal( + unname(fit_ref1$marginal_est[1:2]), + unname(rc_rr$contrast$result$estimate) + ) + + expect_equal( + unname(fit_ref1$marginal_se[1:2]), + unname(rc_rr$contrast$result$se) + ) + + # switch reference + rc_rr_switched <- RobinCar::robincar_glm(data.frame(fit_3arm$data) |> + transform(TRTPN = factor(TRTPN, levels=c("3", "1", "2"))), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + g_family = fit_3arm$family, + contrast_h = "ratio") + fit_ref3 <- fit_3arm |> + apply_contrast(contrast = "rr", reference = "3") + + expect_equal( + unname(fit_ref3$marginal_est[1:2]), + unname(rc_rr_switched$contrast$result$estimate) + ) + + expect_equal( + unname(fit_ref3$marginal_se[1:2]), + unname(rc_rr_switched$contrast$result$se) + ) + + }) + +} diff --git a/tests/testthat/test-average_predictions.R b/tests/testthat/test-average_predictions.R index 94b52a6..f840308 100644 --- a/tests/testthat/test-average_predictions.R +++ b/tests/testthat/test-average_predictions.R @@ -12,8 +12,8 @@ test_that("averaging counterfactual predictions works", { fit1 <- average_predictions(fit1) expected_output <- c( - "0" = mean(fit1$counterfactual.predictions$cf_pred_0), - "1" = mean(fit1$counterfactual.predictions$cf_pred_1) + "0" = mean(fit1$counterfactual.predictions[["0"]]), + "1" = mean(fit1$counterfactual.predictions[["1"]]) ) expect_equal(fit1$counterfactual.means, expected_output) diff --git a/tests/testthat/test-estimate_varcov.R b/tests/testthat/test-estimate_varcov.R index c789e77..f754b11 100644 --- a/tests/testthat/test-estimate_varcov.R +++ b/tests/testthat/test-estimate_varcov.R @@ -1,4 +1,4 @@ -# set up example ---------------------------------------------------------- +# set up examples ---------------------------------------------------------- data01 <- trial01 |> transform(trtp = as.factor(trtp)) |> @@ -9,6 +9,14 @@ fit1 <- glm(aval ~ trtp + bl_cov, family = "binomial", data = data01) |> gr <- c(-1, 1) +# 3 arm example +data02 <- trial02_cdisc |> + transform(TRTPN = as.factor(TRTPN)) + +fit_3arm <- glm(AVAL ~ TRTPN + SEX, family = "binomial", data = data02) |> + beeca::predict_counterfactuals(trt = "TRTPN") |> + beeca::average_predictions() + # test warnings/errors ------------------------------------------------------------ @@ -20,7 +28,7 @@ test_that("Correctly throwing errors on missing components", { ) }) -test_that("Correctly throwing warnings on arugment mismatch", { +test_that("Correctly throwing errors on argument mismatch", { expect_error( fit1 |> estimate_varcov(method = "Fe") @@ -34,6 +42,13 @@ test_that("Correctly throwing errors on missing stratification variable", { ) }) +test_that("Correctly throwing warnings when strata supplied with method Ge", { + expect_warning( + fit1 |> + estimate_varcov(method = "Ge", strata = "bl_cov_c") + ) +}) + test_that("Check model is sanitized", { fit1$sanitized <- NULL @@ -50,8 +65,10 @@ test_that("Check model is sanitized", { V1 <- estimate_varcov(fit1, method = "Ye")$robust_varcov +V1_3arm <- estimate_varcov(fit_3arm, method = "Ye")$robust_varcov + # if RobinCar is available -robincar_available <- requireNamespace("RobinCar", versionCheck = list(name = "RobinCar", op = "==", version = "0.3.0"), quietly = T) +robincar_available <- requireNamespace("RobinCar", quietly = T) if (robincar_available){ test_that("Correct variance calculation for Ye's method matching RobinCar", { @@ -66,6 +83,20 @@ if (robincar_available){ )$contrast$varcov[1,1] ) }) + + + test_that("3-arm: Correct variance calculation for Ye's method matching RobinCar", { + expect_equal( + matrix(V1_3arm, nrow=3, ncol=3), + unname(RobinCar::robincar_glm(data.frame(fit_3arm$data), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + g_family = fit_3arm$family)$varcov) + ) + }) + + } else { test_that("Correct variance calculation for Ye's method matching RobinCar", { expect_equal( @@ -74,9 +105,27 @@ if (robincar_available){ ) }) + test_that("3-arm: Correct variance calculation for Ye's method matching RobinCar", { + expect_equal( + matrix(V1_3arm, nrow=3, ncol=3), + matrix(c(2.652272364e-03, 2.576978801e-06, -5.244417729e-07, + 2.576978801e-06, 2.303420439e-03, 1.053457633e-05, + -5.244417729e-07, 1.053457633e-05, 2.379076091e-03), + nrow=3, ncol=3) + ) + }) + } +# varcov must be symmetric +test_that("Ye varcov is symmetric", { + expect_equal( + V1[lower.tri(V1)], + V1[upper.tri(V1)] + ) +}) + ## Ye - stratification ----------------------------------------------------- @@ -85,23 +134,43 @@ fit2 <- glm(aval ~ trtp + bl_cov + bl_cov_c2, data = data01 |> transform(bl_cov_ predict_counterfactuals(trt = "trtp") |> average_predictions() -V2 <- estimate_varcov(fit2, method = "Ye", strata = "bl_cov_c2")$robust_varcov +expect_warning( + V2 <- estimate_varcov(fit2, method = "Ye", strata = "bl_cov_c2")$robust_varcov, + "More than three unique values are found in stratification variable bl_cov_c2. Please double check if the correct stratification variables are supplied via `strata` argument.", + fixed = TRUE +) + +V2_3arm <- estimate_varcov(fit_3arm, method = "Ye", strata = "SEX")$robust_varcov if (robincar_available){ -test_that("Correct variance calculation for Ye's method matcing RobinCar", { - expect_equal( - (t(gr) %*% V2 %*% gr)[1, 1], - RobinCar::robincar_glm(data.frame(fit2$data), - response_col = as.character(fit2$formula[2]), - treat_col = "trtp", - formula = fit2$formula, - car_strata_cols = "bl_cov_c2", - car_scheme = "permuted-block", - g_family = fit2$family, - contrast_h = "diff", - )$contrast$varcov[1,1] - ) -}) + test_that("Correct variance calculation for Ye's method matcing RobinCar", { + expect_equal( + (t(gr) %*% V2 %*% gr)[1, 1], + RobinCar::robincar_glm(data.frame(fit2$data), + response_col = as.character(fit2$formula[2]), + treat_col = "trtp", + formula = fit2$formula, + car_strata_cols = "bl_cov_c2", + car_scheme = "permuted-block", + g_family = fit2$family, + contrast_h = "diff", + )$contrast$varcov[1,1] + ) + }) + + test_that("3-arm: Correct variance calculation for Ye-stratified method matching RobinCar", { + expect_equal( + matrix(V2_3arm, nrow=3, ncol=3), + unname(RobinCar::robincar_glm(data.frame(fit_3arm$data), + response_col = "AVAL", + treat_col = "TRTPN", + formula = fit_3arm$formula, + car_strata_cols = "SEX", + car_scheme = "permuted-block", + g_family = fit_3arm$family)$varcov) + ) + }) + } else { test_that("Correct variance calculation for Ye's method matching RobinCar", { expect_equal( @@ -110,8 +179,27 @@ test_that("Correct variance calculation for Ye's method matcing RobinCar", { ) }) + + test_that("3-arm: Correct variance calculation for Ye-stratified method matching RobinCar", { + expect_equal( + matrix(V2_3arm, nrow=3, ncol=3), + matrix(c(2.622886513e-03, -7.797320489e-06, -4.923527008e-06, + -7.797320489e-06, 2.288915038e-03, 1.360521021e-05, + -4.923527008e-06, 1.360521021e-05, 2.376256827e-03), + nrow=3, ncol=3) + ) + }) + } +# varcov must be symmetric +test_that("Ye-stratified varcov is symmetric", { + expect_equal( + V2[lower.tri(V2)], + V2[upper.tri(V2)] + ) +}) + ## Ye - mod ---------------------------------------------------------------- @@ -133,6 +221,15 @@ test_that("Correct variance calculation for Ye's method based on original manusc +# varcov must be symmetric +test_that("Ye-mod varcov is symmetric", { + expect_equal( + V3[lower.tri(V3)], + V3[upper.tri(V3)] + ) +}) + + ## Ge ---------------------------------------------------------------------- ge_var_paper <- function(glmfit, trt) { @@ -179,11 +276,20 @@ ge_var_paper <- function(glmfit, trt) { return(difP(glmfit)$var[[1]]) } -V3 <- estimate_varcov(fit1, method = "Ge", type = "model-based")$robust_varcov +V4 <- estimate_varcov(fit1, method = "Ge", type = "model-based")$robust_varcov -test_that("Correct variance calculation for Ge's method matcing manuscript code", { +test_that("Correct variance calculation for Ge's method matching manuscript code", { expect_equal( - (t(gr) %*% V3 %*% gr)[1, 1], + (t(gr) %*% V4 %*% gr)[1, 1], ge_var_paper(fit1, "trtp") ) }) + + +# varcov must be symmetric +test_that("Ge varcov is symmetric", { + expect_equal( + V4[lower.tri(V4)], + V4[upper.tri(V4)] + ) +}) diff --git a/tests/testthat/test-get_marginal_effect.R b/tests/testthat/test-get_marginal_effect.R index 8dd2ad4..396f6c9 100644 --- a/tests/testthat/test-get_marginal_effect.R +++ b/tests/testthat/test-get_marginal_effect.R @@ -7,26 +7,32 @@ data01 <- trial01 |> fit1 <- glm(aval ~ trtp + bl_cov, family = "binomial", data = data01) |> get_marginal_effect(trt = "trtp", method = "Ye", contrast = "diff", reference = "0") +# 3-arm example +data02 <- trial02_cdisc |> + transform(TRTPN = as.factor(TRTPN)) + +fit_3arm <- glm(AVAL ~ TRTPN + SEX, family = "binomial", data = data02) |> + get_marginal_effect(trt = "TRTPN", method = "Ye", contrast = "diff", reference = "1") + + # test warnings/errors ------------------------------------------------------------ test_that("Correctly throwing errors on missing argument", { expect_error( glm(aval ~ trtp + bl_cov, family = "binomial", data = data01) |> - get_marginal_effect(method = "Ye"), 'argument "trt" is missing, with no default' - ) + get_marginal_effect(method = "Ye")) }) -test_that("Correctly throwing errors on arugment mismatch", { +test_that("Correctly throwing errors on argument mismatch", { expect_error( glm(aval ~ trtp + bl_cov, family = "binomial", data = data01) |> - get_marginal_effect(method = "Fe"), 'argument "trt" is missing, with no default' - ) + get_marginal_effect(trt = "trtp", method = "Fe")) }) test_that("Correctly throwing warnings on missing value", { expect_warning( glm(aval ~ trtp + bl_cov, family = "binomial", data = trial01 |> - transform(trtp = as.factor(trtp))) |> + transform(trtp = as.factor(trtp))) |> get_marginal_effect(trt = "trtp", method = "Ye", reference = "0"), "There is 1 record omitted from the original data due to missing values, please check if they should be imputed prior to model fitting." ) @@ -42,3 +48,12 @@ test_that("Correctly producing results in ARD format", { test_that("Correctly producing results", { expect_false(any(is.na(fit1$marginal_results))) }) + + +test_that("3-arm: Correctly producing results in ARD format", { + expect_equal(dim(fit_3arm$marginal_results), c(19, 8)) +}) + +test_that("3-arm: Correctly producing results", { + expect_false(any(is.na(fit_3arm$marginal_results))) +}) diff --git a/tests/testthat/test-sanitize.R b/tests/testthat/test-sanitize.R index a17108f..3b5172b 100644 --- a/tests/testthat/test-sanitize.R +++ b/tests/testthat/test-sanitize.R @@ -55,20 +55,13 @@ test_that("Treatment variable in model data", { test_that("Check treatment variable is a factor", { data_complete$trtp <- as.numeric(data_complete$trtp) fit1 <- glm(aval ~ trtp + bl_cov, family = binomial(link = "logit"), data = data_complete) - expect_error(sanitize_model(fit1, "trtp"), + expect_error( + sanitize_model(fit1, "trtp"), 'Treatment variable "trtp" must be of type factor, not "double".', fixed = TRUE ) }) -test_that("Correctly throwing errors on treatment variable > 2 levels", { - fit1 <- glm(aval ~ trtp + bl_cov, family = "binomial", data = trial01 |> transform(trtp = factor(as.numeric(trtp) + rbinom(268, size = 1, prob = 0.5)))) - expect_error(sanitize_model(fit1, "trtp"), - 'Treatment variable "trtp" must have 2 levels. Found 3: {0,1,2}.', - fixed = TRUE - ) -}) - test_that("Check response variable is 0/1", { data_complete$aval <- replace(data_complete$aval, data_complete$aval == "0", "0.5") levels(data_complete[["aval"]]) <- c("0.5", "1") @@ -80,7 +73,8 @@ test_that("Check response variable is 0/1", { test_that("Correctly throw error on incorrect model class", { lm_fit <- stats::lm(aval ~ trtp + bl_cov, data = data_complete) - expect_error(sanitize_model(lm_fit, "trtp"), + expect_error( + sanitize_model(lm_fit, "trtp"), 'Model of class "lm" is not supported.', fixed = TRUE ) @@ -98,7 +92,8 @@ test_that("Throw warning if model matrix not full rank", { mat[["x3"]] <- mat$x1 + mat$x2 fit1 <- glm(y ~ trtp + x1 + x2 + x3, family = "binomial", data = mat) - expect_error(sanitize_model(fit1, "trtp"), + expect_error( + sanitize_model(fit1, "trtp"), "The data does not have full rank, please check glm model fitting.", fixed = TRUE ) @@ -109,12 +104,14 @@ test_that("Throw warning if model not converged", { # fit glm with reduced max iterations so does not converge suppressWarnings( fit1 <- glm(aval ~ trtp, - family = "binomial", data = data, - control = glm.control(maxit = 1) + family = "binomial", + data = data_complete, + control = glm.control(maxit = 1) ) ) - expect_warning(sanitize_model(fit1, "trtp"), + expect_warning( + sanitize_model(fit1, "trtp"), "The glm model was not converged, please check glm model fitting.", fixed = TRUE ) @@ -124,7 +121,8 @@ test_that("Throw warning if model not converged", { test_that("Throw error if treatment not on right hand side of model formula", { fit1 <- glm(trtp ~ aval, family = "binomial", data = data) - expect_error(sanitize_variable(fit1, "trtp"), + expect_error( + sanitize_variable(fit1, "trtp"), 'Did not find the treatment variable "trtp" on right hand side of the model formula', fixed = TRUE ) diff --git a/vignettes/estimand_and_implementations.Rmd b/vignettes/estimand_and_implementations.Rmd index a15fb81..d22fb1e 100644 --- a/vignettes/estimand_and_implementations.Rmd +++ b/vignettes/estimand_and_implementations.Rmd @@ -70,8 +70,6 @@ library(dplyr) ```{r} ## Prepare the dataset for input dat <- trial02_cdisc %>% - ## In this case we are only interested in comparing two arms: Placebo vs Xanomeline High Dose - dplyr::filter(TRTP %in% c("Placebo", "Xanomeline High Dose")) %>% ## Treatment variable must be coded as a factor dplyr::mutate(TRTP = factor(TRTP)) @@ -113,8 +111,12 @@ ci_u <- diff_est + (qnorm(0.975) * diff_se) z_score <- diff_est / diff_se p_value <- 2 * (1 - pnorm(abs(z_score))) -sprintf("The risk difference is %s with 95%% CI: (%s - %s)", round(diff_est, 2), round(ci_l, 2), round(ci_u, 2)) -sprintf("p-value: %s", formatC(p_value, format = "e", digits = 2)) +tibble( + Contrast = marginal_results[marginal_results$STAT == "diff", ]$TRTVAL, + `Risk difference` = round(diff_est, 2), + `95% CI` = sprintf("(%s - %s)", round(ci_l, 2), round(ci_u, 2)), + `p-value` = formatC(p_value, format = "e", digits = 2) +) ``` ## Comparing different implementations @@ -233,7 +235,7 @@ cat("Point estimate", beeca_ye$marginal_est, "\nStandard error estimate", beeca_ **Version 1**: from `RobinCar` package ```{r} -if (requireNamespace("RobinCar", versionCheck = list(name = "RobinCar", op = "==", version = "0.3.0"), quietly = T)) { +if (requireNamespace("RobinCar", quietly = T)) { robincar_ye <- RobinCar::robincar_glm(data.frame(fit$data), response_col = as.character(fit$formula[2]), treat_col = "trtp", formula = fit$formula, g_family = fit$family, contrast_h = "diff")$contrast$result