Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate one-stop-shop causens API #73

Merged
merged 9 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
run: |
devtools::load_all()
library(lintr)
lint_dir(linters = linters_with_defaults(line_length_linter(120), commented_code_linter=NULL, object_name_linter=NULL))
lint_dir(linters = linters_with_defaults(commented_code_linter=NULL, object_name_linter=NULL))
shell: Rscript {0}
env:
LINTR_ERROR_ON_LINT: true
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ docs
inst/doc
/doc/
/Meta/
.lintr
..Rcheck/
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ S3method(summary,bayesian_causens)
S3method(summary,causens_sf)
S3method(summary,monte_carlo_causens)
export(bayesian_causens)
export(causens)
export(causens_monte_carlo)
export(causens_sf)
export(gData_U_binary_Y_binary)
export(gData_U_binary_Y_cont)
export(gData_U_cont_Y_binary)
export(gData_U_cont_Y_cont)
export(plot_causens)
export(process_model_formula)
export(sf)
export(simulate_data)
importFrom(graphics,legend)
Expand Down
102 changes: 65 additions & 37 deletions R/bayesian_causens.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
#' @title Bayesian parametric sensitivity analysis for causal inference
#' @description This function runs a Bayesian sensitivity analysis for causal
#' inference using JAGS or Stan as a backend.
#' @param exposure Exposure variable name in the data frame.
#' @param outcome Outcome variable name in the data frame.
#' @param confounders Confounders' names in the data frame.
#' @param data A data frame containing the exposure, outcome, and confounder variables.
#' inference using JAGS or Stan as a backend. For now, only JAGS is supported.
#' @param trt_model The treatment model object as a formula.
#' @param outcome_model The outcome model object as a formula.
#' @param U_model The unmeasured confounder model object as
#' a formula.
#' @param data A data frame containing the exposure, outcome, and confounder
#' variables.
#' @param beta_uy Prior distribution for the effect of the missing confounder U
#' on the outcome Y.
#' @param alpha_uz Prior distribution for the effect of the missing confounder U
#' on the treatment assignment mechanism Z.
#' @param backend The backend to use for the sensitivity analysis. Currently
#' only "jags" is supported.
#' @param output_trace Whether to output the full trace of the MCMC sampler.
Expand All @@ -13,27 +19,40 @@
#' variable on the outcome, as well as the confounder-adjusted causal effect.
#' @importFrom stats sd update
#' @export
bayesian_causens <- function(exposure, outcome, confounders, data, backend = "jags", output_trace = FALSE, ...) {
bayesian_causens <- function(trt_model, outcome_model,
U_model, data,
beta_uy = ~ dunif(-2, 2),
alpha_uz = ~ dunif(-2, 2),
backend = "jags", output_trace = FALSE, ...) {
sampler_args <- parse_args(...)

# Notation from Bayesian SA paper
Z <- data[[exposure]]
C <- data[confounders]
Y <- data[[outcome]]
N <- nrow(C)
trt_model_info <- process_model_formula(trt_model, data)
Z <- data[[trt_model_info$response_var_name]]
trt_C <- data[trt_model_info$confounder_names]

outcome_model_info <- process_model_formula(outcome_model, data)
Y <- data[[outcome_model_info$response_var_name]]
outcome_C <- data[outcome_model_info$confounder_names]

U_model_info <- process_model_formula(U_model, data)
unmeasured_confounder_C <- data[U_model_info$confounder_names]
# no U variable since it's the missing confounder!

N <- nrow(data)

binary_outcome <- all(Y %in% c(0, 1))

jags_model <- create_jags_model(binary_outcome)
jags_model <- create_jags_model(binary_outcome, beta_uy, alpha_uz)

inits <- list(
beta_Z = 0,
beta_C = rep(0, ncol(C)),
beta_C = rep(0, ncol(outcome_C)),
beta_0 = 0,
beta_U = 0,
gamma_C = rep(0, ncol(C)),
alpha_C = rep(0, ncol(C)),
beta_uy = 0,
gamma_C = rep(0, ncol(unmeasured_confounder_C)),
alpha_C = rep(0, ncol(trt_C)),
alpha_0 = 0,
alpha_uz = 0,
.RNG.name = "base::Wichmann-Hill",
.RNG.seed = 123
)
Expand All @@ -49,12 +68,14 @@ bayesian_causens <- function(exposure, outcome, confounders, data, backend = "ja
textConnection(jags_model),
data = list(
Z = Z,
C = C,
trt_C = trt_C,
outcome_C = outcome_C,
unmeasured_confounder_C = unmeasured_confounder_C,
Y = Y,
N = N,
p_outcome = ncol(C),
p_treatment = ncol(C),
p_unmeasured_confounder = ncol(C)
p_outcome = ncol(outcome_C),
p_treatment = ncol(trt_C),
p_U = ncol(unmeasured_confounder_C)
),
inits = inits
)
Expand All @@ -64,14 +85,17 @@ bayesian_causens <- function(exposure, outcome, confounders, data, backend = "ja
# Extract the posterior samples
samples <- rjags::coda.samples(
model,
variable.names = c("beta_Z", "beta_C", "beta_0", "gamma_C", "alpha_C", "alpha_U", "alpha_0"),
variable.names = c(
"beta_Z", "beta_C", "beta_uy", "beta_0", "gamma_C",
"alpha_C", "alpha_uz", "alpha_0"
),
n.iter = sampler_args$n_samples,
thin = 1
)

causens_obj <- list()
class(causens_obj) <- "bayesian_causens"
causens_obj$call <- paste(exposure, "~", paste(confounders, collapse = " + "))
causens_obj$call <- trt_model
causens_obj$estimated_ate <- mean(samples[[1]][, "beta_Z"])
causens_obj$std_error <- sd(samples[[1]][, "beta_Z"])
causens_obj$ci <- quantile(samples[[1]][, "beta_Z"], c(0.025, 0.975))
Expand Down Expand Up @@ -113,9 +137,9 @@ parse_args <- function(...) {
#'
#' No inputs are given to this function (for now) since data-related information
#' is provided in jags.model() during model initialization.
create_jags_model <- function(binary_outcome) {
# including modelling of unmeasured binary confounder (`eta` is the linear predictor)
likelihood <- "
create_jags_model <- function(binary_outcome, beta_uy, alpha_uz) {
# including modelling of unmeasured binary confounder
likelihood <- paste0("
# Outcome model parameters

beta_0 ~ dunif(-2, 2)
Expand All @@ -125,13 +149,14 @@ create_jags_model <- function(binary_outcome) {
beta_C[c] ~ dunif(-2, 2)
}

beta_U ~ dunif(-2, 2)
"
beta_uy", deparse(beta_uy))

if (binary_outcome) {
likelihood <- paste0(likelihood, "
for (i in 1:N) {
logit(p_Y[i]) <- beta_0 + beta_Z * Z[i] + sum(C[i, 1:p_outcome] * beta_C[1:p_outcome]) + beta_U * U[i]
logit(p_Y[i]) <- beta_0 + beta_Z * Z[i] +
sum(outcome_C[i, 1:p_outcome] * beta_C[1:p_outcome]) +
beta_uy * U[i]
Y[i] ~ dbern(p_Y[i])
}
")
Expand All @@ -140,7 +165,9 @@ create_jags_model <- function(binary_outcome) {
tau_Y ~ dgamma(0.1, 0.1)

for (i in 1:N) {
mu_Y[i] <- beta_0 + beta_Z * Z[i] + sum(C[i, 1:p_outcome] * beta_C[1:p_outcome]) + beta_U * U[i]
mu_Y[i] <- beta_0 + beta_Z * Z[i] +
sum(outcome_C[i, 1:p_outcome] * beta_C[1:p_outcome]) +
beta_uy * U[i]
Y[i] ~ dnorm(mu_Y[i], tau_Y)
}
")
Expand All @@ -149,17 +176,17 @@ create_jags_model <- function(binary_outcome) {
unmeasured_confounder <- "
# Unmeasured Confounder parameters

for (u in 1:p_unmeasured_confounder) {
for (u in 1:p_U) {
gamma_C[u] ~ dunif(-2, 2)
}

for (i in 1:N) {
logit(p_U[i]) <- sum(C[i, 1:p_unmeasured_confounder] * gamma_C[1:p_unmeasured_confounder])
U[i] ~ dbern(p_U[i])
logit(prob_U[i]) <- sum(unmeasured_confounder_C[i, 1:p_U] * gamma_C[1:p_U])
U[i] ~ dbern(prob_U[i])
}
"

treatment_model <- "
treatment_model <- paste0("
# Treatment model parameters

alpha_0 ~ dunif(-2, 2)
Expand All @@ -168,13 +195,14 @@ create_jags_model <- function(binary_outcome) {
alpha_C[z] ~ dunif(-2, 2)
}

alpha_U ~ dunif(-2, 2)

for (i in 1:N) {
logit(p_Z[i]) <- alpha_0 + sum(C[i, 1:p_treatment] * alpha_C[1:p_treatment]) + alpha_U * U[i]
logit(p_Z[i]) <- alpha_0 +
sum(trt_C[i, 1:p_treatment] * alpha_C[1:p_treatment]) +
alpha_uz * U[i]
Z[i] ~ dbern(p_Z[i])
}
"

alpha_uz", deparse(alpha_uz))

return(paste0(
"model{\n",
Expand Down
53 changes: 0 additions & 53 deletions R/causens.R

This file was deleted.

20 changes: 14 additions & 6 deletions R/causens_monte_carlo.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
#' @title Monte Carlo sensitivity analysis for causal effects
#'
#' @description This function performs a Monte Carlo sensitivity analysis for causal effects.
#' @description This function performs a Monte Carlo sensitivity analysis for
#' causal effects.
#'
#' @param exposure The name of the exposure variable in the data frame.
#' @param outcome The name of the outcome variable in the data frame.
#' @param confounders The name of the confounders in the data frame.
#' @param data A data frame containing the exposure, outcome, and confounder variables.
#' @param data A data frame containing the exposure, outcome, and confounder
#' variables.
#' @param ... Additional arguments to be passed to the function.
#' @return The estimated causal effect.
#' @importFrom stats as.formula rnorm runif sd
#' @export
causens_monte_carlo <- function(exposure, outcome, confounders, data, ...) {
formula <- as.formula(paste(outcome, "~", exposure, "+", paste(confounders, collapse = "+")))
causens_monte_carlo <- function(outcome, exposure, confounders, data, ...) {
# rather than taking a general model formula, the model formulation here
# is more restrictive and has to be additive without interactions (for now)
formula <- as.formula(paste(
outcome, "~", exposure, "+",
paste(confounders, collapse = "+")
))

if (all(data[[outcome]] %in% c(0, 1))) {
naive_model <- glm(formula, data = data, family = binomial)
} else {
stop("Monte Carlo sensitivity analysis only supports binary outcomes, for now")
stop("Monte Carlo sensitivity analysis only supports binary outcomes")
}

trt_effect_hat <- summary(naive_model)$coefficients["Z", "Estimate"]
Expand All @@ -31,7 +38,8 @@ causens_monte_carlo <- function(exposure, outcome, confounders, data, ...) {
g0 <- runif(1, -5, 5)
gX <- runif(1, -2, 2)

return(trt_effect - log((1 + exp(bU + g0 + gX)) * (1 + exp(g0))) + log((1 + exp(bU + g0)) * (1 + exp(g0 + gX))))
return(trt_effect - log((1 + exp(bU + g0 + gX)) * (1 + exp(g0))) +
log((1 + exp(bU + g0)) * (1 + exp(g0 + gX))))
}

args <- list(...)
Expand Down
24 changes: 16 additions & 8 deletions R/causens_sf.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,26 @@
#' @param fitted_model The treatment model object as a glm.
#' @param exposure The name of the exposure variable.
#' @param outcome The name of the outcome variable.
#' @param data A data frame containing the exposure, outcome, and confounder variables.
#' @param data A data frame containing the exposure, outcome, and confounder
#' variables.
#' @param bootstrap A logical indicating whether to perform bootstrap estimation
#' of the 95\% confidence interval.
#' @param B If the bootstrap argument is TRUE, the number of bootstrap samples
#' to generate.
#' @param ... Additional arguments to be passed to the sensitivity function.
#' @importFrom stats predict
#'
#' @return A point estimate of the corrected ATE.
#' @export
causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE, ...) {
causens_sf <- function(trt_model, outcome, data, bootstrap = FALSE,
B = 1000, ...) {
processed_info <- process_model_formula(trt_model, data)
y <- data[[outcome]]
z <- data[[exposure]]
z <- data[[processed_info$response_var_name]]

e <- predict(fitted_model, type = "response")
e <- predict(processed_info$fitted_model, type = "response")

c1 <- sf(z = 1, e = e, ...)
c1 <- sf(z = 1, e = e, ...) # c1, c0, s1, s0 may be passed as kwargs
c0 <- sf(z = 0, e = 1 - e, ...)

# Calculate the Average Treatment Effect
Expand All @@ -34,6 +39,7 @@ causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE,

causens_obj <- list()
class(causens_obj) <- "causens_sf"
causens_obj$call <- formula(trt_model)
causens_obj$estimated_ate <- Y1_sf - Y0_sf

if (!bootstrap) {
Expand All @@ -44,16 +50,18 @@ causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE,

# Number of bootstrap samples

B <- 1000
ate_bs <- numeric(B)
set.seed(123) # for bootstrap replications

for (b in 1:B) {
data_b <- data[sample(nrow(data), replace = TRUE), ]
y_b <- data_b[[outcome]]
z_b <- data_b[[exposure]]
z_b <- data_b[[processed_info$response_var_name]]

e_b <- predict(fitted_model, newdata = data_b, type = "response")
e_b <- predict(processed_info$fitted_model,
newdata = data_b,
type = "response"
)

c1_b <- sf(z = 1, e = e_b, ...)
c0_b <- sf(z = 0, e = 1 - e_b, ...)
Expand Down
Loading
Loading