Kuan-Liu-Lab · larryshamalama · Feb 5, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 21, 2025
diff --git a/.github/workflows/lint-project.yaml b/.github/workflows/lint-project.yaml
@@ -28,7 +28,7 @@ jobs:
         run: |
           devtools::load_all()
           library(lintr)
-          lint_dir(linters = linters_with_defaults(line_length_linter(120), commented_code_linter=NULL, object_name_linter=NULL))
+          lint_dir(linters = linters_with_defaults(commented_code_linter=NULL, object_name_linter=NULL))
         shell: Rscript {0}
         env:
           LINTR_ERROR_ON_LINT: true
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ docs
 inst/doc
 /doc/
 /Meta/
+.lintr
+..Rcheck/
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,14 +4,14 @@ S3method(summary,bayesian_causens)
 S3method(summary,causens_sf)
 S3method(summary,monte_carlo_causens)
 export(bayesian_causens)
-export(causens)
 export(causens_monte_carlo)
 export(causens_sf)
 export(gData_U_binary_Y_binary)
 export(gData_U_binary_Y_cont)
 export(gData_U_cont_Y_binary)
 export(gData_U_cont_Y_cont)
 export(plot_causens)
+export(process_model_formula)
 export(sf)
 export(simulate_data)
 importFrom(graphics,legend)

diff --git a/R/bayesian_causens.R b/R/bayesian_causens.R
@@ -1,10 +1,16 @@
 #' @title Bayesian parametric sensitivity analysis for causal inference
 #' @description This function runs a Bayesian sensitivity analysis for causal
-#' inference using JAGS or Stan as a backend.
-#' @param exposure Exposure variable name in the data frame.
-#' @param outcome Outcome variable name in the data frame.
-#' @param confounders Confounders' names in the data frame.
-#' @param data A data frame containing the exposure, outcome, and confounder variables.
+#' inference using JAGS or Stan as a backend. For now, only JAGS is supported.
+#' @param trt_model The treatment model object as a formula.
+#' @param outcome_model The outcome model object as a formula.
+#' @param U_model The unmeasured confounder model object as
+#' a formula.
+#' @param data A data frame containing the exposure, outcome, and confounder
+#' variables.
+#' @param beta_uy Prior distribution for the effect of the missing confounder U
+#' on the outcome Y.
+#' @param alpha_uz Prior distribution for the effect of the missing confounder U
+#' on the treatment assignment mechanism Z.
 #' @param backend The backend to use for the sensitivity analysis. Currently
 #' only "jags" is supported.
 #' @param output_trace Whether to output the full trace of the MCMC sampler.
@@ -13,27 +19,40 @@
 #' variable on the outcome, as well as the confounder-adjusted causal effect.
 #' @importFrom stats sd update
 #' @export
-bayesian_causens <- function(exposure, outcome, confounders, data, backend = "jags", output_trace = FALSE, ...) {
+bayesian_causens <- function(trt_model, outcome_model,
+                             U_model, data,
+                             beta_uy = ~ dunif(-2, 2),
+                             alpha_uz = ~ dunif(-2, 2),
+                             backend = "jags", output_trace = FALSE, ...) {
   sampler_args <- parse_args(...)
 
-  # Notation from Bayesian SA paper
-  Z <- data[[exposure]]
-  C <- data[confounders]
-  Y <- data[[outcome]]
-  N <- nrow(C)
+  trt_model_info <- process_model_formula(trt_model, data)
+  Z <- data[[trt_model_info$response_var_name]]
+  trt_C <- data[trt_model_info$confounder_names]
+
+  outcome_model_info <- process_model_formula(outcome_model, data)
+  Y <- data[[outcome_model_info$response_var_name]]
+  outcome_C <- data[outcome_model_info$confounder_names]
+
+  U_model_info <- process_model_formula(U_model, data)
+  unmeasured_confounder_C <- data[U_model_info$confounder_names]
+  # no U variable since it's the missing confounder!
+
+  N <- nrow(data)
 
   binary_outcome <- all(Y %in% c(0, 1))
 
-  jags_model <- create_jags_model(binary_outcome)
+  jags_model <- create_jags_model(binary_outcome, beta_uy, alpha_uz)
 
   inits <- list(
     beta_Z = 0,
-    beta_C = rep(0, ncol(C)),
+    beta_C = rep(0, ncol(outcome_C)),
     beta_0 = 0,
-    beta_U = 0,
-    gamma_C = rep(0, ncol(C)),
-    alpha_C = rep(0, ncol(C)),
+    beta_uy = 0,
+    gamma_C = rep(0, ncol(unmeasured_confounder_C)),
+    alpha_C = rep(0, ncol(trt_C)),
     alpha_0 = 0,
+    alpha_uz = 0,
     .RNG.name = "base::Wichmann-Hill",
     .RNG.seed = 123
   )
@@ -49,12 +68,14 @@ bayesian_causens <- function(exposure, outcome, confounders, data, backend = "ja
       textConnection(jags_model),
       data = list(
         Z = Z,
-        C = C,
+        trt_C = trt_C,
+        outcome_C = outcome_C,
+        unmeasured_confounder_C = unmeasured_confounder_C,
         Y = Y,
         N = N,
-        p_outcome = ncol(C),
-        p_treatment = ncol(C),
-        p_unmeasured_confounder = ncol(C)
+        p_outcome = ncol(outcome_C),
+        p_treatment = ncol(trt_C),
+        p_U = ncol(unmeasured_confounder_C)
       ),
       inits = inits
     )
@@ -64,14 +85,17 @@ bayesian_causens <- function(exposure, outcome, confounders, data, backend = "ja
     # Extract the posterior samples
     samples <- rjags::coda.samples(
       model,
-      variable.names = c("beta_Z", "beta_C", "beta_0", "gamma_C", "alpha_C", "alpha_U", "alpha_0"),
+      variable.names = c(
+        "beta_Z", "beta_C", "beta_uy", "beta_0", "gamma_C",
+        "alpha_C", "alpha_uz", "alpha_0"
+      ),
       n.iter = sampler_args$n_samples,
       thin = 1
     )
 
     causens_obj <- list()
     class(causens_obj) <- "bayesian_causens"
-    causens_obj$call <- paste(exposure, "~", paste(confounders, collapse = " + "))
+    causens_obj$call <- trt_model
     causens_obj$estimated_ate <- mean(samples[[1]][, "beta_Z"])
     causens_obj$std_error <- sd(samples[[1]][, "beta_Z"])
     causens_obj$ci <- quantile(samples[[1]][, "beta_Z"], c(0.025, 0.975))
@@ -113,9 +137,9 @@ parse_args <- function(...) {
 #'
 #' No inputs are given to this function (for now) since data-related information
 #' is provided in jags.model() during model initialization.
-create_jags_model <- function(binary_outcome) {
-  # including modelling of unmeasured binary confounder (`eta` is the linear predictor)
-  likelihood <- "
+create_jags_model <- function(binary_outcome, beta_uy, alpha_uz) {
+  # including modelling of unmeasured binary confounder
+  likelihood <- paste0("
   # Outcome model parameters
 
   beta_0 ~ dunif(-2, 2)
@@ -125,13 +149,14 @@ create_jags_model <- function(binary_outcome) {
     beta_C[c] ~ dunif(-2, 2)
   }
 
-  beta_U ~ dunif(-2, 2)
-  "
+  beta_uy", deparse(beta_uy))
 
   if (binary_outcome) {
     likelihood <- paste0(likelihood, "
     for (i in 1:N) {
-      logit(p_Y[i]) <- beta_0 + beta_Z * Z[i] + sum(C[i, 1:p_outcome] * beta_C[1:p_outcome]) + beta_U * U[i]
+      logit(p_Y[i]) <- beta_0 + beta_Z * Z[i] +
+                       sum(outcome_C[i, 1:p_outcome] * beta_C[1:p_outcome]) +
+                       beta_uy * U[i]
       Y[i] ~ dbern(p_Y[i])
     }
     ")
@@ -140,7 +165,9 @@ create_jags_model <- function(binary_outcome) {
     tau_Y ~ dgamma(0.1, 0.1)
 
     for (i in 1:N) {
-      mu_Y[i] <- beta_0 + beta_Z * Z[i] + sum(C[i, 1:p_outcome] * beta_C[1:p_outcome]) + beta_U * U[i]
+      mu_Y[i] <- beta_0 + beta_Z * Z[i] +
+                 sum(outcome_C[i, 1:p_outcome] * beta_C[1:p_outcome]) +
+                 beta_uy * U[i]
       Y[i] ~ dnorm(mu_Y[i], tau_Y)
     }
     ")
@@ -149,17 +176,17 @@ create_jags_model <- function(binary_outcome) {
   unmeasured_confounder <- "
   # Unmeasured Confounder parameters
 
-  for (u in 1:p_unmeasured_confounder) {
+  for (u in 1:p_U) {
     gamma_C[u] ~ dunif(-2, 2)
   }
 
   for (i in 1:N) {
-    logit(p_U[i]) <- sum(C[i, 1:p_unmeasured_confounder] * gamma_C[1:p_unmeasured_confounder])
-    U[i] ~ dbern(p_U[i])
+    logit(prob_U[i]) <- sum(unmeasured_confounder_C[i, 1:p_U] * gamma_C[1:p_U])
+    U[i] ~ dbern(prob_U[i])
   }
   "
 
-  treatment_model <- "
+  treatment_model <- paste0("
   # Treatment model parameters
 
   alpha_0 ~ dunif(-2, 2)
@@ -168,13 +195,14 @@ create_jags_model <- function(binary_outcome) {
     alpha_C[z] ~ dunif(-2, 2)
   }
 
-  alpha_U ~ dunif(-2, 2)
-
   for (i in 1:N) {
-    logit(p_Z[i]) <- alpha_0 + sum(C[i, 1:p_treatment] * alpha_C[1:p_treatment]) + alpha_U * U[i]
+    logit(p_Z[i]) <- alpha_0 +
+                     sum(trt_C[i, 1:p_treatment] * alpha_C[1:p_treatment]) +
+                     alpha_uz * U[i]
     Z[i] ~ dbern(p_Z[i])
   }
-  "
+
+  alpha_uz", deparse(alpha_uz))
 
   return(paste0(
     "model{\n",

diff --git a/R/causens.R b/R/causens.R
diff --git a/R/causens_monte_carlo.R b/R/causens_monte_carlo.R
@@ -1,22 +1,29 @@
 #' @title Monte Carlo sensitivity analysis for causal effects
 #'
-#' @description This function performs a Monte Carlo sensitivity analysis for causal effects.
+#' @description This function performs a Monte Carlo sensitivity analysis for
+#' causal effects.
 #'
 #' @param exposure The name of the exposure variable in the data frame.
 #' @param outcome The name of the outcome variable in the data frame.
 #' @param confounders The name of the confounders in the data frame.
-#' @param data A data frame containing the exposure, outcome, and confounder variables.
+#' @param data A data frame containing the exposure, outcome, and confounder
+#' variables.
 #' @param ... Additional arguments to be passed to the function.
 #' @return The estimated causal effect.
 #' @importFrom stats as.formula rnorm runif sd
 #' @export
-causens_monte_carlo <- function(exposure, outcome, confounders, data, ...) {
-  formula <- as.formula(paste(outcome, "~", exposure, "+", paste(confounders, collapse = "+")))
+causens_monte_carlo <- function(outcome, exposure, confounders, data, ...) {
+  # rather than taking a general model formula, the model formulation here
+  # is more restrictive and has to be additive without interactions (for now)
+  formula <- as.formula(paste(
+    outcome, "~", exposure, "+",
+    paste(confounders, collapse = "+")
+  ))
 
   if (all(data[[outcome]] %in% c(0, 1))) {
     naive_model <- glm(formula, data = data, family = binomial)
   } else {
-    stop("Monte Carlo sensitivity analysis only supports binary outcomes, for now")
+    stop("Monte Carlo sensitivity analysis only supports binary outcomes")
   }
 
   trt_effect_hat <- summary(naive_model)$coefficients["Z", "Estimate"]
@@ -31,7 +38,8 @@ causens_monte_carlo <- function(exposure, outcome, confounders, data, ...) {
     g0 <- runif(1, -5, 5)
     gX <- runif(1, -2, 2)
 
-    return(trt_effect - log((1 + exp(bU + g0 + gX)) * (1 + exp(g0))) + log((1 + exp(bU + g0)) * (1 + exp(g0 + gX))))
+    return(trt_effect - log((1 + exp(bU + g0 + gX)) * (1 + exp(g0))) +
+             log((1 + exp(bU + g0)) * (1 + exp(g0 + gX))))
   }
 
   args <- list(...)

diff --git a/R/causens_sf.R b/R/causens_sf.R
@@ -6,21 +6,26 @@
 #' @param fitted_model The treatment model object as a glm.
 #' @param exposure The name of the exposure variable.
 #' @param outcome The name of the outcome variable.
-#' @param data A data frame containing the exposure, outcome, and confounder variables.
+#' @param data A data frame containing the exposure, outcome, and confounder
+#' variables.
 #' @param bootstrap A logical indicating whether to perform bootstrap estimation
 #' of the 95\% confidence interval.
+#' @param B If the bootstrap argument is TRUE, the number of bootstrap samples
+#' to generate.
 #' @param ... Additional arguments to be passed to the sensitivity function.
 #' @importFrom stats predict
 #'
 #' @return A point estimate of the corrected ATE.
 #' @export
-causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE, ...) {
+causens_sf <- function(trt_model, outcome, data, bootstrap = FALSE,
+                       B = 1000, ...) {
+  processed_info <- process_model_formula(trt_model, data)
   y <- data[[outcome]]
-  z <- data[[exposure]]
+  z <- data[[processed_info$response_var_name]]
 
-  e <- predict(fitted_model, type = "response")
+  e <- predict(processed_info$fitted_model, type = "response")
 
-  c1 <- sf(z = 1, e = e, ...)
+  c1 <- sf(z = 1, e = e, ...) # c1, c0, s1, s0 may be passed as kwargs
   c0 <- sf(z = 0, e = 1 - e, ...)
 
   # Calculate the Average Treatment Effect
@@ -34,6 +39,7 @@ causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE,
 
   causens_obj <- list()
   class(causens_obj) <- "causens_sf"
+  causens_obj$call <- formula(trt_model)
   causens_obj$estimated_ate <- Y1_sf - Y0_sf
 
   if (!bootstrap) {
@@ -44,16 +50,18 @@ causens_sf <- function(fitted_model, exposure, outcome, data, bootstrap = FALSE,
 
   # Number of bootstrap samples
 
-  B <- 1000
   ate_bs <- numeric(B)
   set.seed(123) # for bootstrap replications
 
   for (b in 1:B) {
     data_b <- data[sample(nrow(data), replace = TRUE), ]
     y_b <- data_b[[outcome]]
-    z_b <- data_b[[exposure]]
+    z_b <- data_b[[processed_info$response_var_name]]
 
-    e_b <- predict(fitted_model, newdata = data_b, type = "response")
+    e_b <- predict(processed_info$fitted_model,
+      newdata = data_b,
+      type = "response"
+    )
 
     c1_b <- sf(z = 1, e = e_b, ...)
     c0_b <- sf(z = 0, e = 1 - e_b, ...)
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,5 @@ docs @@
     inst/doc
     /doc/
     /Meta/
+    .lintr
+    ..Rcheck/