-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
318f7b1
commit eaa1044
Showing
8 changed files
with
347 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(blinreg_me) | ||
export(blogireg_me) | ||
export(vecchia_cov) | ||
importFrom(Matrix,Diagonal) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
#' Bayesian logistic regression models with correlated measurement errors | ||
#' | ||
#' This function implements the Bayesian logistic regression model with correlated measurement error of covariate(s). | ||
#' Denote \eqn{Y_i} be a binary response, \eqn{X_i} be a \eqn{q\times 1} covariate of \eqn{i}th observation that is subject to measurement error, | ||
#' and \eqn{Z_i} be a \eqn{p\times 1} covariate without measurement error. | ||
#' The likelihood model is Bayesian logistic regression, | ||
#' \deqn{logit(Pr(Y_i = 1)) = \beta_0 + X_i^\top \beta_x + Z_i^\top \beta_z, \quad i=1,\dots,n, } | ||
#' independently, and correlated measurement error of \eqn{X_i, i=1,\dots,n} is incorporated into the model as a multivariate normal prior. | ||
#' For example when \eqn{q=1}, we have \eqn{n-}dimensional multivariate normal prior | ||
#' \deqn{(X_1,\dots,X_n)\sim N_n(\mu_X, Q_X^{-1}).} | ||
#' Also, we consider semiconjugate priors for regression coefficients; | ||
#' \deqn{\beta_0 \sim N(0, V_\beta), \quad \beta_{x,j} \stackrel{iid}{\sim} N(0, V_\beta), \quad \beta_{z,k} \stackrel{iid}{\sim} N(0, V_\beta)} | ||
#' The function \code{blogireg_me()} implements the Gibbs sampler for posterior inference using Polya-Gamma augmentation. | ||
#' Most importantly, it allows sparse matrix input for \eqn{Q_X} for scalable computation. | ||
#' | ||
#' @param Y n by 1 matrix, binary responses | ||
#' @param X_mean n by 1 matrix of mean of X \eqn{\mu_X}, or list of n by 1 matrices of length q. | ||
#' @param X_prec n by n matrix precision matrix of X \eqn{Q_X}, or list of n by n matrices of length q. Support sparse matrix format from Matrix package. | ||
#' @param Z n by p matrix, covariates without measurement error | ||
#' @param nburn integer, burn-in iteration | ||
#' @param nthin integer, thin-in rate | ||
#' @param nsave integer, number of posterior samples | ||
#' @param prior list of prior parameters, default is var_beta = 100,a_Y = 0.01, b_Y = 0.01 | ||
#' @param saveX logical, save X or not | ||
#' | ||
#' @return list of (1) posterior, the (nsave)x(q+p) matrix of posterior samples as a coda object, | ||
#' (2) cputime, cpu time taken in seconds, | ||
#' and optionally (3) X_save, posterior samples of X | ||
#' @export | ||
#' | ||
#' @examples | ||
#' | ||
#'\dontrun{ | ||
#' data(ozone) | ||
#' data(health_sim) | ||
#' library(bspme) | ||
#' data(ozone) | ||
#' data(health_sim) | ||
#' library(fields) | ||
#' # exposure mean and covariance at health subject locations with 06/18/1987 data (date id = 16) | ||
#' # using Gaussian process with prior mean zero and exponential covariance kernel | ||
#' # with fixed range 300 (in miles) and stdev 15 (in ppb) | ||
#' | ||
#' ozone16 = ozone[ozone$date_id==16,] | ||
#' | ||
#' Dxx = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat)) | ||
#' Dyy = rdist.earth(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat)) | ||
#' Dxy = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat), | ||
#' cbind(health_sim$coords_y_lon, health_sim$coords_y_lat)) | ||
#' | ||
#' Kxx = Exponential(Dxx, range = 300, phi=15^2) | ||
#' Kyy = Exponential(Dyy, range = 300, phi=15^2) | ||
#' Kxy = Exponential(Dxy, range = 300, phi=15^2) | ||
#' | ||
#' X_mean = t(Kxy) %*% solve(Kxx, ozone16$ozone_ppb) | ||
#' X_cov = Kyy - t(Kxy) %*% solve(Kxx, Kxy) | ||
#' | ||
#' # visualize | ||
#' par(mfrow = c(1,3)) | ||
#' quilt.plot(cbind(ozone16$coords_lon, ozone16$coords_lat), | ||
#' ozone16$ozone_ppb, main = "ozone measurements"); US(add= T) | ||
#' | ||
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat), | ||
#' X_mean, main = "health subjects, mean of exposure"); US(add= T) | ||
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17) | ||
#' | ||
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat), | ||
#' sqrt(diag(X_cov)), main = "health subjects, sd of exposure"); US(add= T) | ||
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17) | ||
#' | ||
#' # vecchia approximation | ||
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$coords_y_lon, health_sim$coords_y_lat), | ||
#' n.neighbors = 10) | ||
#' Q_sparse = run_vecchia$Q | ||
#' run_vecchia$cputime | ||
#' | ||
#' # fit the model | ||
#' fit_me = blogireg_me(Y = health_sim$Ybinary, | ||
#' X_mean = X_mean, | ||
#' X_prec = Q_sparse, # sparse precision matrix | ||
#' Z = health_sim$Z, | ||
#' nburn = 100, | ||
#' nsave = 1000, | ||
#' nthin = 1) | ||
#' fit_me$cputime | ||
#' summary(fit_me$posterior) | ||
#' library(bayesplot) | ||
#' bayesplot::mcmc_trace(fit_me$posterior) | ||
#' } | ||
#' | ||
blogireg_me <- function(Y, | ||
X_mean, | ||
X_prec, | ||
Z, | ||
nburn = 2000, | ||
nsave = 2000, | ||
nthin = 5, | ||
prior = NULL, | ||
saveX = F){ | ||
|
||
# prior input, default | ||
if(is.null(prior)){ | ||
prior = list(var_beta = 100) | ||
} | ||
var_beta = prior$var_beta | ||
|
||
n_y = length(Y) | ||
if(is.vector(Z)) Z = as.matrix(Z) | ||
if(!is.list(X_mean) & !is.list(X_prec)){ | ||
q = 1 | ||
X_mean = list(X_mean) | ||
X_prec = list(X_prec) | ||
}else if(is.list(X_mean) & is.list(X_prec)){ | ||
q = length(X_mean) | ||
if(length(X_prec)!=q) stop("list length does not match") | ||
}else{ | ||
stop("X_mean is not vector/matrix or list") | ||
} | ||
X_prec_X_mean = list() | ||
X_spamstruct = vector(mode = 'list', length = q) | ||
sparsealgo = rep(T,q) | ||
|
||
for(qq in 1:q){ | ||
X_prec_X_mean[[qq]] = as.numeric(X_prec[[qq]]%*%X_mean[[qq]]) | ||
|
||
if(!("sparseMatrix" %in% is(X_prec[[qq]]))){ | ||
print(paste0(qq,"th X_prec is not a sparse matrix! Using dense algorithm, which may very slow when n is large")) | ||
sparsealgo[qq] = F | ||
}else{ | ||
X_prec[[qq]] = as(as(X_prec[[qq]], "generalMatrix"), "CsparseMatrix") | ||
X_prec[[qq]] = spam::as.spam.dgCMatrix(X_prec[[qq]])# spam object | ||
X_spamstruct[[qq]] = spam::chol(X_prec[[qq]]) | ||
} | ||
} | ||
X = matrix(0, n_y, q) | ||
for(qq in 1:q) X[,qq] = X_mean[[q]] | ||
if(is.null(names(X_mean))){ | ||
colnames(X) = paste0("exposure.",1:q) | ||
}else{ | ||
colnames(X) = paste0("exposure.",names(X_mean)) | ||
} | ||
|
||
p = ncol(Z) | ||
if(is.null(colnames(Z))){ | ||
colnames(Z) = paste0("covariate.",1:p) | ||
}else{ | ||
colnames(Z) = paste0("covariate.",colnames(Z)) | ||
} | ||
df_temp = as.data.frame(cbind(X,Z)) | ||
D = model.matrix( ~ ., df_temp) | ||
|
||
|
||
# prior | ||
Sigma_beta = diag(var_beta, ncol(D))# 3 coefficients(beta0, beta1, betaz) | ||
Sigma_betainv = solve(Sigma_beta) | ||
|
||
# initialize | ||
beta = rep(0.1, ncol(D)) | ||
omega = rep(0.1, n_y) # aux variable | ||
Dbeta = D%*%beta | ||
|
||
beta_save = matrix(0, nsave, ncol(D)) | ||
colnames(beta_save) <- colnames(D) | ||
if(saveX){ | ||
X_save = array(0, dim = c(nsave, n_y, q)) | ||
dimnames(X_save)[[3]] = names(X_mean) | ||
} | ||
|
||
YtY = crossprod(Y) | ||
#browser() | ||
# sampler starts | ||
isave = 0 | ||
isnegative = numeric(n_y) | ||
pb <- txtProgressBar(style=3) | ||
t_start = Sys.time() | ||
for(imcmc in 1:(nsave*nthin + nburn)){ | ||
setTxtProgressBar(pb, imcmc/(nsave*nthin + nburn)) | ||
# update omega | ||
Dbeta = D%*%beta | ||
omega = BayesLogit::rpg.devroye(num = n_y, h=1, z = Dbeta) | ||
# sample beta | ||
Vbetainv = Sigma_betainv + crossprod(sqrt(omega)*D) # same as t(D)%*%diag(omega)%*%D | ||
betatilde = solve(Vbetainv,crossprod(D, Y-0.5)) # Y: binary vector | ||
beta = as.numeric(spam::rmvnorm.prec(1, mu = betatilde, Q = Vbetainv)) | ||
|
||
for(qq in 1:q){ | ||
# 1st is intercept | ||
b_G = X_prec_X_mean[[qq]] + beta[qq + 1]*omega*((Y-0.5)/omega-D[,-(qq+1)]%*%beta[-(qq+1)]) | ||
Qtilde = X_prec[[qq]] # dense or spam | ||
if(sparsealgo[qq]){ | ||
Qtilde = Qtilde + spam::diag.spam(omega*beta[qq+1]^2, n_y, n_y) | ||
}else{ | ||
diag(Qtilde) = diag(Qtilde) + omega*beta[qq+1]^2 | ||
} | ||
Xstar = spam::rmvnorm.canonical(1, b = as.vector(b_G), | ||
Q = Qtilde,# dense or spam | ||
Rstruct = X_spamstruct[[qq]]) #browser() | ||
if(imcmc > nburn) isnegative = isnegative + (Xstar<0) | ||
D[,(qq+1)] = as.vector(Xstar) | ||
} | ||
|
||
|
||
if((imcmc > nburn)&(imcmc%%nthin==0)){ | ||
isave = isave + 1 | ||
beta_save[isave,] = beta | ||
if(saveX) X_save[isave,,] = D[,2:(q+1)] | ||
} | ||
} | ||
t_diff = difftime(Sys.time(), t_start, units = "secs") | ||
print(paste0("Exposure components contains negative vaules total ",sum(isnegative)," times among (# exposures) x n_y x (MCMC iter after burnin) = ",q," x ",n_y," x ",nsave*nthin," instances")) | ||
out = list() | ||
out$posterior = beta_save | ||
out$posterior = coda::mcmc(out$posterior) | ||
out$time = t_diff | ||
if(saveX) out$X_save = X_save | ||
out | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.