
Commit

update blogireg_me
changwoo-lee committed Dec 29, 2023
1 parent 318f7b1 commit eaa1044
Showing 8 changed files with 347 additions and 11 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -12,6 +12,7 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
BayesLogit,
coda,
fields,
spam,
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(blinreg_me)
export(blogireg_me)
export(vecchia_cov)
importFrom(Matrix,Diagonal)
12 changes: 6 additions & 6 deletions R/blinreg_me.R
@@ -12,8 +12,8 @@
#' The function \code{blinreg_me()} implements the Gibbs sampler for posterior inference. Most importantly, it allows sparse matrix input for \eqn{Q_X} for scalable computation.
#'
#' @param Y n by 1 matrix, response
#' @param X_mean n by 1 matrix or list of n by 1 matrices of length q, mean of X \eqn{\mu_X}.
#' @param X_prec n by n matrix or list of n by n matrices of length q, precision matrix of X \eqn{Q_X}. Support sparse matrix format from Matrix package.
#' @param X_mean n by 1 matrix of the mean of X, \eqn{\mu_X}, or a list of length q of n by 1 matrices.
#' @param X_prec n by n precision matrix of X, \eqn{Q_X}, or a list of length q of n by n matrices. Supports the sparse matrix format from the Matrix package.
#' @param Z n by p matrix, covariates without measurement error
#' @param nburn integer, burn-in iteration
#' @param nthin integer, thin-in rate
@@ -73,7 +73,7 @@
#' run_vecchia$cputime
#'
#' # fit the model
#' fit_me = blm_me(Y = health_sim$Y,
#' fit_me = blinreg_me(Y = health_sim$Y,
#' X_mean = X_mean,
#' X_prec = Q_sparse, # sparse precision matrix
#' Z = health_sim$Z,
@@ -100,9 +100,9 @@ blinreg_me <- function(Y,
if(is.null(prior)){
prior = list(var_beta = 100,a_Y = 0.01, b_Y = 0.01)
}
var_beta = 100
a_Y = 0.01
b_Y = 0.01
var_beta = prior$var_beta
a_Y = prior$a_Y
b_Y = prior$b_Y

n_y = length(Y)
if(is.vector(Z)) Z = as.matrix(Z)
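The hunk above makes blinreg_me() read its hyperparameters from the prior argument instead of using hard-coded values. Below is a minimal sketch of supplying a custom prior list, reusing the X_mean, Q_sparse, and health_sim objects from the roxygen example; interpreting a_Y and b_Y as inverse-gamma hyperparameters for the response variance is an assumption based only on the documented defaults.

# Sketch: custom prior list for blinreg_me(); reuses objects from the example above.
# Assumption: a_Y and b_Y are inverse-gamma hyperparameters for the response variance.
custom_prior = list(var_beta = 25,  # tighter N(0, 25) prior on regression coefficients
                    a_Y = 0.1,
                    b_Y = 0.1)
fit_custom = blinreg_me(Y = health_sim$Y,
                        X_mean = X_mean,
                        X_prec = Q_sparse,
                        Z = health_sim$Z,
                        nburn = 100, nsave = 1000, nthin = 1,
                        prior = custom_prior)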
217 changes: 217 additions & 0 deletions R/blogireg_me.R
@@ -0,0 +1,217 @@
#' Bayesian logistic regression models with correlated measurement errors
#'
#' This function implements the Bayesian logistic regression model with correlated measurement error of covariate(s).
#' Let \eqn{Y_i} be a binary response, \eqn{X_i} a \eqn{q\times 1} covariate of the \eqn{i}th observation that is subject to measurement error,
#' and \eqn{Z_i} a \eqn{p\times 1} covariate without measurement error.
#' The likelihood model is logistic regression,
#' \deqn{logit(Pr(Y_i = 1)) = \beta_0 + X_i^\top \beta_x + Z_i^\top \beta_z, \quad i=1,\dots,n, }
#' independently, and correlated measurement error of \eqn{X_i, i=1,\dots,n} is incorporated into the model as a multivariate normal prior.
#' For example, when \eqn{q=1}, we have the \eqn{n}-dimensional multivariate normal prior
#' \deqn{(X_1,\dots,X_n)\sim N_n(\mu_X, Q_X^{-1}).}
#' We also consider semiconjugate priors for the regression coefficients:
#' \deqn{\beta_0 \sim N(0, V_\beta), \quad \beta_{x,j} \stackrel{iid}{\sim} N(0, V_\beta), \quad \beta_{z,k} \stackrel{iid}{\sim} N(0, V_\beta).}
#' The function \code{blogireg_me()} implements the Gibbs sampler for posterior inference using Polya-Gamma augmentation.
#' Most importantly, it allows sparse matrix input for \eqn{Q_X} for scalable computation.
#'
#' @param Y n by 1 matrix, binary responses
#' @param X_mean n by 1 matrix of the mean of X, \eqn{\mu_X}, or a list of length q of n by 1 matrices.
#' @param X_prec n by n precision matrix of X, \eqn{Q_X}, or a list of length q of n by n matrices. Supports the sparse matrix format from the Matrix package.
#' @param Z n by p matrix, covariates without measurement error
#' @param nburn integer, number of burn-in iterations
#' @param nthin integer, thinning rate
#' @param nsave integer, number of posterior samples
#' @param prior list of prior hyperparameters; default is var_beta = 100, the prior variance of the regression coefficients
#' @param saveX logical, save X or not
#'
#' @return list of (1) posterior, the nsave by (1+q+p) matrix of posterior samples (intercept included) as a coda object,
#' (2) cputime, cpu time taken in seconds,
#' and optionally (3) X_save, posterior samples of X
#' @export
#'
#' @examples
#'
#'\dontrun{
#' library(bspme)
#' library(fields)
#' data(ozone)
#' data(health_sim)
#' # exposure mean and covariance at health subject locations with 06/18/1987 data (date id = 16)
#' # using Gaussian process with prior mean zero and exponential covariance kernel
#' # with fixed range 300 (in miles) and stdev 15 (in ppb)
#'
#' ozone16 = ozone[ozone$date_id==16,]
#'
#' Dxx = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat))
#' Dyy = rdist.earth(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#' Dxy = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#'
#' Kxx = Exponential(Dxx, range = 300, phi=15^2)
#' Kyy = Exponential(Dyy, range = 300, phi=15^2)
#' Kxy = Exponential(Dxy, range = 300, phi=15^2)
#'
#' X_mean = t(Kxy) %*% solve(Kxx, ozone16$ozone_ppb)
#' X_cov = Kyy - t(Kxy) %*% solve(Kxx, Kxy)
#'
#' # visualize
#' par(mfrow = c(1,3))
#' quilt.plot(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' ozone16$ozone_ppb, main = "ozone measurements"); US(add= T)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' X_mean, main = "health subjects, mean of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' sqrt(diag(X_cov)), main = "health subjects, sd of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#'
#' # vecchia approximation
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' n.neighbors = 10)
#' Q_sparse = run_vecchia$Q
#' run_vecchia$cputime
#'
#' # fit the model
#' fit_me = blogireg_me(Y = health_sim$Ybinary,
#' X_mean = X_mean,
#' X_prec = Q_sparse, # sparse precision matrix
#' Z = health_sim$Z,
#' nburn = 100,
#' nsave = 1000,
#' nthin = 1)
#' fit_me$cputime
#' summary(fit_me$posterior)
#' library(bayesplot)
#' bayesplot::mcmc_trace(fit_me$posterior)
#' }
#'
blogireg_me <- function(Y,
X_mean,
X_prec,
Z,
nburn = 2000,
nsave = 2000,
nthin = 5,
prior = NULL,
saveX = F){

# prior input, default
if(is.null(prior)){
prior = list(var_beta = 100)
}
var_beta = prior$var_beta

n_y = length(Y)
if(is.vector(Z)) Z = as.matrix(Z)
if(!is.list(X_mean) & !is.list(X_prec)){
q = 1
X_mean = list(X_mean)
X_prec = list(X_prec)
}else if(is.list(X_mean) & is.list(X_prec)){
q = length(X_mean)
if(length(X_prec)!=q) stop("Lengths of X_mean and X_prec lists do not match")
}else{
stop("X_mean is not vector/matrix or list")
}
X_prec_X_mean = list()
X_spamstruct = vector(mode = 'list', length = q)
sparsealgo = rep(T,q)

for(qq in 1:q){
X_prec_X_mean[[qq]] = as.numeric(X_prec[[qq]]%*%X_mean[[qq]])

if(!("sparseMatrix" %in% is(X_prec[[qq]]))){
print(paste0(qq,"th X_prec is not a sparse matrix! Using dense algorithm, which may be very slow when n is large"))
sparsealgo[qq] = F
}else{
X_prec[[qq]] = as(as(X_prec[[qq]], "generalMatrix"), "CsparseMatrix")
X_prec[[qq]] = spam::as.spam.dgCMatrix(X_prec[[qq]])# spam object
X_spamstruct[[qq]] = spam::chol(X_prec[[qq]])
}
}
X = matrix(0, n_y, q)
for(qq in 1:q) X[,qq] = X_mean[[qq]]
if(is.null(names(X_mean))){
colnames(X) = paste0("exposure.",1:q)
}else{
colnames(X) = paste0("exposure.",names(X_mean))
}

p = ncol(Z)
if(is.null(colnames(Z))){
colnames(Z) = paste0("covariate.",1:p)
}else{
colnames(Z) = paste0("covariate.",colnames(Z))
}
df_temp = as.data.frame(cbind(X,Z))
D = model.matrix( ~ ., df_temp)


# prior
Sigma_beta = diag(var_beta, ncol(D)) # (1+q+p) coefficients: intercept, beta_x, beta_z
Sigma_betainv = solve(Sigma_beta)

# initialize
beta = rep(0.1, ncol(D))
omega = rep(0.1, n_y) # aux variable
Dbeta = D%*%beta

beta_save = matrix(0, nsave, ncol(D))
colnames(beta_save) <- colnames(D)
if(saveX){
X_save = array(0, dim = c(nsave, n_y, q))
dimnames(X_save)[[3]] = names(X_mean)
}

YtY = crossprod(Y)
# sampler starts
isave = 0
isnegative = numeric(n_y)
pb <- txtProgressBar(style=3)
t_start = Sys.time()
for(imcmc in 1:(nsave*nthin + nburn)){
setTxtProgressBar(pb, imcmc/(nsave*nthin + nburn))
# update omega
Dbeta = D%*%beta
omega = BayesLogit::rpg.devroye(num = n_y, h=1, z = Dbeta)
# sample beta
Vbetainv = Sigma_betainv + crossprod(sqrt(omega)*D) # same as t(D)%*%diag(omega)%*%D
betatilde = solve(Vbetainv,crossprod(D, Y-0.5)) # Y: binary vector
beta = as.numeric(spam::rmvnorm.prec(1, mu = betatilde, Q = Vbetainv))

for(qq in 1:q){
# 1st is intercept
b_G = X_prec_X_mean[[qq]] + beta[qq + 1]*omega*((Y-0.5)/omega-D[,-(qq+1)]%*%beta[-(qq+1)])
Qtilde = X_prec[[qq]] # dense or spam
if(sparsealgo[qq]){
Qtilde = Qtilde + spam::diag.spam(omega*beta[qq+1]^2, n_y, n_y)
}else{
diag(Qtilde) = diag(Qtilde) + omega*beta[qq+1]^2
}
Xstar = spam::rmvnorm.canonical(1, b = as.vector(b_G),
Q = Qtilde,# dense or spam
Rstruct = X_spamstruct[[qq]])
if(imcmc > nburn) isnegative = isnegative + (Xstar<0)
D[,(qq+1)] = as.vector(Xstar)
}


if((imcmc > nburn)&(imcmc%%nthin==0)){
isave = isave + 1
beta_save[isave,] = beta
if(saveX) X_save[isave,,] = D[,2:(q+1)]
}
}
t_diff = difftime(Sys.time(), t_start, units = "secs")
print(paste0("Exposure components contains negative vaules total ",sum(isnegative)," times among (# exposures) x n_y x (MCMC iter after burnin) = ",q," x ",n_y," x ",nsave*nthin," instances"))
out = list()
out$posterior = beta_save
out$posterior = coda::mcmc(out$posterior)
out$cputime = t_diff
if(saveX) out$X_save = X_save
out
}
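For context on the Polya-Gamma scheme used in the beta update above, here is a minimal, self-contained sketch of the same conditional updates for plain logistic regression without measurement error, on simulated data. The prior variance of 100 mirrors the package default, and the BayesLogit and spam calls are the same ones blogireg_me() uses internally; data and iteration counts are illustrative only.

# Sketch: Polya-Gamma Gibbs sampler for plain logistic regression (no measurement error),
# using the same conditional updates as the beta step in blogireg_me().
library(BayesLogit)
library(spam)
set.seed(1)
n = 200
D = cbind(1, rnorm(n))                       # intercept + one covariate
beta_true = c(-0.5, 1)
Y = rbinom(n, 1, plogis(D %*% beta_true))
Sigma_betainv = diag(1/100, ncol(D))         # N(0, 100) priors, matching var_beta = 100
beta = rep(0, ncol(D))
beta_save = matrix(0, 2000, ncol(D))
for (imcmc in 1:2000) {
  # omega_i | beta ~ PG(1, d_i' beta)
  omega = BayesLogit::rpg.devroye(num = n, h = 1, z = as.numeric(D %*% beta))
  # beta | omega, Y ~ N(Vbetainv^{-1} t(D) kappa, Vbetainv^{-1}), with kappa_i = Y_i - 1/2
  Vbetainv = Sigma_betainv + crossprod(sqrt(omega) * D)  # t(D) %*% diag(omega) %*% D + prior
  betatilde = solve(Vbetainv, crossprod(D, Y - 0.5))
  beta = as.numeric(spam::rmvnorm.prec(1, mu = betatilde, Q = Vbetainv))
  beta_save[imcmc, ] = beta
}
colMeans(beta_save[-(1:500), ])              # posterior means; close to beta_true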
2 changes: 1 addition & 1 deletion R/vecchia_cov.R
@@ -2,7 +2,7 @@
#' Vecchia approximation given a covariance matrix
#'
#' Given a multivariate normal (MVN) distribution with covariance matrix \eqn{\Sigma},
#' this function finds a sparse precision matrix (inverse covariance) Q based on the Vecchia approximation,
#' this function finds a sparse precision matrix (inverse covariance) \eqn{Q} based on the Vecchia approximation,
#' where \eqn{N(\mu, Q^{-1})} is the sparse MVN that approximates original MVN \eqn{N(\mu, \Sigma)}.
#'
#' @param Sigma n by n covariance matrix
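As a reminder of what this function feeds into the samplers: vecchia_cov() returns a sparse precision matrix Q such that N(mu, Q^{-1}) approximates the original N(mu, Sigma). Below is a rough sketch of inspecting that output, reusing X_cov, X_mean, and health_sim from the examples above; treating run_vecchia$Q as a Matrix-package sparse matrix is an assumption based on the class check in blogireg_me(), and the spam conversion mirrors the one done there.

# Sketch: inspect the sparse precision matrix from vecchia_cov() and draw from the
# approximating MVN. Assumes run_vecchia$Q is a Matrix-package sparse matrix.
run_vecchia = vecchia_cov(X_cov,
                          coords = cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
                          n.neighbors = 10)
Q = run_vecchia$Q
mean(as.matrix(Q) != 0)                      # fraction of nonzeros; small for n.neighbors = 10
# same conversion blogireg_me() applies before sampling
Q_spam = spam::as.spam.dgCMatrix(as(as(Q, "generalMatrix"), "CsparseMatrix"))
x_draw = spam::rmvnorm.prec(1, mu = as.numeric(X_mean), Q = Q_spam)  # one draw from N(X_mean, Q^{-1})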
6 changes: 3 additions & 3 deletions man/blinreg_me.Rd

Some generated files are not rendered by default.
