Skip to content

Commit

Permalink
vignette update
Browse files Browse the repository at this point in the history
  • Loading branch information
changwoo-lee committed Jan 11, 2024
1 parent f8ede69 commit 9863fee
Show file tree
Hide file tree
Showing 18 changed files with 567 additions and 483 deletions.
87 changes: 50 additions & 37 deletions R/bglm_me.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,63 +43,76 @@
#' @export
#'
#' @examples
#'
#'\dontrun{
#' data(ozone)
#' data(health_sim)
#' \dontrun{
#' library(bspme)
#' data(ozone)
#' data(NO2_Jan2012)
#' data(health_sim)
#' library(fields)
#' # exposure mean and covariance at health participant locations with 06/18/1987 data (date id = 16)
#' # using Gaussian process with prior mean zero and exponential covariance kernel
#' # with fixed range 300 (in miles) and stdev 15 (in ppb)
#' library(maps)
#' # Obtain the predicted exposure mean and covariance at health subject locations based on Jan 10, 2012 data
#' # using Gaussian process prior with mean zero and exponential covariance kernel
#' # with fixed range 5 (in miles) and stdev 0.5 (in ppbv)
#'
#' # exposure data
#' data_jan10 = NO2_Jan2012[NO2_Jan2012$date == as.POSIXct("2012-01-10"),]
#' coords_monitor = cbind(data_jan10$lon, data_jan10$lat)
#'
#' ozone16 = ozone[ozone$date_id==16,]
#' # health data
#' coords_health = cbind(health_sim$lon, health_sim$lat)
#'
#' Dxx = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat))
#' Dyy = rdist.earth(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#' Dxy = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#' distmat_xx <- rdist.earth(coords_monitor)
#' distmat_xy <- rdist.earth(coords_monitor, coords_health)
#' distmat_yy <- rdist.earth(coords_health)
#'
#' Kxx = Exponential(Dxx, range = 300, phi=15^2)
#' Kyy = Exponential(Dyy, range = 300, phi=15^2)
#' Kxy = Exponential(Dxy, range = 300, phi=15^2)
#' a = 5; sigma = 1; # assume known
#'
#' X_mean = t(Kxy) %*% solve(Kxx, ozone16$ozone_ppb)
#' X_cov = Kyy - t(Kxy) %*% solve(Kxx, Kxy)
#' Sigmaxx = fields::Matern(distmat_xx, smoothness = 0.5, range = a, phi = sigma^2)
#' Sigmaxy = fields::Matern(distmat_xy, smoothness = 0.5, range = a, phi = sigma^2)
#' Sigmayy = fields::Matern(distmat_yy, smoothness = 0.5, range = a, phi = sigma^2)
#'
#' # posterior predictive mean and covariance of exposure at health data
#' X_mean <- t(Sigmaxy) %*% solve(Sigmaxx, data_jan10$lnNO2)
#' X_cov <- Sigmayy - t(Sigmaxy) %*% solve(Sigmaxx,Sigmaxy) # n_y by n_y
#'
#' # visualize
#' par(mfrow = c(1,3))
#' quilt.plot(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' ozone16$ozone_ppb, main = "ozone measurements"); US(add= T)
#' # monitoring station exposure data
#' quilt.plot(cbind(data_jan10$lon, data_jan10$lat),
#' data_jan10$lnNO2, main = "NO2 exposures (in log) at 21 stations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' # posterior predictive mean of exposure at health data
#' quilt.plot(cbind(health_sim$lon, health_sim$lat),
#' X_mean, main = "posterior predictive mean of exposure at health data locations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' X_mean, main = "health subjects, mean of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#' # posterior predictive sd of exposure at health data
#' quilt.plot(cbind(health_sim$lon, health_sim$lat),
#' sqrt(diag(X_cov)), main = "posterior predictive sd of exposure at health data locations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' sqrt(diag(X_cov)), main = "health subjects, sd of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#'
#' # vecchia approximation
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$lon, health_sim$lat),
#' n.neighbors = 10)
#' Q_sparse = run_vecchia$Q
#' run_vecchia$cputime
#'
#' # fit the model
#' fit_me = blogireg_me(Y = health_sim$Ybinary,
#' # fit the model, continuous data
#' fit = bglm_me(Y = health_sim$Ybinary,
#' X_mean = X_mean,
#' X_prec = Q_sparse, # sparse precision matrix
#' Z = health_sim$Z,
#' nburn = 100,
#' family = binomial(link = "logit"),
#' nburn = 1000,
#' nsave = 1000,
#' nthin = 1)
#' fit_me$cputime
#' summary(fit_me$posterior)
#' nthin = 5)
#' fit$cputime
#' summary(fit$posterior)
#' library(bayesplot)
#' bayesplot::mcmc_trace(fit_me$posterior)
#' bayesplot::mcmc_trace(fit$posterior)
#' }
#'
bglm_me <- function(Y,
Expand Down Expand Up @@ -131,8 +144,8 @@ bglm_me <- function(Y,
var_beta = prior$var_beta

n_y = length(Y)
# check z is binary (temporary for family = "binomial")
if(!all(Z %in% c(0,1))) stop("Z must be binary")
# check Y is binary (temporary for family = "binomial")
if(!all(Y %in% c(0,1))) stop("Y must be binary")

if(is.vector(Z)) Z = as.matrix(Z)
if(!is.list(X_mean) & !is.list(X_prec)){
Expand Down
82 changes: 47 additions & 35 deletions R/blm_me.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,63 +40,75 @@
#' @export
#'
#' @examples
#'
#'\dontrun{
#' data(ozone)
#' data(health_sim)
#' \dontrun{
#' library(bspme)
#' data(ozone)
#' data(NO2_Jan2012)
#' data(health_sim)
#' library(fields)
#' # exposure mean and covariance at health subject locations with 06/18/1987 data (date id = 16)
#' # using Gaussian process with prior mean zero and exponential covariance kernel
#' # with fixed range 300 (in miles) and stdev 15 (in ppb)
#' library(maps)
#' # Obtain the predicted exposure mean and covariance at health subject locations based on Jan 10, 2012 data
#' # using Gaussian process prior with mean zero and exponential covariance kernel
#' # with fixed range 5 (in miles) and stdev 0.5 (in ppbv)
#'
#' # exposure data
#' data_jan10 = NO2_Jan2012[NO2_Jan2012$date == as.POSIXct("2012-01-10"),]
#' coords_monitor = cbind(data_jan10$lon, data_jan10$lat)
#'
#' ozone16 = ozone[ozone$date_id==16,]
#' # health data
#' coords_health = cbind(health_sim$lon, health_sim$lat)
#'
#' Dxx = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat))
#' Dyy = rdist.earth(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#' Dxy = rdist.earth(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' cbind(health_sim$coords_y_lon, health_sim$coords_y_lat))
#' distmat_xx <- rdist.earth(coords_monitor)
#' distmat_xy <- rdist.earth(coords_monitor, coords_health)
#' distmat_yy <- rdist.earth(coords_health)
#'
#' Kxx = Exponential(Dxx, range = 300, phi=15^2)
#' Kyy = Exponential(Dyy, range = 300, phi=15^2)
#' Kxy = Exponential(Dxy, range = 300, phi=15^2)
#' a = 5; sigma = 1; # assume known
#'
#' X_mean = t(Kxy) %*% solve(Kxx, ozone16$ozone_ppb)
#' X_cov = Kyy - t(Kxy) %*% solve(Kxx, Kxy)
#' Sigmaxx = fields::Matern(distmat_xx, smoothness = 0.5, range = a, phi = sigma^2)
#' Sigmaxy = fields::Matern(distmat_xy, smoothness = 0.5, range = a, phi = sigma^2)
#' Sigmayy = fields::Matern(distmat_yy, smoothness = 0.5, range = a, phi = sigma^2)
#'
#' # posterior predictive mean and covariance of exposure at health data
#' X_mean <- t(Sigmaxy) %*% solve(Sigmaxx, data_jan10$lnNO2)
#' X_cov <- Sigmayy - t(Sigmaxy) %*% solve(Sigmaxx,Sigmaxy) # n_y by n_y
#'
#' # visualize
#' par(mfrow = c(1,3))
#' quilt.plot(cbind(ozone16$coords_lon, ozone16$coords_lat),
#' ozone16$ozone_ppb, main = "ozone measurements"); US(add= T)
#' # monitoring station exposure data
#' quilt.plot(cbind(data_jan10$lon, data_jan10$lat),
#' data_jan10$lnNO2, main = "NO2 exposures (in log) at 21 stations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' # posterior predictive mean of exposure at health data
#' quilt.plot(cbind(health_sim$lon, health_sim$lat),
#' X_mean, main = "posterior predictive mean of exposure at health data locations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' X_mean, main = "health subjects, mean of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#' # posterior predictive sd of exposure at health data
#' quilt.plot(cbind(health_sim$lon, health_sim$lat),
#' sqrt(diag(X_cov)), main = "posterior predictive sd of exposure at health data locations",
#' xlab = "longitude", ylab= "latitude", xlim = c(-96.5, -94.5), ylim = c(29, 30.5))
#' maps::map("county", "Texas", add = T)
#'
#' quilt.plot(cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' sqrt(diag(X_cov)), main = "health subjects, sd of exposure"); US(add= T)
#' points(cbind(ozone16$coords_lon, ozone16$coords_lat), pch = 17)
#'
#' # vecchia approximation
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$coords_y_lon, health_sim$coords_y_lat),
#' run_vecchia = vecchia_cov(X_cov, coords = cbind(health_sim$lon, health_sim$lat),
#' n.neighbors = 10)
#' Q_sparse = run_vecchia$Q
#' run_vecchia$cputime
#'
#' # fit the model
#' fit_me = blinreg_me(Y = health_sim$Y,
#' # fit the model, continuous data
#' fit = blm_me(Y = health_sim$Y,
#' X_mean = X_mean,
#' X_prec = Q_sparse, # sparse precision matrix
#' Z = health_sim$Z,
#' nburn = 100,
#' nburn = 1000,
#' nsave = 1000,
#' nthin = 1)
#' fit_me$cputime
#' summary(fit_me$posterior)
#' nthin = 5)
#' fit$cputime
#' summary(fit$posterior)
#' library(bayesplot)
#' bayesplot::mcmc_trace(fit_me$posterior)
#' bayesplot::mcmc_trace(fit$posterior)
#' }
#'
blm_me <- function(Y,
Expand Down
4 changes: 2 additions & 2 deletions R/bspme-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ NULL
#' \describe{
#' \item{Y}{n by 1 matrix, numeric, simulated continuous health outcome}
#' \item{Ybinary}{n by 1 matrix, numeric, simulated binary health outcome}
#' \item{coords_Y_lon}{n by 1 matrix, numeric, simulated health subject longitude}
#' \item{coords_Y_lat}{n by 1 matrix, numeric, simulated health subject latitude}
#' \item{lon}{n by 1 matrix, numeric, simulated health subject longitude}
#' \item{lat}{n by 1 matrix, numeric, simulated health subject latitude}
#' \item{Z}{n by 1 matrix, numeric, covariate}
#' \item{X_true}{n by 1 matrix, numeric, true ozone exposure used for simulation}
#' }
Expand Down
38 changes: 18 additions & 20 deletions data-raw/health_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,29 @@
n_y = 2000 # number of health data
data("NO2_Jan2012")
set.seed(1234567)
coords_health = cbind(rnorm(n_y, -95.3, sd = 0.1),rnorm(n_y, 29.8, sd = 0.2))
coords_health = cbind(rnorm(n_y, -95.5, sd = 0.3),rnorm(n_y, 29.8, sd = 0.2))
# simulate health data based on day 10 (Jan 10, 2012)
data_jan10 = NO2_Jan2012[NO2_Jan2012$date == as.POSIXct("2012-01-10"),]

coords_monitor = cbind(data_jan10$lon, data_jan10$lat)
quilt.plot(coords_monitor, data_jan10$lnNO2)

#quilt.plot(coords_monitor, data_jan10$lnNO2)
# visualize
plot(coords_monitor, pch = 16, col = "purple")
points(coords_health)
fields::US(add = T)


#plot(coords_monitor, pch = 16, col = "purple")
#points(coords_health)
#fields::US(add = T)


sigma <- 0.5 # standard deviation
sigma <- 1 # standard deviation
a <- 5 # range, in miles
## Matrices needed for prediction

distmat_xx <- rdist.earth(coords_monitor)
distmat_xy <- rdist.earth(coords_monitor, coords_health)
distmat_yy <- rdist.earth(coords_health)
distmat_xx <- fields::rdist.earth(coords_monitor)
distmat_xy <- fields::rdist.earth(coords_monitor, coords_health)
distmat_yy <- fields::rdist.earth(coords_health)

Sigmaxx = Matern(distmat_xx, smoothness = 0.5, range = a, phi = sigma^2)
Sigmaxy = Matern(distmat_xy, smoothness = 0.5, range = a, phi = sigma^2)
Sigmayy = Matern(distmat_yy, smoothness = 0.5, range = a, phi = sigma^2)
Sigmaxx = fields::Matern(distmat_xx, smoothness = 0.5, range = a, phi = sigma^2)
Sigmaxy = fields::Matern(distmat_xy, smoothness = 0.5, range = a, phi = sigma^2)
Sigmayy = fields::Matern(distmat_yy, smoothness = 0.5, range = a, phi = sigma^2)

## GP mean
pred_mean <- t(Sigmaxy) %*% solve(Sigmaxx, data_jan10$lnNO2)
Expand All @@ -45,14 +42,15 @@ quilt.plot(coords_health, sqrt(diag(pred_cov)), main = "sd"); US(add= T)
quilt.plot(coords_health, true_exposure, main = "draw from MVN"); US(add= T)

# simulated covariate
z = runif(n_y)
z = rnorm(n_y)

# generate health data
set.seed(1234)
Y = 2500 -30*true_exposure + 500*z + rnorm(n_y, sd = 200)
Y = 1 -2*true_exposure + 3*z + rnorm(n_y, sd = 1)
#summary(Y)
#var(1-2*true_exposure + 3*z) / var(Y)
#summary(lm(Y ~ true_exposure + z))
linpred = 0.2*true_exposure - 2*z
linpred = -1 + 2*true_exposure - 3*z
Ybinary = rbinom(n_y, 1, prob = 1/(1+exp(-linpred)))
#summary(Ybinary)
#summary(glm(Ybinary ~ true_exposure + z), family = "binomial")
Expand All @@ -61,8 +59,8 @@ health_sim = data.frame(Y = Y,
Ybinary = Ybinary,
#X_mean = pred_mean,
#X_cov = pred_cov,
coords_Y_lon = coords_health[,1],
coords_Y_lat = coords_health[,2],
lon = coords_health[,1],
lat = coords_health[,2],
Z = z,
X_true = true_exposure)

Expand Down
Binary file modified data/health_sim.rda
Binary file not shown.
Loading

0 comments on commit 9863fee

Please sign in to comment.