diff --git a/R/jsdm_stancode.R b/R/jsdm_stancode.R index f97117f..85fe886 100644 --- a/R/jsdm_stancode.R +++ b/R/jsdm_stancode.R @@ -105,13 +105,13 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), vector[K] sigmas_preds; matrix[K, S] z_preds; // covariance matrix on betas by predictors - cholesky_factor_corr[K] cor_preds;", "unstruct" = " + corr_matrix[K] cor_preds;", "unstruct" = " matrix[K, S] betas;") mglmm_spcov_pars <- " // species covariances vector[S] sigmas_species; matrix[S, N] z_species; - cholesky_factor_corr[S] cor_species;" + corr_matrix[S] cor_species;" gllvm_pars <- " // Factor parameters vector[M] L; // Non-zero factor loadings diff --git a/R/prior.R b/R/prior.R index b5130a4..3108dcf 100644 --- a/R/prior.R +++ b/R/prior.R @@ -30,8 +30,8 @@ #' to be positive (default standard normal) #' @param z_preds The covariate effects (default standard normal) #' @param cor_preds The correlation matrix on the covariate effects (npred by npred -#' matrix represented as a Cholesky factor of a correlation matrix) (default -#' \code{"lkj_corr_cholesky(1)"}) +#' correlation matrix) (default +#' \code{"lkj_corr(1)"}) #' @param betas If covariate effects are unstructured, the prior on the covariate #' effects #' @param a The site level intercepts (default standard normal) @@ -43,7 +43,7 @@ #' @param z_species For MGLMM method, the S by N matrix of species covariance by site #' (default standard normal) #' @param cor_species For MGLMM method, the correlation between species represented -#' as a Cholesky factor correlation matrix (default \code{"lkj_corr_cholesky(1)"}) +#' as a nspecies by nspecies correlation matrix (default \code{"lkj_corr(1)"}) #' @param LV For GLLVM method, the per site latent variable loadings (default #' standard normal) #' @param L For GLLVM method, the non-zero species latent variable loadings (default @@ -64,14 +64,14 @@ #' jsdm_prior <- function(sigmas_preds = "normal(0,1)", z_preds = "normal(0,1)", - cor_preds = "lkj_corr_cholesky(1)", + cor_preds = "lkj_corr(1)", betas = "normal(0,1)", a = "normal(0,1)", a_bar = "normal(0,1)", sigma_a = "normal(0,1)", sigmas_species = "normal(0,1)", z_species = "normal(0,1)", - cor_species = "lkj_corr_cholesky(1)", + cor_species = "lkj_corr(1)", LV = "normal(0,1)", L = "normal(0,1)", sigma_L = "normal(0,1)", diff --git a/R/sim_data_funs.R b/R/sim_data_funs.R index 46f842c..c18ecdf 100644 --- a/R/sim_data_funs.R +++ b/R/sim_data_funs.R @@ -51,13 +51,13 @@ #' currently. #' #' @param beta_param The parameterisation of the environmental covariate effects, by -#' default \code{"cor"}. See details for further information. +#' default \code{"unstruct"}. See details for further information. #' #' @param prior Set of prior specifications from call to [jsdm_prior()] jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "mglmm"), species_intercept = TRUE, site_intercept = "none", - beta_param = "cor", + beta_param = "unstruct", prior = jsdm_prior()) { response <- match.arg(family, c("gaussian", "neg_binomial", "poisson", "bernoulli")) site_intercept <- match.arg(site_intercept, c("none","ungrouped","grouped")) @@ -96,6 +96,7 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m "normal", "inv_gamma", "lkj_corr_cholesky", + "lkj_corr", "student_t", "cauchy", "gamma" @@ -118,6 +119,7 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m "normal" = "rnorm", "inv_gamma" = "rinvgamma", "lkj_corr_cholesky" = "rlkj", + "lkj_corr" = "rlkj", "student_t" = "rstudentt", "cauchy" = "rcauchy", "gamma" = "rgamma" @@ -141,6 +143,12 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m ) fun_args <- as.list(c(fun_arg1, as.numeric(unlist(y[[1]][[1]])[-1]))) + if(x == "cor_preds" & grepl("lkj_corr_cholesky\\(",prior$cor_species)) + fun_args <- c(fun_args,1) + if(x == "cor_species" & grepl("lkj_corr_cholesky\\(",prior$cor_species)) + fun_args <- c(fun_args,1) + + return(list(fun_name, fun_args)) }) names(prior_func) <- names(prior_split) @@ -175,22 +183,22 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m # covariate parameters if(beta_param == "cor"){ - beta_sds <- abs(do.call( + sigmas_preds <- abs(do.call( match.fun(prior_func[["sigmas_preds"]][[1]]), prior_func[["sigmas_preds"]][[2]] )) - z_betas <- matrix(do.call( + z_preds <- matrix(do.call( match.fun(prior_func[["z_preds"]][[1]]), prior_func[["z_preds"]][[2]] ), ncol = S, nrow = J) if (K == 0) { - beta_sim <- beta_sds %*% z_betas + beta_sim <- sigmas_preds %*% z_preds } else { cor_preds <- do.call( match.fun(prior_func[["cor_preds"]][[1]]), prior_func[["cor_preds"]][[2]] ) - beta_sim <- (diag(beta_sds) %*% cor_preds) %*% z_betas + beta_sim <- (diag(sigmas_preds) %*% cor_preds) %*% z_preds } } else if (beta_param == "unstruct"){ beta_sim <- matrix(do.call( @@ -223,15 +231,15 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m if (method == "mglmm") { # u covariance - u_sds <- abs(do.call( + sigmas_species <- abs(do.call( match.fun(prior_func[["sigmas_species"]][[1]]), prior_func[["sigmas_species"]][[2]] )) - u_ftilde <- matrix(do.call( + z_species <- matrix(do.call( match.fun(prior_func[["z_species"]][[1]]), prior_func[["z_species"]][[2]] ), nrow = S, ncol = N) - u_ij <- t((diag(u_sds) %*% cor_species) %*% u_ftilde) + u_ij <- t((diag(sigmas_species) %*% cor_species) %*% z_species) } if (method == "gllvm") { @@ -254,7 +262,7 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m } } - L_sigma <- abs(do.call( + sigma_L <- abs(do.call( match.fun(prior_func[["sigma_L"]][[1]]), prior_func[["sigma_L"]][[2]] )) @@ -264,7 +272,7 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m prior_func[["LV"]][[2]] ), nrow = D, ncol = N) - LV_sum <- (L * L_sigma) %*% LV + LV_sum <- (L * sigma_L) %*% LV } # variance parameters @@ -309,8 +317,8 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m ) if(beta_param == "cor"){ - pars$beta_sds <- beta_sds - pars$z_betas <- z_betas + pars$sigmas_preds <- sigmas_preds + pars$z_preds <- z_preds } if (site_intercept == "ungrouped") { @@ -321,12 +329,18 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m if (method == "gllvm") { pars$L <- L pars$LV <- LV - pars$L_sigma <- L_sigma + pars$sigma_L <- sigma_L } if (method == "mglmm") { - pars$u_sds <- u_sds + pars$sigmas_species <- sigmas_species pars$cor_species <- cor_species - pars$u_ftilde <- u_ftilde + pars$z_species <- z_species + } + if (response == "gaussian") { + pars$sigma <- sigma + } + if (response == "neg_binomial") { + pars$kappa <- kappa } if (isTRUE(species_intercept)) { if (K > 0) { @@ -440,7 +454,7 @@ rgbeta <- #' @rdname sim_helpers #' @export rlkj <- - function(n, eta = 1, cholesky = TRUE) { + function(n, eta = 1, cholesky = FALSE) { if (n < 2) { stop("Dimension of correlation matrix must be >= 2") } diff --git a/R/stan_jsdm.R b/R/stan_jsdm.R index 7e96999..26177f2 100644 --- a/R/stan_jsdm.R +++ b/R/stan_jsdm.R @@ -56,7 +56,7 @@ #' quantities (by default TRUE), required for loo #' #' @param beta_param The parameterisation of the environmental covariate effects, by -#' default \code{"cor"}. See details for further information. +#' default \code{"unstruct"}. See details for further information. #' #' @param ... Arguments passed to [rstan::sampling()] #' @@ -97,7 +97,7 @@ stan_jsdm <- function(X, ...) UseMethod("stan_jsdm") stan_jsdm.default <- function(X = NULL, Y = NULL, species_intercept = TRUE, method, dat_list = NULL, family, site_intercept = "none", D = NULL, prior = jsdm_prior(), site_groups = NULL, - beta_param = "cor", + beta_param = "unstruct", save_data = TRUE, iter = 4000, log_lik = TRUE, ...) { family <- match.arg(family, c("gaussian", "bernoulli", "poisson", "neg_binomial")) beta_param <- match.arg(beta_param, c("cor", "unstruct")) diff --git a/man/jsdm_prior.Rd b/man/jsdm_prior.Rd index 29c9eaf..33d21b9 100644 --- a/man/jsdm_prior.Rd +++ b/man/jsdm_prior.Rd @@ -8,14 +8,14 @@ jsdm_prior( sigmas_preds = "normal(0,1)", z_preds = "normal(0,1)", - cor_preds = "lkj_corr_cholesky(1)", + cor_preds = "lkj_corr(1)", betas = "normal(0,1)", a = "normal(0,1)", a_bar = "normal(0,1)", sigma_a = "normal(0,1)", sigmas_species = "normal(0,1)", z_species = "normal(0,1)", - cor_species = "lkj_corr_cholesky(1)", + cor_species = "lkj_corr(1)", LV = "normal(0,1)", L = "normal(0,1)", sigma_L = "normal(0,1)", @@ -32,8 +32,8 @@ to be positive (default standard normal)} \item{z_preds}{The covariate effects (default standard normal)} \item{cor_preds}{The correlation matrix on the covariate effects (npred by npred -matrix represented as a Cholesky factor of a correlation matrix) (default -\code{"lkj_corr_cholesky(1)"})} +correlation matrix) (default +\code{"lkj_corr(1)"})} \item{betas}{If covariate effects are unstructured, the prior on the covariate effects} @@ -52,7 +52,7 @@ covariances, constrained to be positive (default half standard normal)} (default standard normal)} \item{cor_species}{For MGLMM method, the correlation between species represented -as a Cholesky factor correlation matrix (default \code{"lkj_corr_cholesky(1)"})} +as a nspecies by nspecies correlation matrix (default \code{"lkj_corr(1)"})} \item{LV}{For GLLVM method, the per site latent variable loadings (default standard normal)} diff --git a/man/jsdm_sim_data.Rd b/man/jsdm_sim_data.Rd index b1c17ff..20dc14f 100644 --- a/man/jsdm_sim_data.Rd +++ b/man/jsdm_sim_data.Rd @@ -15,7 +15,7 @@ jsdm_sim_data( method = c("gllvm", "mglmm"), species_intercept = TRUE, site_intercept = "none", - beta_param = "cor", + beta_param = "unstruct", prior = jsdm_prior() ) @@ -48,7 +48,7 @@ with no grouping). Defaults to no site intercept, grouped is not supported currently.} \item{beta_param}{The parameterisation of the environmental covariate effects, by -default \code{"cor"}. See details for further information.} +default \code{"unstruct"}. See details for further information.} \item{prior}{Set of prior specifications from call to \code{\link[=jsdm_prior]{jsdm_prior()}}} diff --git a/man/sim_helpers.Rd b/man/sim_helpers.Rd index 5aaa7ce..d5ef0c5 100644 --- a/man/sim_helpers.Rd +++ b/man/sim_helpers.Rd @@ -10,7 +10,7 @@ \usage{ rgampois(n, mu, scale) -rlkj(n, eta = 1, cholesky = TRUE) +rlkj(n, eta = 1, cholesky = FALSE) rinvgamma(n, shape, scale) diff --git a/man/stan_jsdm.Rd b/man/stan_jsdm.Rd index 56d8fa7..10ee733 100644 --- a/man/stan_jsdm.Rd +++ b/man/stan_jsdm.Rd @@ -19,7 +19,7 @@ stan_jsdm(X, ...) D = NULL, prior = jsdm_prior(), site_groups = NULL, - beta_param = "cor", + beta_param = "unstruct", save_data = TRUE, iter = 4000, log_lik = TRUE, @@ -62,7 +62,7 @@ grouping)} per site} \item{beta_param}{The parameterisation of the environmental covariate effects, by -default \code{"cor"}. See details for further information.} +default \code{"unstruct"}. See details for further information.} \item{save_data}{If the data used to fit the model should be saved in the model object, by default TRUE.} diff --git a/tests/testthat/test-posterior_predict.R b/tests/testthat/test-posterior_predict.R index 1c6fa8f..4cc895e 100644 --- a/tests/testthat/test-posterior_predict.R +++ b/tests/testthat/test-posterior_predict.R @@ -4,7 +4,7 @@ bern_pred_data <- matrix(rnorm(100 * 2), nrow = 100) colnames(bern_pred_data) <- c("V1", "V2") suppressWarnings(bern_fit <- stan_gllvm( dat_list = bern_sim_data, family = "bern", - refresh = 0, iter = 1000, chains = 2 + refresh = 0, iter = 500, chains = 2 )) test_that("posterior linpred errors appropriately", { @@ -65,8 +65,8 @@ test_that("posterior_(lin)pred works with gllvm", { expect_false(any(sapply(bern_pred, function(x) x < 0))) bern_pred2 <- posterior_predict(bern_fit, - newdata = bern_pred_data, - ndraws = 50, list_index = "species" + newdata = bern_pred_data, + ndraws = 50, list_index = "species" ) expect_length(bern_pred2, 9) @@ -79,7 +79,7 @@ pois_pred_data <- matrix(rnorm(100 * 2), nrow = 100) colnames(pois_pred_data) <- c("V1", "V2") suppressWarnings(pois_fit <- stan_mglmm( dat_list = pois_sim_data, family = "pois", - refresh = 0, chains = 2 + refresh = 0, chains = 2, iter = 500 )) test_that("posterior_(lin)pred works with mglmm", { pois_pred <- posterior_predict(pois_fit, ndraws = 100) @@ -89,11 +89,36 @@ test_that("posterior_(lin)pred works with mglmm", { expect_false(any(sapply(pois_pred, function(x) x < 0))) pois_pred2 <- posterior_predict(pois_fit, - newdata = pois_pred_data, - ndraws = 50, list_index = "species" + newdata = pois_pred_data, + ndraws = 50, list_index = "species" ) expect_length(pois_pred2, 9) expect_false(any(sapply(pois_pred2, anyNA))) expect_false(any(sapply(pois_pred2, function(x) x < 0))) }) + +negb_sim_data <- mglmm_sim_data(N = 100, S = 9, K = 2, family = "neg_bin", + site_intercept = "ungrouped") +negb_pred_data <- matrix(rnorm(100 * 2), nrow = 100) +colnames(negb_pred_data) <- c("V1", "V2") +suppressWarnings(negb_fit <- stan_mglmm( + dat_list = negb_sim_data, family = "neg_bin", + refresh = 0, chains = 2, iter = 500 +)) +test_that("posterior_(lin)pred works with mglmm and negbin", { + negb_pred <- posterior_predict(negb_fit, ndraws = 100) + + expect_length(negb_pred, 100) + expect_false(any(sapply(negb_pred, anyNA))) + expect_false(any(sapply(negb_pred, function(x) x < 0))) + + negb_pred2 <- posterior_predict(negb_fit, + newdata = negb_pred_data, + ndraws = 50, list_index = "species" + ) + + expect_length(negb_pred2, 9) + expect_false(any(sapply(negb_pred2, anyNA))) + expect_false(any(sapply(negb_pred2, function(x) x < 0))) +}) diff --git a/tests/testthat/test-sim_data_funs.R b/tests/testthat/test-sim_data_funs.R index db04f5c..887ebd4 100644 --- a/tests/testthat/test-sim_data_funs.R +++ b/tests/testthat/test-sim_data_funs.R @@ -68,6 +68,23 @@ test_that("mglmm_sim_data returns a list of correct length", { )) }) +test_that("jsdm_sim_data returns all appropriate pars", { + mglmm_sim <- jsdm_sim_data(100,12,family = "gaussian", method = "mglmm", + beta_param = "cor") + expect_named(mglmm_sim$pars, c( + "betas","sigmas_preds","z_preds","sigmas_species", + "cor_species","z_species","sigma" + )) + gllvm_sim <- jsdm_sim_data(100,12,D=2,family = "neg_bin", method = "gllvm", + beta_param = "unstruct", + site_intercept = "ungrouped") + expect_named(gllvm_sim$pars, c( + "betas","a_bar","sigma_a","a","L","LV","sigma_L","kappa" + )) + + +}) + test_that("prior specification works", { jsdm_sim <- jsdm_sim_data( N = 100, S = 8, K = 2, family = "gaus", method = "mglmm", diff --git a/vignettes/jsdmstan-overview.Rmd b/vignettes/jsdmstan-overview.Rmd index 1a5717f..1fb78d2 100644 --- a/vignettes/jsdmstan-overview.Rmd +++ b/vignettes/jsdmstan-overview.Rmd @@ -17,7 +17,7 @@ knitr::opts_chunk$set( ```{r setup} library(jsdmstan) library(ggplot2) -set.seed(82631729) +set.seed(8264372) ``` # Joint Species Distribution Models @@ -74,8 +74,8 @@ This default behaviour can be overridden through specifying the `beta_param` arg First we can use the in-built functions for simulating data according to the MGLMM model - we'll choose to simulate 15 species over 200 sites with 2 environmental covariates. The species are assumed to follow a Poisson distribution (with a log-link), and we use the defaults of including a species-specific intercept but no site-specific intercept. At the moment only default priors (standard normal distribution) are supported. We can do this using either the `jsdm_sim_data` function with `method = "mglmm"` or with the `mglmm_sim_data` function which just calls `jsdm_sim_data` in the background. ```{r} -nsites <- 25 -nspecies <- 6 +nsites <- 60 +nspecies <- 8 ncovar <- 2 mglmm_test_data <- mglmm_sim_data(N = nsites, S = nspecies, K = ncovar, family = "pois") @@ -160,16 +160,15 @@ As we have run the above model on simulated data and the original data list cont ```{r} mcmc_plot(mglmm_fit, plotfun = "recover_hist", - pars = c("sigmas_preds[1]","sigmas_preds[2]", - "sigmas_preds[3]"), - true = mglmm_test_data$pars$beta_sds) + pars = paste0("sigmas_species[",1:8,"]"), + true = mglmm_test_data$pars$sigmas_species) ``` ```{r} mcmc_plot(mglmm_fit, plotfun = "recover_intervals", - pars = paste0("cor_species[",rep(1:nspecies, nspecies),",", - rep(1:nspecies, each = nspecies),"]"), - true = c(mglmm_test_data$pars$cor_species)) + + pars = paste0("cor_species[",rep(1:nspecies, nspecies:1),",", + unlist(sapply(1:8, ":",8)),"]"), + true = c(mglmm_test_data$pars$cor_species[lower.tri(mglmm_test_data$pars$cor_species, diag = TRUE)])) + theme(axis.text.x = element_text(angle = 90)) ``` @@ -182,7 +181,8 @@ The model fitting workflow for latent variable models is very similar to that ab set.seed(3562251) gllvm_test_data <- gllvm_sim_data(N = 32, S = 7, D = 2, family = "gaussian", prior = jsdm_prior(sigma = "student_t(3,0,1)", - sigmas_preds = "student_t(3,0,1)")) + sigmas_preds = "student_t(3,0,1)"), + beta_param = "cor") ``` @@ -191,6 +191,7 @@ gllvm_fit <- stan_jsdm(Y = gllvm_test_data$Y, X = gllvm_test_data$X, D = gllvm_test_data$D, family = "gaussian", method = "gllvm", + beta_param = "cor", prior = jsdm_prior(sigma = "student_t(3,0,1)", sigmas_preds = "student_t(3,0,1)"), refresh = 0)