Skip to content

Commit

Permalink
update of case study 5
Browse files Browse the repository at this point in the history
  • Loading branch information
snaketron committed Mar 26, 2024
1 parent d43cb09 commit 309f546
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 23 deletions.
Binary file added data/d_zibb_5.RData
Binary file not shown.
149 changes: 149 additions & 0 deletions inst/scripts/d_zibb_5.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
require(rstan)

# Stan generative model
sim_stan <- "
functions {
int zibb_rng(int y, int n, real mu, real phi, real kappa) {
if (bernoulli_rng(kappa) == 1) {
return (0);
} else {
return (beta_binomial_rng(n, mu * phi, (1 - mu) * phi));
}
}
}
data {
int<lower=0> N_sample; // number of repertoires
int<lower=0> N_gene; // gene
int<lower=0> N_individual; // number of individuals
int<lower=0> N_condition; // number of conditions
array [N_sample] int N; // repertoire size
array [N_sample] int condition_id; // id of conditions
array [N_sample] int individual_id; // id of replicate
real <lower=0> phi;
real <lower=0, upper=1> kappa;
array [N_condition] vector [N_gene] beta_condition;
vector <lower=0> [N_condition] sigma_condition;
real <lower=0> sigma_alpha;
}
generated quantities {
vector [N_gene] alpha;
array [N_individual] vector [N_gene] alpha_individual;
array [N_sample] vector [N_gene] beta_individual;
// generate usage
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
array [N_gene, N_sample] int Y;
for(i in 1:N_gene) {
alpha[i] = normal_rng(-3, 0.5);
}
for(i in 1:N_sample) {
for(j in 1:N_gene) {
alpha_individual[individual_id[i]][j] = normal_rng(alpha[j], sigma_alpha);
beta_individual[i][j] = normal_rng(beta_condition[condition_id[i]][j], sigma_condition[condition_id[i]]);
theta[i][j] = inv_logit(alpha_individual[individual_id[i]][j] + beta_individual[i][j]);
Y[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
}
}
}
"

# compile model
model <- rstan::stan_model(model_code = sim_stan)



# generate data based on the following parameters parameters
set.seed(11132)
N_gene <- 8
N_individual <- 10
N_condition <- 2
N_sample <- N_individual*N_condition

condition_id <- rep(x = 1:N_condition, each = N_individual)

N <- rep(x = 1000, times = N_sample)

individual_id <- rep(x = 1:N_individual, times = N_condition)

phi <- 200
kappa <- 0.02

beta_condition <- array(data = 0, dim = c(N_condition, N_gene))
for(c in 1:N_condition) {
for(g in 1:N_gene) {
u <- runif(n = 1, min = 0, max = 1)
if(u <= 0.8) {
beta_condition[c,g] <- rnorm(n = 1, mean = 0, sd = 0.1)
} else {
beta_condition[c,g] <- rnorm(n = 1, mean = 0, sd = 2)
}
}
}

sigma_condition <- rep(x = 0.5, times = N_condition)
sigma_alpha <- 0.25


l <- list(N_sample = N_sample,
N_gene = N_gene,
N_individual = N_individual,
N_condition = N_condition,
N = N,
condition_id = condition_id,
individual_id = individual_id,
phi = phi,
kappa = kappa,
beta_condition = beta_condition,
sigma_condition = sigma_condition,
sigma_alpha = sigma_alpha)

# simulate
sim <- rstan::sampling(object = model,
data = l,
iter = 1,
chains = 1,
algorithm="Fixed_param")

# extract simulation and convert into data frame which can
# be used as input of IgGeneUsage
ysim <- rstan::extract(object = sim, par = "Y")$Y
ysim <- ysim[1,,]

ysim_df <- reshape2::melt(ysim)
colnames(ysim_df) <- c("gene_name", "sample_id", "gene_usage_count")

m <- data.frame(sample_id = 1:l$N_sample,
individual_id = l$individual_id,
condition_id = l$condition_id)

ysim_df <- merge(x = ysim_df, y = m, by = "sample_id", all.x = T)

ysim_df$condition <- paste0("C_", ysim_df$condition_id)
ysim_df$gene_name <- paste0("G_", ysim_df$gene_name)
ysim_df$individual_id <- paste0("I_", ysim_df$individual_id)

ysim_df$condition_id <- NULL
ysim_df$sample_id <- NULL
ysim_df <- ysim_df[, c("individual_id", "condition", "gene_name",
"gene_usage_count")]

d_zibb_5 <- ysim_df

# save
save(d_zibb_5, file = "data/d_zibb_5.RData", compress = T)


# save(sim, file = "mytests/sim_d_zibb_4.RData", compress = T)
ggplot(data = d_zibb_5)+
facet_wrap(facets = ~gene_name, scales = "free_y")+
geom_point(aes(x = condition, y = gene_usage_count, group = individual_id))+
geom_line(aes(x = condition, y = gene_usage_count, group = individual_id))+
theme_bw(base_size = 10)+
theme(legend.position = "none")

2 changes: 1 addition & 1 deletion inst/stan/dgu_paired.stan
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ transformed parameters {
for(i in 1:N_individual) {
alpha_individual[i] = alpha + sigma_alpha * z_alpha_individual[i];
for(j in 1:N_condition) {
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
}
}

Expand Down
39 changes: 39 additions & 0 deletions man/d_zibb_5.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
\name{d_zibb_5}
\alias{d_zibb_5}
\docType{data}
\title{Simulated Ig gene usage data}

\description{
A small example of paired-sample IRRs with these features:

\itemize{
\item 2 conditions
\item 10 individuals with one IRRs per condition
\item 8 Ig genes
}
This dataset was simulated from zero-inflated beta-binomial (ZIBB)
distribution. Simulation code is available in inst/scripts/d_zibb_5.R
}

\usage{
data("d_zibb_5", package = "IgGeneUsage")
}

\format{
A data frame with 4 columns:
\itemize{
\item "individual_id"
\item "condition"
\item "gene_name"
\item "gene_name_count"
}
This format is accepted by IgGeneUsage.
}
\source{
Simulation code is provided in inst/scripts/d_zibb_5.R
}
\examples{
data("d_zibb_5", package = "IgGeneUsage")
head(d_zibb_5)
}
\keyword{d_zibb_5}
86 changes: 64 additions & 22 deletions vignettes/User_Manual.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -548,24 +548,13 @@ g2 <- ggplot(data = stats)+
# Case Study C: analyzing paired-IRRs

```{r}
require(IgGeneUsage)
require(rstan)
require(knitr)
require(ggplot2)
require(ggforce)
require(ggrepel)
require(reshape2)
require(patchwork)
```

```{r}
data("d_zibb_3", package = "IgGeneUsage")
data("d_zibb_5", package = "IgGeneUsage")
```


```{r, fig.width=6, fig.height=6}
ggplot(data = d_zibb_3)+
facet_wrap(facets = ~gene_name)+
```{r, fig.width=6, fig.height=4}
ggplot(data = d_zibb_5)+
facet_wrap(facets = ~gene_name, ncol = 4, scales = "free_y")+
geom_line(aes(x = condition, y = gene_usage_count, group = individual_id))+
geom_point(aes(x = condition, y = gene_usage_count, col = condition))+
theme_bw(base_size = 11)+
Expand All @@ -575,12 +564,11 @@ ggplot(data = d_zibb_3)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```


```{r}
M <- DGU(ud = d_zibb_3, # input data
M <- DGU(ud = d_zibb_5, # input data
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500)
mcmc_chains = 3, # how many MCMC chain to run (default: 4)
mcmc_chains = 1, # how many MCMC chain to run (default: 4)
mcmc_cores = 3, # how many PC cores to use? (e.g. parallel chains)
hdi_lvl = 0.95, # highest density interval level (de fault: 0.95)
adapt_delta = 0.8, # MCMC target acceptance rate (default: 0.95)
Expand All @@ -590,16 +578,15 @@ M <- DGU(ud = d_zibb_3, # input data


```{r, fig.height = 3, fig.width = 6}
rstan::stan_rhat(object = M$fit)|
rstan::stan_ess(object = M$fit)
rstan::stan_rhat(object = M$fit)|rstan::stan_ess(object = M$fit)
```


```{r, fig.weight = 7, fig.height = 4}
g1 <- ggplot(data = M$gu)+
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
ymax = prob_H, col = condition),
width = 0.1, position = position_dodge(width = 0.4))+
width = 0.01, position = position_dodge(width = 0.4))+
geom_point(aes(x = gene_name, y = prob_mean, col = condition), size = 1,
position = position_dodge(width = 0.4))+
theme_bw(base_size = 11)+
Expand All @@ -616,7 +603,7 @@ g2 <- ggplot(data = stats)+
facet_wrap(facets = ~contrast)+
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H),
col = "darkgray")+
col = "darkgray", width = 0.01)+
geom_point(aes(x = pmax, y = es_mean, col = contrast))+
geom_text_repel(data = stats[stats$pmax >= 0.9, ],
aes(x = pmax, y = es_mean, label = gene_fac),
Expand All @@ -635,6 +622,61 @@ g2 <- ggplot(data = stats)+



<!-- ```{r} -->
<!-- Mu <- DGU(ud = d_zibb_5, # input data -->
<!-- mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500) -->
<!-- mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500) -->
<!-- mcmc_chains = 3, # how many MCMC chain to run (default: 4) -->
<!-- mcmc_cores = 3, # how many PC cores to use? (e.g. parallel chains) -->
<!-- hdi_lvl = 0.95, # highest density interval level (de fault: 0.95) -->
<!-- adapt_delta = 0.8, # MCMC target acceptance rate (default: 0.95) -->
<!-- max_treedepth = 10, # tree depth evaluated at each step (default: 12) -->
<!-- paired = FALSE) -->
<!-- ``` -->



<!-- ```{r, fig.weight = 7, fig.height = 4} -->
<!-- g1 <- ggplot(data = Mu$gu)+ -->
<!-- geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L, -->
<!-- ymax = prob_H, col = condition), -->
<!-- width = 0.01, position = position_dodge(width = 0.4))+ -->
<!-- geom_point(aes(x = gene_name, y = prob_mean, col = condition), size = 1, -->
<!-- position = position_dodge(width = 0.4))+ -->
<!-- theme_bw(base_size = 11)+ -->
<!-- theme(legend.position = "top")+ -->
<!-- ylab(label = "GU [probability]")+ -->
<!-- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4)) -->


<!-- stats <- Mu$dgu -->
<!-- stats <- stats[order(abs(stats$es_mean), decreasing = FALSE), ] -->
<!-- stats$gene_fac <- factor(x = stats$gene_name, levels = unique(stats$gene_name)) -->

<!-- g2 <- ggplot(data = stats)+ -->
<!-- facet_wrap(facets = ~contrast)+ -->
<!-- geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+ -->
<!-- geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H), -->
<!-- col = "darkgray", width = 0.01)+ -->
<!-- geom_point(aes(x = pmax, y = es_mean, col = contrast))+ -->
<!-- geom_text_repel(data = stats[stats$pmax >= 0.9, ], -->
<!-- aes(x = pmax, y = es_mean, label = gene_fac), -->
<!-- min.segment.length = 0, size = 2.75)+ -->
<!-- theme_bw(base_size = 11)+ -->
<!-- theme(legend.position = "top")+ -->
<!-- xlab(label = expression(pi))+ -->
<!-- xlim(c(0, 1))+ -->
<!-- ylab(expression(gamma)) -->
<!-- ``` -->


<!-- ```{r, fig.height = 6, fig.width = 7} -->
<!-- (g1/g2) -->
<!-- ``` -->




# Session

```{r}
Expand Down

0 comments on commit 309f546

Please sign in to comment.