Skip to content

Commit

Permalink
bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
snaketron committed Apr 5, 2024
1 parent 8ae858c commit 918a983
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 49 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: IgGeneUsage
Type: Package
Title: Differential gene usage in immune repertoires
Version: 1.17.23
Version: 1.17.24
Authors@R:
person(given = "Simo",
family = "Kitanovski",
Expand Down
4 changes: 2 additions & 2 deletions R/utils_ppc.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ get_ppc_condition <- function(glm,
}

# condition map
condition_map <- data.frame(condition_name = ud$condition_names,
condition_id = ud$condition_id)
condition_map <- data.frame(condition_name = ud$condition_name_of_sample,
condition_id = ud$condition_id_of_sample)
condition_map <- condition_map[duplicated(condition_map)==FALSE,]
rownames(condition_map) <- condition_map$condition_id
yhat$gene_name <- ud$gene_names[yhat$gene_id]
Expand Down
17 changes: 9 additions & 8 deletions R/utils_usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,28 @@ get_usage <- function(u) {
check_paired <- function(u,
has_balanced_replicates,
has_replicates,
has_condition) {
if(has_condition==FALSE) {
has_conditions) {
if(has_conditions==FALSE) {
return(FALSE)
}
if(has_balanced_replicates==FALSE) {
return(FALSE)
}

if(has_replicates) {
q <- u[duplicated(u[,c("individual_id","condition","replicate")])==FALSE,]
q <- u[duplicated(u[,c("individual_id","condition",
"replicate_id")]) == FALSE,]
q$f <- 1
q <- aggregate(f~individual_id+condition+replicate, data = q,
FUN = sum, simplify = FALSE, drop = FALSE)
q <- aggregate(f~individual_id+condition+replicate_id,
data = q, FUN = sum, drop = FALSE)
q$f[is.null(q$f)|is.na(q$f)] <- 0
return(ifelse(test = any(q$f!=1), yes = FALSE, no = TRUE))
}
else {
q <- u[duplicated(u[,c("individual_id", "condition")])==FALSE,]
q$f <- 1
q <- aggregate(f~individual_id+condition, data = q, FUN = sum,
simplify = FALSE, drop = FALSE)
q <- aggregate(f~individual_id+condition,
data = q, FUN = sum, drop = FALSE)
q$f[is.null(q$f)|is.na(q$f)] <- 0
return(ifelse(test = any(q$f!=1), yes = FALSE, no = TRUE))
}
Expand Down Expand Up @@ -158,7 +159,7 @@ get_usage <- function(u) {
u = u,
has_balanced_replicates = has_balanced_replicates,
has_replicates = has_replicates,
has_condition = has_condition)
has_conditions = has_conditions)

return(list(Y = Y,
N = as.numeric(N),
Expand Down
Binary file modified data/d_zibb_3.RData
Binary file not shown.
19 changes: 13 additions & 6 deletions inst/scripts/d_zibb_3.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,24 @@ data {
array [N_individual] int condition_id; // id of conditions
real <lower=0> phi;
real <lower=0, upper=1> kappa;
array [N_condition] vector [N_gene] beta_condition;
vector <lower=0> [N_condition] sigma_individual;
vector <lower=0> [N_condition] sigma_condition;
}
generated quantities {
vector [N_gene] alpha;
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
array [N_individual] vector [N_gene] beta_individual;
array [N_condition] vector [N_gene] beta_condition;
// generate usage
array [N_gene, N_individual] int Y;
for(i in 1:N_condition) {
for(j in 1:N_gene) {
beta_condition[i][j] = normal_rng(0, sigma_condition[i]);
}
}
for(i in 1:N_gene) {
alpha[i] = normal_rng(-3.0, 1.0);
Expand All @@ -54,10 +61,10 @@ N_condition <- 3
N_individual <- 5
N_gene <- 8
N <- 10^3
sigma_individual <- runif(n = N_condition, min = 0.1, max = 0.6)
beta_condition <- t(replicate(n = N_condition, expr = rnorm(n = N_gene, mean = 0, sd = 1)))
sigma_individual <- runif(n = N_condition, min = 0.1, max = 0.2)
sigma_condition <- runif(n = N_condition, min = 0.2, max = 0.6)
phi <- 200
kappa <- 0.03
kappa <- 0.015

condition_id <- rep(x = 1:N_condition, each = N_individual)

Expand All @@ -67,7 +74,7 @@ l <- list(N_individual = N_individual*N_condition,
N = N,
condition_id = condition_id,
sigma_individual = sigma_individual,
beta_condition = beta_condition,
sigma_condition = sigma_condition,
phi = phi,
kappa = kappa)

Expand All @@ -77,7 +84,7 @@ sim <- rstan::sampling(object = m,
iter = 1,
chains = 1,
algorithm = "Fixed_param",
seed = 123456)
seed = 12346)


# extract simulation and convert into data frame which can
Expand Down
57 changes: 25 additions & 32 deletions vignettes/User_Manual.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,19 @@ on the posterior distribution of $\gamma$, and are thus related.
`r Biocpkg("IgGeneUsage")` has a couple of built-in Ig gene usage datasets.
Some were obtained from studies and others were simulated.

Lets look into the simulated dataset `d_zibb_2`. This dataset was generated
Lets look into the simulated dataset `d_zibb_3`. This dataset was generated
by a zero-inflated beta-binomial (ZIBB) model, and `r Biocpkg("IgGeneUsage")`
was designed to fit ZIBB-distributed data.

```{r}
data("d_zibb_2", package = "IgGeneUsage")
knitr::kable(head(d_zibb_2))
data("d_zibb_3", package = "IgGeneUsage")
knitr::kable(head(d_zibb_3))
```

We can also visualize `d_zibb_2` with `r CRANpkg("ggplot")`:
We can also visualize `d_zibb_3` with `r CRANpkg("ggplot")`:

```{r, fig.width=6, fig.height=3.25}
ggplot(data = d_zibb_2)+
ggplot(data = d_zibb_3)+
geom_point(aes(x = gene_name, y = gene_usage_count, col = condition),
position = position_dodge(width = .7), shape = 21)+
theme_bw(base_size = 11)+
Expand All @@ -113,10 +113,10 @@ ggplot(data = d_zibb_2)+

## DGU analysis
As main input `r Biocpkg("IgGeneUsage")` uses a data.frame formatted as e.g.
`d_zibb_2`. Other input parameters allow you to configure specific settings
`d_zibb_3`. Other input parameters allow you to configure specific settings
of the `r CRANpkg("rstan")` sampler.

In this example, we analyze `d_zibb_2` with 3 MCMC chains, 1500 iterations
In this example, we analyze `d_zibb_3` with 3 MCMC chains, 1500 iterations
each including 500 warm-ups using a single CPU core (Hint: for parallel
chain execution set parameter `mcmc_cores` = 3). We report for each model
parameter its mean and 95% highest density interval (HDIs).
Expand All @@ -129,8 +129,8 @@ issue with a reproducible script at the Bioconductor support site or on
Github[^3].

```{r}
M <- DGU(ud = d_zibb_2, # input data
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
M <- DGU(ud = d_zibb_3, # input data
mcmc_warmup = 300, # how many MCMC warm-ups per chain (default: 500)
mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500)
mcmc_chains = 3, # how many MCMC chain to run (default: 4)
mcmc_cores = 1, # how many PC cores to use? (e.g. parallel chains)
Expand Down Expand Up @@ -182,7 +182,7 @@ summary(M)
rstan::check_hmc_diagnostics(M$fit)
```

* rhat < 1.03 and n_eff > 0
* rhat < 1.05 and n_eff > 0


```{r, fig.height = 3, fig.width = 6}
Expand All @@ -197,7 +197,7 @@ Error bars show 95% HDI of mean posterior prediction. The predictions can be
compared with the observed data (x-axis). For points near the diagonal
$\rightarrow$ accurate prediction.

```{r, fig.height = 3.25, fig.width = 7}
```{r, fig.height = 4, fig.width = 7}
ggplot(data = M$ppc$ppc_rep)+
facet_wrap(facets = ~individual_id, ncol = 5)+
geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+
Expand Down Expand Up @@ -366,7 +366,7 @@ by evaluating their variability for a specific gene.
This analysis can be computationally demanding.

```{r}
L <- LOO(ud = d_zibb_2, # input data
L <- LOO(ud = d_zibb_3, # input data
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
mcmc_steps = 1000, # how many MCMC steps per chain (default: 1,500)
mcmc_chains = 1, # how many MCMC chain to run (default: 4)
Expand All @@ -376,6 +376,7 @@ L <- LOO(ud = d_zibb_2, # input data
max_treedepth = 10) # tree depth evaluated at each step (default: 12)
```


Next, we collected the results (GU and DGU) from each LOO iteration:

```{r}
Expand All @@ -388,32 +389,32 @@ L_dgu <- do.call(rbind, lapply(X = L, FUN = function(x){return(x$dgu)}))

## LOO-DGU: variability of effect size $\gamma$

```{r, fig.width=6.5, fig.height=4}
```{r, fig.width=6, fig.height=5}
ggplot(data = L_dgu)+
facet_wrap(facets = ~contrast, ncol = 1)+
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
geom_errorbar(aes(x = gene_name, y = es_mean, ymin = es_L,
ymax = es_H, col = contrast, group = loo_id),
width = 0.1, position = position_dodge(width = 0.5))+
width = 0.1, position = position_dodge(width = 0.75))+
geom_point(aes(x = gene_name, y = es_mean, col = contrast,
group = loo_id), size = 1,
position = position_dodge(width = 0.5))+
position = position_dodge(width = 0.75))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(expression(gamma))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
theme(legend.position = "none")+
ylab(expression(gamma))
```

## LOO-DGU: variability of $\pi$

```{r, fig.width=6.5, fig.height=4}
```{r, fig.width=6, fig.height=5}
ggplot(data = L_dgu)+
facet_wrap(facets = ~contrast, ncol = 1)+
geom_point(aes(x = gene_name, y = pmax, col = contrast,
group = loo_id), size = 1,
position = position_dodge(width = 0.5))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(expression(pi))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
theme(legend.position = "none")+
ylab(expression(pi))
```


Expand All @@ -425,24 +426,16 @@ ggplot(data = L_gu)+
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
ymax = prob_H, col = condition,
group = interaction(loo_id, condition)),
width = 0.1, position = position_dodge(width = 0.5))+
width = 0.1, position = position_dodge(width = 1))+
geom_point(aes(x = gene_name, y = prob_mean, col = condition,
group = interaction(loo_id, condition)), size = 1,
position = position_dodge(width = 0.5))+
position = position_dodge(width = 1))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab("GU [probability]")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```

# Hierarchical clustering analaysis

```{r, fig.width=6, fig.height=4}
# x <- M$theta
x <- acast(individual_id~gene_name, data = M$theta, value.var = "theta_mean")
plot(hclust(dist(x, method = "euclidean"), method = "average"))
```


# Case Study B: analyzing IRRs containing biological replicates
Expand Down

0 comments on commit 918a983

Please sign in to comment.