Skip to content

Commit

Permalink
bugfix in theta
Browse files Browse the repository at this point in the history
  • Loading branch information
snaketron committed Feb 14, 2024
1 parent 6247dc0 commit 1bbb804
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 44 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Suggests:
ggplot2,
ggforce,
gridExtra,
ggrepel
ggrepel,
MASS
LinkingTo:
BH (>= 1.66.0),
Rcpp (>= 0.12.0),
Expand Down
26 changes: 12 additions & 14 deletions R/dgu.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,15 @@ DGU <- function(ud,
pars = m$pars,
refresh = 50)

if(m$model_type=="GU") {
message("Computing summaries ... \n")
gu <- get_gu_summary_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
dgu <- NA
dgu_prob <- NA
}
if(m$model_type=="DGU") {
message("Computing summaries ... \n")
gu <- get_gu_summary_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
dgu <- get_dgu_summary(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
dgu_prob <- get_dgu_prob_summary(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
}
message("Computing summaries ... \n")
gu <- get_condition_prop(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
model_type = m$model_type)
dgu <- get_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
model_type = m$model_type)
dgu_prob <- get_dgu_prob(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
model_type = m$model_type)
theta <- get_sample_prop_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)


# ppc
message("Computing posterior predictions ... \n")
Expand All @@ -62,7 +59,8 @@ DGU <- function(ud,
return (list(dgu = dgu,
dgu_prob = dgu_prob,
gu = gu,
glm = glm,
theta = theta,
ppc = ppc,
ud = ud))
ud = ud,
fit = glm))
}
12 changes: 4 additions & 8 deletions R/loo.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@

# Description:
# LOO = leave-one-out
# ud: 4 columns
# * sample_id: char column
# * condition: char column
# * gene_name: char column
# * gene_usage_count: num column
LOO <- function(ud,
mcmc_warmup = 500,
mcmc_steps = 1500,
Expand All @@ -28,8 +23,7 @@ LOO <- function(ud,
ud <- udp$proc_ud

# setup control list
control_list <- list(adapt_delta = adapt_delta,
max_treedepth = max_treedepth)
control_list <- list(adapt_delta = adapt_delta, max_treedepth = max_treedepth)

# unique repertoire names
ud$loo_id <- ud$sample_id
Expand Down Expand Up @@ -74,6 +68,9 @@ LOO <- function(ud,
if(is.data.frame(out$dgu_prob)==TRUE) {
out$dgu_prob$loo_id <- rs[r]
}
if(is.data.frame(out$theta)==TRUE) {
out$theta$loo_id <- rs[r]
}

# collect results
loo_out[[rs[r]]] <- out
Expand All @@ -82,4 +79,3 @@ LOO <- function(ud,
return (loo_out)
}


10 changes: 8 additions & 2 deletions R/utils_dgu.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ get_contrast_map <- function(ud) {
}


get_dgu_summary <- function(glm, hdi_lvl, ud) {
get_dgu <- function(glm, hdi_lvl, ud, model_type) {
if(model_type == "GU") {
return(NA)
}
contrast_map <- get_contrast_map(ud)

dgu_summary <- summary(object = glm,
Expand Down Expand Up @@ -69,7 +72,10 @@ get_dgu_summary <- function(glm, hdi_lvl, ud) {
}


get_dgu_prob_summary <- function(glm, hdi_lvl, ud) {
get_dgu_prob <- function(glm, hdi_lvl, ud, model_type) {
if(model_type == "GU") {
return(NA)
}
contrast_map <- get_contrast_map(ud)

dgu_summary <- summary(object = glm,
Expand Down
50 changes: 48 additions & 2 deletions R/utils_gu.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@

get_gu_summary_dgu <- function(glm, hdi_lvl, ud) {
get_condition_prop <- function(glm, hdi_lvl, ud, model_type) {
if(model_type == "DGU") {
return(get_condition_prop_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud))
}
if(model_type == "GU") {
return(get_condition_prop_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud))
}
}


get_condition_prop_dgu <- function(glm, hdi_lvl, ud) {

gu_summary <- summary(object = glm,
digits = 4,
Expand Down Expand Up @@ -38,7 +48,7 @@ get_gu_summary_dgu <- function(glm, hdi_lvl, ud) {
}


get_gu_summary_gu <- function(glm, hdi_lvl, ud) {
get_condition_prop_gu <- function(glm, hdi_lvl, ud) {

gu_summary <- summary(object = glm,
digits = 4,
Expand Down Expand Up @@ -69,3 +79,39 @@ get_gu_summary_gu <- function(glm, hdi_lvl, ud) {
return(gu_summary)
}


get_sample_prop_gu <- function(glm, hdi_lvl, ud) {

gu <- summary(object = glm, digits = 4, pars = "theta",
prob = c(0.5, (1-hdi_lvl)/2, 1-(1-hdi_lvl)/2))
gu <- data.frame(gu$summary)
colnames(gu) <- c("theta_mean", "theta_mean_se",
"theta_sd", "theta_median",
"theta_L", "theta_H",
"Neff", "Rhat")
gu[, c("Rhat", "Neff")] <- NULL

par <- rownames(gu)
par <- gsub(pattern = "theta|\\[|\\]", replacement = '', x = par)
par <- do.call(rbind, strsplit(x = par, split = ','))

gu$gene_id <- as.numeric(par[,2])
gu$sample_id <- as.numeric(par[,1])

gu$gene_name <- ud$gene_names[gu$gene_id]
gu$sample_name <- ud$sample_names[gu$sample_id]

m <- ud$proc_ud[, c("sample_id", "individual_id", "individual_org_name",
"gene_name", "condition", "gene_usage_prop")]

gu <- merge(x = gu, y = m,
by.x = c("sample_id", "gene_name"),
by.y = c("sample_id", "gene_name"))

# remove unused vars
gu$gene_id <- NULL
gu$sample_id <- NULL
rownames(gu) <- NULL
return(gu)
}

8 changes: 4 additions & 4 deletions R/utils_usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
"sigma_individual", "sigma_replicate",
"beta_sample", "beta_individual", "beta_condition",
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
"log_lik")
"log_lik", "theta")
model_name <- "GU_rep"
}
else {
Expand All @@ -119,7 +119,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
"sigma_individual",
"beta_individual", "beta_condition",
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
"log_lik")
"log_lik", "theta")
model_name <- "GU"
}
}
Expand All @@ -134,7 +134,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
"sigma_condition", "sigma_individual", "sigma_replicate",
"beta_sample", "beta_individual", "beta_condition",
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
"log_lik", "dgu", "dgu_prob")
"log_lik", "dgu", "dgu_prob", "theta")
model_name <- "DGU_rep"
}
else {
Expand All @@ -147,7 +147,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
"sigma_condition", "sigma_individual",
"beta_individual", "beta_condition",
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
"log_lik", "dgu", "dgu_prob")
"log_lik", "dgu", "dgu_prob", "theta")
model_name <- "DGU"
}
}
Expand Down
4 changes: 2 additions & 2 deletions man/DGU.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ of HDI), H (high boundary of HDI); 2) contrast = direction of the effect; 3)
pmax = probability of DGU. This summary is only available if the input data
contains at least two conditions}
\item{gu}{gene usage (GU) summary of each gene in each condition}
\item{glm}{stanfit object}
\item{fit}{stanfit object}
\item{ppc}{two types of posterior predictive checks: 1) repertoire-
specific, 2) condition-specific}
\item{ud}{processed gene usage data used for the model}
Expand Down Expand Up @@ -86,7 +86,7 @@ M <- DGU(ud = d_zibb_2,
max_treedepth = 10)

# look at DGU results
head(M$glm)
head(M$fit)

# look at posterior predictive checks (PPC)
head(M$ppc)
Expand Down
49 changes: 38 additions & 11 deletions vignettes/User_Manual.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require(ggforce)
require(gridExtra)
require(ggrepel)
require(reshape2)
require(MASS)
```


Expand Down Expand Up @@ -221,15 +222,15 @@ summary(M)
* none found

```{r}
rstan::check_hmc_diagnostics(M$glm)
rstan::check_hmc_diagnostics(M$fit)
```

* rhat < 1.03 and n_eff > 0


```{r, fig.height = 3, fig.width = 6}
gridExtra::grid.arrange(rstan::stan_rhat(object = M$glm),
rstan::stan_ess(object = M$glm),
gridExtra::grid.arrange(rstan::stan_rhat(object = M$fit),
rstan::stan_ess(object = M$fit),
nrow = 1)
```

Expand Down Expand Up @@ -271,7 +272,8 @@ ggplot(data = M$ppc$ppc_condition)+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
xlab(label = "Observed usage [%]")+
ylab(label = "PPC usage [%]")
ylab(label = "PPC usage [%]")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```


Expand All @@ -288,7 +290,7 @@ For `es` we also have the mean, median standard error (se), standard
deviation (sd), L (low bound of 95% HDI), H (high bound of 95% HDI)

```{r}
kable(x = head(M$dgu), row.names = FALSE, digits = 3)
kable(x = head(M$dgu), row.names = FALSE, digits = 2)
```

### DGU: differential gene usage
Expand Down Expand Up @@ -352,6 +354,7 @@ ggplot()+
position = position_dodge(width = 0.35), width = 0.15)+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+
ylab(label = "PPC usage [%]")+
xlab(label = '')
```
Expand All @@ -371,7 +374,8 @@ ggplot()+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(label = "PPC usage [count]")+
xlab(label = '')
xlab(label = '')+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```

## GU: gene usage summary
Expand All @@ -389,7 +393,8 @@ ggplot(data = M$gu)+
position = position_dodge(width = 0.4))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(label = "GU [probability]")
ylab(label = "GU [probability]")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```

# Leave-one-out (LOO) analysis
Expand Down Expand Up @@ -439,7 +444,8 @@ ggplot(data = L_dgu)+
position = position_dodge(width = 0.5))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(expression(gamma))
ylab(expression(gamma))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```

## LOO-DGU: variability of $\pi$
Expand All @@ -451,13 +457,14 @@ ggplot(data = L_dgu)+
position = position_dodge(width = 0.5))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab(expression(pi))
ylab(expression(pi))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```


## LOO-GU: variability of the gene usage

```{r, fig.width=6.5, fig.height=4}
```{r, fig.width=6, fig.height=4}
ggplot(data = L_gu)+
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
Expand All @@ -469,9 +476,29 @@ ggplot(data = L_gu)+
position = position_dodge(width = 0.5))+
theme_bw(base_size = 11)+
theme(legend.position = "top")+
ylab("GU [probability]")
ylab("GU [probability]")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
```

# Multidimensional scaling (MDS) analaysis based

```{r, fig.width=5, fig.height=4}
# x <- M$theta
x <- acast(individual_id~gene_name,
data = M$theta, value.var = "theta_mean")
nmds <- isoMDS(dist(x), k=2)
x <- data.frame(x = nmds$points[,1], y = nmds$points[,2], id = row.names(x))
ggplot(data = x)+
geom_point(aes(x = x, y = y), shape = 21, size = 2)+
geom_text_repel(aes(x = x, y = y, label = id),
size = 3.5, min.segment.length = 0)+
theme_bw()
```


# Session

```{r}
Expand Down

0 comments on commit 1bbb804

Please sign in to comment.