diff --git a/07-beta.Rmd b/07-beta.Rmd index 17ecd21..b2e1ed8 100644 --- a/07-beta.Rmd +++ b/07-beta.Rmd @@ -205,27 +205,50 @@ euclidean_patient_status_plot ### PCoA plot with continuous variable -We can do the same as above using any continuous variable. E.g. let us -see how the plotted samples differ in their alpha diversity (as an -example of a continuous variable): +We can also overlay a continuous variable on a PCoA plot. E.g. let us +use the alcohol study dataset from [curatedMetagenomicData](https://bioconductor.org/packages/release/data/experiment/vignettes/curatedMetagenomicData/inst/doc/curatedMetagenomicData.html). +Perform PCoA and use the BMI as our continuous variable: + +```{r,echo=FALSE, message=FALSE, warning=FALSE} +# Installing the package +if (!require(curatedMetagenomicData)){ + BiocManager::install("curatedMetagenomicData") +} +``` -```{r pcoa_coloring} -# Adds coloring information to the data frame, creates new column -euclidean_shannon_pcoa_df <- cbind(euclidean_pcoa_df, - shannon = colData(tse)$Shannon_diversity) +```{r, message=FALSE, warning=FALSE} +# Retrieving data as a TreeSummarizedExperiment object, and agglomerating to a genus level. +library(curatedMetagenomicData) +library(dplyr) +library(DT) +# Querying the data +tse_data <- sampleMetadata %>% + filter(age >= 18) %>% # taking only data of age 18 or above + filter(!is.na(alcohol)) %>% # excluding missing values + returnSamples("relative_abundance") -# Creates a plot -euclidean_shannon_plot <- ggplot(data = euclidean_shannon_pcoa_df, - aes(x=pcoa1, y=pcoa2, - color = shannon)) + - geom_point() + - labs(x = "PC1", - y = "PC2", - title = "PCoA with Aitchison distances") + - theme(title = element_text(size = 12)) # makes titles smaller +# Agglomeration +tse_genus <- agglomerateByRank(tse_data, rank="Genus") +``` -euclidean_shannon_plot +Performing PCoA with Bray-Curtis dissimilarity. + +```{r pcoa_coloring} +library(scater) +tse_genus <- runMDS(tse_genus, FUN = vegan::vegdist, + name = "PCoA_BC", exprs_values = "relative_abundance") + +# Retrieving the explained variance +e <- attr(reducedDim(tse_genus, "PCoA_BC"), "eig"); +var_explained <- e/sum(e[e>0])*100 + +# Visualization +plot <-plotReducedDim(tse_genus,"PCoA_BC", colour_by = "BMI")+ + labs(x=paste("PC1 (",round(var_explained[1],1),"%)"), + y=paste("PC2 (",round(var_explained[2],1),"%)"), + color="") +plot ``` ## Estimating associations with an external variable