From 627ae85d756b087688349863b2aedfe992f978b3 Mon Sep 17 00:00:00 2001 From: Chouaib Benchraka Date: Mon, 19 Jul 2021 13:32:50 +0300 Subject: [PATCH 1/3] Sec 8-2-2 new content; issue #44 --- 07-beta.Rmd | 63 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/07-beta.Rmd b/07-beta.Rmd index 17ecd21..124c4e0 100644 --- a/07-beta.Rmd +++ b/07-beta.Rmd @@ -205,27 +205,56 @@ euclidean_patient_status_plot ### PCoA plot with continuous variable -We can do the same as above using any continuous variable. E.g. let us -see how the plotted samples differ in their alpha diversity (as an -example of a continuous variable): +We can also overlay a continuous variable on a PCoA plot. E.g. let us +use the alcohol study dataset from [curatedMetagenomicData](https://bioconductor.org/packages/release/data/experiment/vignettes/curatedMetagenomicData/inst/doc/curatedMetagenomicData.html). +Perform PCoA and use the BMI as our continuous variable: + +```{r,echo=FALSE, message=FALSE, warning=FALSE} +# Installing the package +if (!require(curatedMetagenomicData)){ + BiocManager::install("curatedMetagenomicData") +} +``` -```{r pcoa_coloring} -# Adds coloring information to the data frame, creates new column -euclidean_shannon_pcoa_df <- cbind(euclidean_pcoa_df, - shannon = colData(tse)$Shannon_diversity) +```{r, message=FALSE, warning=FALSE} +# Retrieving data as a TreeSummarizedExperiment object, and agglomerating to a genus level. +library(curatedMetagenomicData) +library(dplyr) +library(DT) +# Querying the data +TreeSE <- sampleMetadata %>% + filter(age >= 18) %>% + filter(!is.na(alcohol)) %>% + filter(body_site == "stool") %>% + select(where(~ !all(is.na(.x)))) %>% + returnSamples("relative_abundance") + +# Agglomeration +TreeSE_genus <- agglomerateByRank(TreeSE, rank="Genus") +``` -# Creates a plot -euclidean_shannon_plot <- ggplot(data = euclidean_shannon_pcoa_df, - aes(x=pcoa1, y=pcoa2, - color = shannon)) + - geom_point() + - labs(x = "PC1", - y = "PC2", - title = "PCoA with Aitchison distances") + - theme(title = element_text(size = 12)) # makes titles smaller +Performing PCoA with Bray-Curtis dissimilarity. -euclidean_shannon_plot +```{r pcoa_coloring} +library(scater) +rel_abund_assay <- assays(TreeSE_genus)$relative_abundance +# Computing Bray-Curtis +bray_curtis <- vegan::vegdist(t(rel_abund_assay), method = "bray") + +# Performing PCoA +bray_curtis_pcoa <- ecodist::pco(bray_curtis) + +# Storing results in the TreeSummarizedExperiment object +reducedDim(TreeSE_genus, "PCoA" )<- cbind(bray_curtis_pcoa$vectors[,1], + bray_curtis_pcoa$vectors[,2]) + +# Visualization +plot <-plotReducedDim(TreeSE_genus,"PCoA", colour_by = "BMI")+ + labs(x=paste("PCoA 1 (",round(bray_curtis_pcoa$values[1],1),"%)"), + y=paste("PCoA 2 (",round(bray_curtis_pcoa$values[2],1),"%)"), + color="") +plot ``` ## Estimating associations with an external variable From 20c3175c7ca3b0aee7efc22c37b4604ddfebcab6 Mon Sep 17 00:00:00 2001 From: Chouaib Benchraka Date: Mon, 19 Jul 2021 19:35:12 +0300 Subject: [PATCH 2/3] Redoing data import and PCoA. --- 07-beta.Rmd | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/07-beta.Rmd b/07-beta.Rmd index 124c4e0..d99f878 100644 --- a/07-beta.Rmd +++ b/07-beta.Rmd @@ -223,36 +223,30 @@ library(curatedMetagenomicData) library(dplyr) library(DT) # Querying the data -TreeSE <- sampleMetadata %>% - filter(age >= 18) %>% - filter(!is.na(alcohol)) %>% - filter(body_site == "stool") %>% - select(where(~ !all(is.na(.x)))) %>% +tse_data <- sampleMetadata %>% + filter(age >= 18) %>% # taking only data of age 18 or above + filter(!is.na(alcohol)) %>% # excluding missing values returnSamples("relative_abundance") # Agglomeration -TreeSE_genus <- agglomerateByRank(TreeSE, rank="Genus") +tse_Genus <- agglomerateByRank(tse_data, rank="Genus") ``` Performing PCoA with Bray-Curtis dissimilarity. ```{r pcoa_coloring} library(scater) -rel_abund_assay <- assays(TreeSE_genus)$relative_abundance -# Computing Bray-Curtis -bray_curtis <- vegan::vegdist(t(rel_abund_assay), method = "bray") +tse_Genus <- runMDS(tse_Genus, FUN = vegan::vegdist, + name = "PCoA_BC", exprs_values = "relative_abundance") -# Performing PCoA -bray_curtis_pcoa <- ecodist::pco(bray_curtis) - -# Storing results in the TreeSummarizedExperiment object -reducedDim(TreeSE_genus, "PCoA" )<- cbind(bray_curtis_pcoa$vectors[,1], - bray_curtis_pcoa$vectors[,2]) +# Retrieving the explained variance +e <- attr(reducedDim(tse_Genus, "PCoA_BC"), "eig"); +var_explained <- e/sum(e[e>0])*100 # Visualization -plot <-plotReducedDim(TreeSE_genus,"PCoA", colour_by = "BMI")+ - labs(x=paste("PCoA 1 (",round(bray_curtis_pcoa$values[1],1),"%)"), - y=paste("PCoA 2 (",round(bray_curtis_pcoa$values[2],1),"%)"), +plot <-plotReducedDim(tse_Genus,"PCoA_BC", colour_by = "BMI")+ + labs(x=paste("PC 1 (",round(var_explained[1],1),"%)"), + y=paste("PC 2 (",round(var_explained[2],1),"%)"), color="") plot ``` From 892e4b1bc2620eb470cde00d8d50a201f443385d Mon Sep 17 00:00:00 2001 From: Chouaib Benchraka Date: Mon, 19 Jul 2021 19:49:30 +0300 Subject: [PATCH 3/3] Minor Fixes. --- 07-beta.Rmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/07-beta.Rmd b/07-beta.Rmd index d99f878..b2e1ed8 100644 --- a/07-beta.Rmd +++ b/07-beta.Rmd @@ -229,24 +229,24 @@ tse_data <- sampleMetadata %>% returnSamples("relative_abundance") # Agglomeration -tse_Genus <- agglomerateByRank(tse_data, rank="Genus") +tse_genus <- agglomerateByRank(tse_data, rank="Genus") ``` Performing PCoA with Bray-Curtis dissimilarity. ```{r pcoa_coloring} library(scater) -tse_Genus <- runMDS(tse_Genus, FUN = vegan::vegdist, +tse_genus <- runMDS(tse_genus, FUN = vegan::vegdist, name = "PCoA_BC", exprs_values = "relative_abundance") # Retrieving the explained variance -e <- attr(reducedDim(tse_Genus, "PCoA_BC"), "eig"); +e <- attr(reducedDim(tse_genus, "PCoA_BC"), "eig"); var_explained <- e/sum(e[e>0])*100 # Visualization -plot <-plotReducedDim(tse_Genus,"PCoA_BC", colour_by = "BMI")+ - labs(x=paste("PC 1 (",round(var_explained[1],1),"%)"), - y=paste("PC 2 (",round(var_explained[2],1),"%)"), +plot <-plotReducedDim(tse_genus,"PCoA_BC", colour_by = "BMI")+ + labs(x=paste("PC1 (",round(var_explained[1],1),"%)"), + y=paste("PC2 (",round(var_explained[2],1),"%)"), color="") plot ```