Raw data link for steps 3 and 4:
Result link for step 5 and later can be retrieved from the GitHub repo by using the R code below: Please set a project folder here, the files will be available in this folder for later steps.
project_dir <- "~/data/"
dir.create(project_dir)
if(!require(piggyback)) install.packages("piggyback")
library(piggyback)
setwd(project_dir)
grouped_bio_rep_all_pixel_mean_label_time_merge.csv
test.zip
Time_merged_PCA_all_bio.rda
pb_releases<-function (repo = guess_repo(), .token = gh::gh_token(), verbose = getOption("piggyback.verbose",
default = TRUE))
{
mc <- match.call()
encl <- parent.env(environment("piggyback"))
called_args <- as.list(mc)[-1]
default_args <- encl$`_default_args`
default_args <- default_args[setdiff(names(default_args),
names(called_args))]
called_args[encl$`_omit_args`] <- NULL
called_args<<-called_args
args <- c(lapply(called_args, eval, parent.frame()), lapply(default_args,
eval, envir = environment()))
key <- encl$`_hash`(c(encl$`_f_hash`, args, lapply(encl$`_additional`,
function(x) eval(x[[2L]], environment(x)))))
res <- encl$`_cache`$get(key)
if (inherits(res, "key_missing")) {
mc[[1L]] <- encl$`_f`
res <- withVisible(eval(mc, parent.frame()))
encl$`_cache`$set(key, res)
}
if (res$visible) {
res$value
}
else {
invisible(res$value)
}
}
repo= "MASHUOA/HiTMaP"
releases <- pb_releases(repo = repo, .token = .token, verbose = FALSE)
(df <- piggyback:::pb_info(repo, tag="1.0.1"))
pb_download("all_merged_data_df_bind2.zip",
repo = "MASHUOA/Spatially_temporally_resolved_analysis",
dest = ".", tag = "review")
pb_download(file="all_merged_data_df_bind2.zip", repo = "MASHUOA/Spatially_temporally_resolved_analysis", dest = ".",show_progress = T)
pb_download("Data.tar.gz", repo = "MASHUOA/Spatially_temporally_resolved_analysis", dest = ".")
untar('Data.tar.gz',exdir =".", tar="tar")
#unlink('Data.tar.gz')
list.dirs()
Raw data and Result files for all steps can be retrieved via dropbox for reviewing process: Dropbox transfer
post_mod <-
function (fig, point_size) {
for (n in 1:length(fig[["x"]][["data"]])) {
fig[["x"]][["data"]][[n]][["marker"]]$size = point_size
}
return(fig)
}
# m/z match with tolerance
`%~%` <- function(x, y) {
x = as.numeric(x)
y = as.numeric(y)
sapply(x, function(.x, tol = 0.000012) {
any(sapply(y, function(.y)
isTRUE(all.equal(
.x, .y, tolerance = tol
))))
})
}
knitr::opts_chunk$set(
echo = TRUE,
fig.height = 12,
fig.width = 12,
message = FALSE,
warning = FALSE
)
#Adopted from metaboanalyst to make the local rsever available
init.global.vars <- function(anal.type) {
# other global variables
msg.vec <<- ""
err.vec <<- ""
# for network analysis
module.count <<- 0
# counter for naming different json file (pathway viewer)
smpdbpw.count <<- 0
mdata.all <<- list()
mdata.siggenes <<- vector("list")
meta.selected <<- TRUE
anal.type <<- anal.type
if (.on.public.web) {
# disable parallel prcessing for public server
library(BiocParallel)
register(SerialParam())
} else {
if ("stat" %in% anal.type |
"msetqea" %in% anal.type |
"pathqea" %in% anal.type |
"roc" %in% anal.type)
# start Rserve engine for Rpackage
load_Rserve()
}
# plotting required by all
Cairo::CairoFonts(
regular = "Arial:style=Regular",
bold = "Arial:style=Bold",
italic = "Arial:style=Italic",
bolditalic = "Arial:style=Bold Italic",
symbol = "Symbol"
)
# sqlite db path for gene annotation
if (file.exists("/home/glassfish/sqlite/")) {
#.on.public.web
url.pre <<- "/home/glassfish/sqlite/"
} else if (file.exists("/home/jasmine/Downloads/sqlite/")) {
#jasmine's local
url.pre <<- "/home/jasmine/Downloads/sqlite/"
#api.base <<- "localhost:8686"
} else if (file.exists("/Users/soufanom/Documents/Projects/gene-id-mapping/")) {
# soufan laptop
url.pre <<-
"/Users/soufanom/Documents/Projects/gene-id-mapping/"
} else if (file.exists("~/Documents/Projects/gene-id-mapping/")) {
url.pre <<- "~/Documents/Projects/gene-id-mapping/"
} else if (file.exists("/Users/xia/Dropbox/sqlite/")) {
# xia local
url.pre <<- "/Users/xia/Dropbox/sqlite/"
} else if (file.exists("/media/zzggyy/disk/sqlite/")) {
url.pre <<- "/media/zzggyy/disk/sqlite/"
#zgy local)
} else if (file.exists("/home/zgy/sqlite/")) {
url.pre <<- "/home/zgy/sqlite/"
#zgy local)
} else if (file.exists("/home/le/sqlite/")) {
# le local
url.pre <<- "/home/le/sqlite/"
} else if (file.exists("/home/qiang/Music/")) {
# qiang local
url.pre <<- "/home/qiang/sqlite/"
} else{
url.pre <<-
paste0(dirname(
system.file(
"database",
"sqlite/GeneID_25Species_JE/ath_genes.sqlite",
package = "MetaboAnalystR"
)
), "/")
}
api.base <<- "http://api.xialab.ca"
#api.base <<- "132.216.38.6:8987"
}
load_Rserve <- function() {
installed <- c("Rserve") %in% rownames(installed.packages())
if (installed) {
# first need to start up an Rserve instance
suppressMessages(library(Rserve))
Rserve::Rserve(args = "--no-save")
} else{
print("Please install Rserve R package!")
}
}
.on.public.web <<- F
# load pixel level data for all runs
load_pixel_label <-
function(combinedimdata,
datafile_base,
workdir,
coordata_file = "coordata.csv",
pixel_idx_col = base::row.names,
label_col = "pattern",
...) {
library(Cardinal)
library(stringr)
library(HiTMaP)
library(stringr)
library(dplyr)
datafile <- str_remove(datafile_base, "\\.imzML$")
if (length(workdir) != length(datafile)) {
workdir = rep(workdir[1], length(datafile))
}
datafile_imzML = datafile
coordata_file_tb <- NULL
for (z in 1:length(datafile)) {
name <- basename(datafile[z])
name <- gsub(".imzML$", "", name)
name <- gsub("/$", "", name)
setwd(workdir[z])
folder <- base::dirname(datafile[z])
#imdata <- Cardinal::readImzML(datafile[z],preprocessing = F,attach.only = T,resolution = 200,rotate = rotate[z],as="MSImageSet",BPPARAM = BPPARAM)
if (!str_detect(datafile[z], ".imzML$")) {
datafile_imzML[z] <- paste0(datafile[z], ".imzML")
}
coordata_file_tb[[datafile[z]]] <-
read.csv(paste0(workdir[z], "/", datafile[z], " ID/", coordata_file))
coordata_file_tb[[datafile[z]]]$run <- datafile[z]
coordata_file_tb[[datafile[z]]]$pixel_idx <-
pixel_idx_col(coordata_file_tb[[datafile[z]]])
}
coordata_file_bind <- do.call(rbind, coordata_file_tb)
coordata_file_bind$run <-
as.factor(tolower(coordata_file_bind$run))
coordata_file_bind$pixel_idx <-
as.numeric(coordata_file_bind$pixel_idx)
Pixel_run <- run(combinedimdata)
Pixel_run <- data.frame(run = Pixel_run)
Pixel_run <-
Pixel_run %>% group_by(run) %>% summarise(pixel_idx = 1:length(run))
label_run <-
merge(
Pixel_run,
coordata_file_bind,
by = c("run", "pixel_idx"),
all.x = T,
sort = F
)
Pixel_label <- label_run[[label_col]]
return(Pixel_label)
}
match_mz = function(x,
ref,
tol = 1e-10,
unit = c("Da", "ppm")) {
sref = sort(ref)
i = findInterval(x, sref, all.inside = TRUE)
dif1 = abs(x - sref[i])
dif2 = abs(x - sref[i + 1])
dif = dif1 > dif2
dif1[dif] = dif2[dif]
i[dif] = i[dif] + 1
res_indx <- rep(NA, length(x))
if (unit[1] == "ppm") {
tol_ppm <- tol
tol <- tol_ppm * x / 1000000
}
res_indx[dif1 <= tol] <- sref[i[dif1 <= tol]]
res_indx
}
# 13C metabolites library modification and adoption
mummichog.lib.mod <-
function(lib = "bta_kegg",
lib.new = "bta_kegg_13C",
C13_number = c(3, 6),
C13_deltamass = 1.003355,
wd = getwd(),
force_update_lib = F,
adducts_list = "all",
method = c("new_cpd", "new_adduct")) {
filenm <- paste(wd, "/", lib, ".qs", sep = "")
mum.url <-
paste(
"https://www.metaboanalyst.ca/resources/libs/mummichog/",
paste(lib, ".qs", sep = ""),
sep = ""
)
if (`|`(force_update_lib, !file.exists(filenm))) {
download.file(mum.url,
destfile = filenm,
method = "libcurl",
mode = "wb")
}
mummichog.lib <- qs::qread(filenm)
cmpd.map <- MetaboAnalystR:::.get.my.lib("compound_db.qs")
hit.inx <-
match(tolower(mummichog.lib$cpd.lib$id), tolower(cmpd.map$kegg))
nms <- cmpd.map[hit.inx, "smiles"]
c_count <- stringr::str_count(nms, "C|c")
nms_pub <- cmpd.map[hit.inx, "pubchem_id"]
df <-
data.frame(
smile = nms,
c_count,
mw_c_ratio = mummichog.lib$cpd.lib$mw / c_count,
id = mummichog.lib$cpd.lib$id,
mw = mummichog.lib$cpd.lib$mw,
name = mummichog.lib$cpd.lib$name
)
mummichog.lib.mod <- NULL
mummichog.lib.new <- mummichog.lib
if (method[1] == "new_cpd") {
for (cnum in C13_number) {
(df$c_count >= cnum) -> mod.item
(df$mw >= 26.09864 * as.numeric(cnum)) -> mod.item2
mod.item[is.na(mod.item)] <- F
mod.item2[is.na(mod.item2)] <- F
mod.item_final <-
as.numeric(ifelse(mod.item, mod.item, mod.item2))
mummichog.lib.mod[[cnum]] <- mummichog.lib
lapply(mummichog.lib.mod[[cnum]][["pathways"]][["cpds"]], function(x, cnum) {
paste0(x, "_13C", cnum)
}, cnum) -> mummichog.lib.mod[[cnum]][["pathways"]][["cpds"]]
mummichog.lib.mod[[cnum]][["cpd.lib"]][["id"]] <-
paste0(mummichog.lib.mod[[cnum]][["cpd.lib"]][["id"]], "_13C", cnum)
mummichog.lib.mod[[cnum]][["cpd.lib"]][["name"]] <-
paste0(mummichog.lib.mod[[cnum]][["cpd.lib"]][["name"]], "_13C", cnum)
mummichog.lib.mod[[cnum]][["cpd.lib"]][["mw"]] <-
mummichog.lib.mod[[cnum]][["cpd.lib"]][["mw"]] + C13_deltamass * as.numeric(cnum) *
mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["dpj_positive"]] <-
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["dpj_positive"]] + C13_deltamass *
as.numeric(cnum) * mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["positive"]] <-
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["positive"]] + C13_deltamass *
as.numeric(cnum) * mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["negative"]] <-
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["negative"]] + C13_deltamass *
as.numeric(cnum) * mod.item_final
}
for (cnum in C13_number) {
lapply(1:length(mummichog.lib.new$pathways$cpds), function(x) {
c(mummichog.lib.new$pathways$cpds[[x]],
mummichog.lib.mod[[cnum]]$pathways$cpds[[x]])
}) -> mummichog.lib.new$pathways$cpds
mummichog.lib.new$cpd.lib$id <-
c(mummichog.lib.new$cpd.lib$id,
mummichog.lib.mod[[cnum]]$cpd.lib$id)
mummichog.lib.new$cpd.lib$name <-
c(mummichog.lib.new$cpd.lib$name,
mummichog.lib.mod[[cnum]]$cpd.lib$name)
mummichog.lib.new$cpd.lib$mw <-
c(mummichog.lib.new$cpd.lib$mw,
mummichog.lib.mod[[cnum]]$cpd.lib$mw)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$dpj_positive <-
rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$dpj_positive,
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$dpj_positive)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$positive <-
rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$positive,
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$positive)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$negative <-
rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$negative,
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$negative)
}
}
cpd.lib <- mummichog.lib.new$cpd.lib
ms_modes <- c('dpj_positive', 'positive', 'negative')
adducts <- list()
for (ms_mode in ms_modes) {
if (adducts_list[1] == "all") {
adducts[[ms_mode]] <-
MetaboAnalystR:::Compound_function_mzlist(ms_mode, cpd.lib$mw)
} else{
resdf <-
MetaboAnalystR:::Compound_function_mzlist(ms_mode, cpd.lib$mw)
if (sum(adducts_list %in% colnames(resdf)) != 0) {
adducts[[ms_mode]] <-
resdf[, adducts_list[adducts_list %in% colnames(resdf)]]
} else{
adducts[[ms_mode]] <- resdf
}
}
}
if (method[1] == "new_adduct") {
ms_modes <- c('dpj_positive', 'positive', 'negative')
for (ms_mode in ms_modes) {
new_adduct <- NULL
for (cnum in (C13_number)) {
new_adduct[[cnum]] <-
adducts[[ms_mode]] + C13_deltamass * as.numeric(cnum)
colnames(new_adduct[[cnum]]) <-
stringr::str_replace(colnames(new_adduct[[cnum]]),
"^M",
paste0("[M_13C", cnum, "]"))
}
new_adduct <- do.call(cbind, new_adduct)
adducts[[ms_mode]] <- cbind(adducts[[ms_mode]], new_adduct)
}
}
cpd.lib$adducts <- adducts
cpd.tree <- list()
for (ms_mode in ms_modes) {
l2 <- list()
l2[[49]] <- ""
l2[[2001]] <- ""
mz.mat <- cpd.lib$adducts[[ms_mode]]
floor.mzs <- floor(mz.mat)
for (i in 1:nrow(floor.mzs)) {
neighbourhood <- floor.mzs[i, ]
for (n in neighbourhood) {
if ((n > 50) & (n < 2000)) {
l2[[n]] <- append(l2[[n]], i)
}
}
}
cpd.tree[[ms_mode]] <- lapply(l2, unique)
}
# set up the variables
mummichog.lib.new <- list(
pathways = mummichog.lib.new$pathways,
cpd.tree = cpd.tree,
cpd.lib = cpd.lib
)
qs::qsave(mummichog.lib.new, paste0(wd, "/", lib.new, ".qs"))
return(mummichog.lib.new)
}
# annotate and summarize the pathway mapping result
annotate_PSEA <- function(mSet, mummichog.lib.new) {
lapply(mSet[["path.hits"]], function(x) {
data.frame(
cpd_id = paste0(x, collapse = ";"),
cpd_names = paste0(mummichog.lib.new$cpd.lib$name[match(x, mummichog.lib.new$cpd.lib$id)], collapse = ";")
)
}) -> res
res <- do.call(rbind, res)
cbind(mSet[["mummi.resmat"]], res) -> resdf
return(resdf)
}
The t-SNE on large scale data requires https://github.com/KlugerLab/FIt-SNE to be installed. FFTW (http://www.fftw.org/) is also required. Please follow the instruction on this website and install the prerequisite “FFTW”.
Please follow the instruction from the repo to get “fftRtsne” function available and working properly.
For detailed installation guide of MetaboAnalystR please visit: https://github.com/xia-lab/MetaboAnalystR
#t-SNE package folder
tsnefolder<-"~/FIt-SNE/"
source(paste0(tsnefolder,"fast_tsne.R"))
FAST_TSNE_SCRIPT_DIR <- tsnefolder
if (!require(Cardinal)){
remotes::install_github("kuwisdelu/Cardinal",force=T)
}
if (!require(HiTMaP)){
install.packages("remotes")
remotes::install_github("MASHUOA/HiTMaP",force=T)
}
#closeAllConnections()
register(SnowParam())
options(Cardinal.verbose=FALSE)
options(Cardinal.progress=T)
RNGkind("Mersenne-Twister")
parallel=try(detectCores()/2)
if (parallel<1 | is.null(parallel)){parallel=1}
BPPARAM=SnowParam()
bpprogressbar(BPPARAM)=TRUE
setCardinalBPPARAM(BPPARAM)
#Cardinal
install.packages("BiocManager")
BiocManager::install("Cardinal")
#MetaboAnalystR
metanr_packages <- function(){
metr_pkgs <- c("impute", "pcaMethods", "globaltest", "GlobalAncova", "Rgraphviz", "preprocessCore", "genefilter", "SSPA", "sva", "limma", "KEGGgraph", "siggenes","BiocParallel", "MSnbase", "multtest", "RBGL", "edgeR", "fgsea", "devtools", "crmn")
list_installed <- installed.packages()
new_pkgs <- subset(metr_pkgs, !(metr_pkgs %in% list_installed[, "Package"]))
if(length(new_pkgs)!=0){if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(new_pkgs)
print(c(new_pkgs, " packages added..."))
}
if((length(new_pkgs)<1)){
print("No new packages added...")
}
}
metanr_packages()
install.packages("devtools")
library(devtools)
devtools::install_github("xia-lab/MetaboAnalystR", build = TRUE, build_vignettes = TRUE, build_manual =T)
#HiMAP
install.packages("remotes")
#library(devtools)
library(remotes)
Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true")
remotes::install_github("MASHUOA/HiTMaP",force=T)
remotes::install_github("kuwisdelu/Cardinal",force=T)
3
no
#Update all dependencies
BiocManager::install(ask = F)
yes
library(HiTMaP)
install.packages("dplyr")
install.packages("stringr")
install.packages("readr")
install.packages("hexbin")
install.packages("magick")
install.packages("FactoMineR")
install.packages("M3C")
install.packages("plotly")
install.packages("rsvd")
install.packages("egg")
install.packages("reshape2")
install.packages("flextable")
install.packages("grid")
install.packages("wesanderson")
install.packages("ggpubr")
Working directory setup
library(MetaboAnalystR)
library(Cardinal)
#change to your own project directory here
project_dir <- "~/data/"
Raw_data_dir <- "/raw/"
large_files_dir <- "/large_files/"
spatial_alignment_dir <- "/spatial_alignment/"
dim_reduction_dir <- "/dim_reduction/"
PCA_dir <- "/pca_anal_new/"
UMAP_dir <- "/UMAP/"
tSNE_dir <- "/tSNE/"
FDR_dir <- "/SQRTP_0.1/"
visualization_dir <- "/visualization/"
shrink_dir<- "/data_shrink/"
heximg_dir <- "/heximg/"
dir.create(paste0(project_dir, spatial_alignment_dir))
dir.create(paste0(project_dir, FDR_dir))
dir.create(paste0(project_dir, dim_reduction_dir))
dir.create(paste0(project_dir,visualization_dir))
#data infromation
fileinfo_v2 <- readr::read_csv(paste0(project_dir,"/fileinfo_v2.csv"))
data pre-processing: Note: this step require a large amount of memory and time, it is recommended to run it on a high performance computing cluster.
library(HiTMaP)
library(BiocParallel)
library(Cardinal)
library(ggplot2)
library(readr)
wd=paste0(project_dir, Raw_data_dir)
setwd(wd)
fileinfo_v2 <- read_csv(paste0(project_dir,"/fileinfo_v2.csv"))
register(SnowParam())
options(Cardinal.verbose=FALSE)
options(Cardinal.progress=FALSE)
RNGkind("Mersenne-Twister")
parallel=try(detectCores()/2)
if (parallel<1 | is.null(parallel)){parallel=1}
BPPARAM=bpparam("SnowParam")
BiocParallel::bpworkers(BPPARAM)=6
bpprogressbar(BPPARAM)=TRUE
setCardinalBPPARAM(BPPARAM)
datafile_base<-fileinfo_v2$filenames
datafile<-fileinfo_v2$filenames
datafile<-paste0(wd,"/",datafile)
#pre-processing
HiTMaP:::Preprocessing_segmentation(datafile_base,
workdir=wd,
segmentation_num=5,
ppm=5,Bypass_Segmentation=T,
Segmentation=c("spatialKMeans","spatialShrunkenCentroids","Virtual_segmentation","none","def_file"),
Segmentation_def="segmentation_def.csv",
Segmentation_ncomp="auto-detect",
Segmentation_variance_coverage=0.8,
Smooth_range=1,
colorstyle="Set1",
Virtual_segmentation_rankfile=NULL,
rotate=fileinfo_v2,
BPPARAM=BPPARAM,
preprocess=list(force_preprocess=F,
use_preprocessRDS=T,
smoothSignal=list(method="Disable"),
reduceBaseline=list(method="locmin"),
peakPick=list(method="adaptive"),
peakAlign=list(tolerance=5, units="ppm", level="global"),
normalize=list(method="reference",mz=229.04734)))
#data binning
HiTMaP:::Load_IMS_decov_combine(datafile_base,workdir=wd,SPECTRUM_batch="overall",
ppm=8 ,threshold=0.00000005,rotate=paste0(wd,"/","fileinfo_v2.csv"),mzrange="auto-detect",
deconv_peaklist="Load_exist",preprocessRDS_rotated=F)
library(Cardinal)
library(readr)
library(hexbin)
library(dplyr)
wd <- project_dir
outwd<- paste0(project_dir, spatial_alignment_dir)
#load IMS data of full m/z features in all runs
combinedimdata <- readRDS(paste0(project_dir, large_files_dir,"/combinedimdata_norm_full.rds"))
fileinfo_v2 <- readr::read_csv(paste0(wd,"/fileinfo_v2.csv"))
fileinfo_v2$filenames<-tolower(fileinfo_v2$filenames)
fileinfo_v2<-fileinfo_v2[fileinfo_v2$filenames %in% levels(run(combinedimdata)),]
coordata<-combinedimdata@elementMetadata@coord@listData
coordata$run<-run(combinedimdata)
coordata<-as.data.frame(coordata)
pixeldf<-coordata
pixeldf$x->pixeldf$x_scale
pixeldf$y->pixeldf$y_scale
#lens rescaling
for (imsrun in unique(pixeldf$run)){
pixeldf$x[pixeldf$run == imsrun]->x
pixeldf$y[pixeldf$run == imsrun]->y
x_scale<-(x-(max(x)-min(x))/2)/(max(y)-min(y))
y_scale<-(y-(max(y)-min(y))/2)/(max(y)-min(y))
pixeldf$x_scale[pixeldf$run == imsrun]<-x_scale
pixeldf$y_scale[pixeldf$run == imsrun]<-y_scale
}
pixeldf %>% group_by(run) %>% summarize(xmin=range(x_scale)[1],ymin=range(y_scale)[1])->pixeldf_sum
bin<-hexbin(pixeldf$x_scale,pixeldf$y_scale, xbins=round(sqrt(nrow(pixeldf)/7/length(unique(pixeldf$run)))), IDs= TRUE)
bin@cID->pixeldf$new_label_scale
# write.csv(pixeldf,paste0(wd,"all_coordata_label_sel_arc_bind_scale.csv"))
wd <- project_dir
outwd<- paste0(project_dir, spatial_alignment_dir)
if (exists("combinedimdata") == TRUE){
message("combinedimdata exists.")
} else {
combinedimdata <- readRDS(paste0(project_dir, large_files_dir,"/combinedimdata_norm_full.rds"))
message("combinedimdata has been loaded.")
}
combinedimdata->combinedimdata_full
final_selection_df<-read.csv(paste0(wd,"/final_selection.csv"))
fileinfo_v2$filenames<-tolower(fileinfo_v2$filenames)
fileinfo_v2<-fileinfo_v2[fileinfo_v2$filenames %in% levels(run(combinedimdata_full)),]
coordata<-combinedimdata_full@elementMetadata@coord@listData
coordata$run<-run(combinedimdata_full)
coordata<-as.data.frame(coordata)
#hexbin all features
library(hexbin)
label_sel="all"
merged_data_df<-NULL
coordata_label_sel_arc<-NULL
setCardinalBPPARAM(BPPARAM=SerialParam())
setCardinalVerbose(verbose=F)
for (imsrun in fileinfo_v2$filenames){
message(imsrun)
time_cls<-fileinfo_v2$class[match(imsrun,fileinfo_v2$filenames)]
combinedimdata_full@elementMetadata@coord@listData->coordata
coordata$run<-run(combinedimdata_full)
coordata<-as.data.frame(coordata)
coordata_run<-coordata[coordata$run==imsrun,]
coordata_run<-pixeldf[pixeldf$run==imsrun,]
rownames(coordata_run)<-1:nrow(coordata_run)
library(hexbin)
combinedimdata_full[,run(combinedimdata_full)==imsrun]->combinedimdata_full_sample_bin
bincell_df<-NULL
for (bincell in unique(coordata_run$new_label_scale)){
combinedimdata_full_sample_bin[,which(coordata_run$new_label_scale==bincell)] %>% summarizeFeatures( FUN = "mean") %>% process()->meanspec
meanspec@featureData@listData[["mean"]]->bincell_df[[bincell]]
}
bincell_df_bind<-do.call(cbind,bincell_df)
colnames(bincell_df_bind)<-paste(imsrun,time_cls,unique(coordata_run$new_label_scale),sep = "@")
merged_data_df[[imsrun]]<-bincell_df_bind
coordata_label_sel_arc[[imsrun]]<-coordata_run
}
coordata_label_sel_arc_bind<-do.call(rbind,coordata_label_sel_arc)
merged_data_df_bind<-do.call(cbind,merged_data_df)
rownames(merged_data_df_bind)<-combinedimdata_full@featureData@mz
#Generate hexbin data
write.csv(merged_data_df_bind,paste0(outwd,"/",label_sel,"_merged_data_df_bind2",".csv"))
#Generate Master Table
write.csv(coordata_label_sel_arc_bind,paste0(outwd,"/",label_sel,"_coordata_label_sel_arc_bind2",".csv"))
message(label_sel," DONE")
wd<-paste0(project_dir, spatial_alignment_dir)
setwd(wd)
library(readr)
library(stringr)
all_data_original<-read_csv(paste0(wd,"/all_merged_data_df_bind2.csv"))
all_data_original<-as.data.frame(all_data_original)
rownames(all_data_original)<-all_data_original[,1]
all_data_original[,-1]->all_data_original
col.nms.split<-str_split_fixed(colnames(all_data_original),"@",3)
all_data_original_t<-t(all_data_original)
group_sum <- rowsum(all_data_original_t, group = interaction (col.nms.split[,2],col.nms.split[,3]))
group_sum_t<-as.data.frame(t(group_sum))
a = paste(col.nms.split[,2], col.nms.split[,3], sep=".")
for (col in colnames(group_sum_t)) {
group_sum_t [,col]= group_sum_t [,col] / sum(a==col)
# message(col,sum(a==col))
}
grouped_bio_rep_all_pixel_mean<-group_sum_t
grouped_bio_rep_all_pixel_mean<-as.data.frame(grouped_bio_rep_all_pixel_mean)
col.nms.split_2<-str_split_fixed(colnames(grouped_bio_rep_all_pixel_mean),"\\.",2)
Label<-col.nms.split_2[,1]
grouped_bio_rep_all_pixel_mean<-rbind(Label, grouped_bio_rep_all_pixel_mean)
row.names(grouped_bio_rep_all_pixel_mean)[1]<-"Label"
grouped_bio_rep_all_pixel_mean<-grouped_bio_rep_all_pixel_mean[,order(grouped_bio_rep_all_pixel_mean[1,])]
cls_merge<-list()
for (cls in unique(Label)){
group_sum_t[,col.nms.split_2[,1]==cls]->temp
colnames(temp)<-str_remove(colnames(temp),paste0(cls,"\\."))
row.names(temp)<-paste0(row.names(temp),"@",cls)
cls_merge[[cls]]<-(temp)
}
library(plyr)
cls_merge_bind<-rbind.fill(cls_merge)
rownms<-unlist(lapply(cls_merge,rownames))
rownames(cls_merge_bind)<-rownms
write.csv(cls_merge_bind, paste0(wd, "grouped_bio_rep_all_pixel_mean_label_time_merge.csv"))
check if the file needed exists in the right folder; otherwise download from the repository
file.exists(paste0(project_dir, spatial_alignment_dir, "grouped_bio_rep_all_pixel_mean_label_time_merge.csv"))
library(dplyr)
library(stringr)
library(FactoMineR)
#read reshaped data set
cls_merge_bind<-readr::read_csv(paste0(project_dir, spatial_alignment_dir, "grouped_bio_rep_all_pixel_mean_label_time_merge.csv"))
cls_merge_bind<-as.data.frame(cls_merge_bind)
rownames(cls_merge_bind)<-cls_merge_bind[,1]
cls_merge_bind[,1]<-NULL
cls_merge_mt<-t(as.matrix(cls_merge_bind))
cls_merge_mt[is.na(cls_merge_mt)]<-0
cls_merge_mt_rowsum<-rowsum(cls_merge_mt,rep(1,nrow(cls_merge_mt)))
cls_merge_mt_non_zero<-cls_merge_mt[,cls_merge_mt_rowsum!=0]
PCA<-FactoMineR::PCA(cls_merge_mt_non_zero, graph = TRUE, ncp = 5)
PCA.contri<-data.frame(PCA[["var"]][["contrib"]])
PCA.contri.feature <-stringr::str_split_fixed(rownames(PCA.contri),"@", 2)
colnames(PCA.contri.feature )<- c("mz","class")
PCA.contri<-cbind(PCA.contri.feature, PCA.contri)
colnames(PCA.contri)[!(colnames(PCA.contri) %in% c("mz", "class"))]->dimname
dir.create(paste0(wd,PCA_dir))
#Top loading feature selection
Dim.feature.contri.list<- list()
Dim.feature.contri.summmary.list<- list()
for (dim in dimname){
PCA.contri[,c("mz", "class",dim)]->dim_df
colnames(dim_df)<-c("mz", "class","Contribution")
dim_df$logcontrib<-log(dim_df$Contribution)
dim_df$mz<-as.numeric(dim_df$mz)
dim_df %>% mutate(new_bin = ntile(Contribution, n=30))->dim_df
dim_df$Dim=dim
write.csv(dim_df[dim_df$new_bin %in% c(30),],paste0(project_dir, dim_reduction_dir, PCA_dir,dim,"_anno_freq_top1bin",".csv"),row.names = F)
feature_slt<- unique(dim_df[dim_df$new_bin %in% c(30),"mz"])
Dim.feature.contri.list[[dim]]<-feature_slt
Dim.feature.contri.summmary.list[[dim]]<-dim_df
}
Dim.feature.contri.summmary.list.bind<-do.call(rbind,Dim.feature.contri.summmary.list)
Dim.feature.contri.summmary.list.bind %>% group_by(mz, Dim) %>% mutate(Rank = rank(Contribution)) -> Dim.feature.contri.summmary.list.bind
save(list = c("PCA","PCA.contri","PCA.desc","Dim.feature.contri.list","cls_merge_mt","Dim.feature.contri.summmary.list.bind"), file = paste0(project_dir, dim_reduction_dir, PCA_dir,"Time_merged_PCA_all_bio.rda"))
library(dplyr)
library(Cardinal)
if (exists("Dim.feature.contri.summmary.list.bind")==TRUE) {
message("PCA result has been retrieved.")
} else {
load(paste0(project_dir, dim_reduction_dir, PCA_dir,"Time_merged_PCA_all_bio.rda"))
message("PCA result has been loaded.")
}
dims.pca <- PCA[["ind"]][["coord"]]
dims.pca<- as.data.frame(dims.pca)
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
example_imdata<-combinedimdata[,run(combinedimdata) == "4hrsilglucose_2nd_20201109_trim"]
dir.create(paste0(project_dir, dim_reduction_dir, PCA_dir,"/kmean_seg_single_demo/"))
for (a in 1:5) {
dims.pca<-dims.pca[as.numeric(rownames(dims.pca)) %in% all_coordata_label_sel_arc_bind$new_label_scale[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"],]
dims.pca$Rank<-dense_rank(dims.pca[,a])
png(paste0(project_dir, dim_reduction_dir, PCA_dir,"/kmean_seg_single_demo/","Dim.",a,"_pca_single_dim_lens_projections.png"),height = 5,width = 6,units = "in",res = 600)
match(all_coordata_label_sel_arc_bind$new_label_scale, as.numeric(rownames(dims.pca)))-> rank_index
all_coordata_label_sel_arc_bind$Rank<- dims.pca$Rank[rank_index]
factor(all_coordata_label_sel_arc_bind$Rank[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"])->final_selection
fig.arbitrary_seg<-image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, xlab= NULL, col = grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdYlBu")))(max(dims.pca$Rank)) )
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
print(fig.arbitrary_seg)
dev.off()
}
library(readr)
library(Cardinal)
library(rsvd)
library(dplyr)
library(plotly)
library(M3C)
dir.create(paste0(project_dir, dim_reduction_dir, UMAP_dir))
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
cls_merge_bind<-readr::read_csv(paste0(project_dir, spatial_alignment_dir,"/grouped_bio_rep_all_pixel_mean_label_time_merge.csv"))
cls_merge_bind<-as.data.frame(cls_merge_bind)
rownames(cls_merge_bind)<-cls_merge_bind[,1]
cls_merge_bind[,1]<-NULL
cls_merge_mt<-(as.matrix(cls_merge_bind))
cls_merge_mt[is.na(cls_merge_mt)]<-0
cls_merge_mt==0->cls_merge_mt_blk_indx
rowSums(cls_merge_mt_blk_indx)->fblk_count
fblk_count_thres<-fblk_count>=max(fblk_count)*0.90
cls_merge_mt_test<-t(cls_merge_mt[!fblk_count_thres,])
##Time series umap
features.umap = M3C::umap((cls_merge_mt))
Time_merged_umap<-data.frame(features.umap$data$X1,features.umap$data$X2,rownames(cls_merge_mt_test))
colnames(Time_merged_umap)<-c("x","y","bin")
saveRDS(Time_merged_umap, paste0(project_dir, dim_reduction_dir, UMAP_dir, "/Time_merged_umap_all_bio.rds"))
#Detect if "combinedimdata" exists
if (exists("combinedimdata") == TRUE){
message("combinedimdata exists.")
} else {
combinedimdata <- readRDS(paste0(project_dir, large_files_dir,"/combinedimdata_norm_full.rds"))
message("combinedimdata has been loaded.")
}
library(dplyr)
library(Cardinal)
Time_merged_umap <- readRDS(paste0(project_dir, dim_reduction_dir, UMAP_dir,"Time_merged_umap_all_bio.rds"))
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
example_imdata<-combinedimdata[,run(combinedimdata) == "4hrsilglucose_2nd_20201109_trim"]
dir.create(paste0(project_dir, dim_reduction_dir, UMAP_dir,"/kmean_seg_single_demo/"))
for (a in c("x", "y")) {
Time_merged_umap <-Time_merged_umap[as.numeric(Time_merged_umap$bin) %in% all_coordata_label_sel_arc_bind$new_label_scale[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"],]
Time_merged_umap$Rank<-dense_rank(Time_merged_umap[,a])
png(paste0(project_dir, dim_reduction_dir, UMAP_dir,"/kmean_seg_single_demo/","umap_",a,"_single_dim_lens_projections.png"),height = 5,width = 6,units = "in",res = 600)
match(all_coordata_label_sel_arc_bind$new_label_scale, as.numeric(Time_merged_umap$bin))-> rank_index
all_coordata_label_sel_arc_bind$Rank<- Time_merged_umap$Rank[rank_index]
factor(all_coordata_label_sel_arc_bind$Rank[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"])->final_selection
fig.arbitrary_seg<-image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, xlab= NULL, col = grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdYlBu")))(max(Time_merged_umap$Rank)) )
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
print(fig.arbitrary_seg)
dev.off()
}
dir.create(paste0(project_dir, dim_reduction_dir, tSNE_dir))
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
cls_merge_bind<-readr::read_csv(paste0(project_dir, spatial_alignment_dir,"/grouped_bio_rep_all_pixel_mean_label_time_merge.csv"))
cls_merge_bind<-as.data.frame(cls_merge_bind)
rownames(cls_merge_bind)<-cls_merge_bind[,1]
cls_merge_bind[,1]<-NULL
cls_merge_mt<-(as.matrix(cls_merge_bind))
cls_merge_mt[is.na(cls_merge_mt)]<-0
cls_merge_mt==0->cls_merge_mt_blk_indx
rowSums(cls_merge_mt_blk_indx)->fblk_count
fblk_count_thres<-fblk_count>=max(fblk_count)*0.90
cls_merge_mt_test<-t(cls_merge_mt[!fblk_count_thres,])
##Time series tSNE
features.tsne= fftRtsne (cls_merge_mt_test, dims = 2, max_iter = 1000)
features.tsne<-as.data.frame(features.tsne)
features.tsne$Bin<-rownames(cls_merge_mt_test)
Time_merged_tsne<-data.frame(features.tsne$V1,features.tsne$V2, features.tsne$Bin)
saveRDS(Time_merged_tsne, paste0(wd, "/Time_merged_tsne_all_bio.rds"))
#Detect if "combinedimdata" exists
if (exists("combinedimdata") == TRUE){
message("combinedimdata exists.")
} else {
combinedimdata <- readRDS(paste0(project_dir, large_files_dir,"/combinedimdata_norm_full.rds"))
message("combinedimdata has been loaded.")
}
library(dplyr)
library(Cardinal)
Time_merged_tsne <- readRDS(paste0(project_dir, dim_reduction_dir, tSNE_dir,"Time_merged_tsne_all_bio.rds"))
colnames(Time_merged_tsne)<- c("x", "y", "bin")
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
example_imdata<-combinedimdata[,run(combinedimdata) == "4hrsilglucose_2nd_20201109_trim"]
dir.create(paste0(project_dir, dim_reduction_dir, tSNE_dir,"/kmean_seg_single_demo/"))
for (a in c("x", "y")) {
Time_merged_tsne <-Time_merged_tsne [as.numeric(Time_merged_tsne $bin) %in% all_coordata_label_sel_arc_bind$new_label_scale[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"],]
Time_merged_tsne $Rank<-dense_rank(Time_merged_tsne[,a])
png(paste0(project_dir, dim_reduction_dir, tSNE_dir,"/kmean_seg_single_demo/","tsne_",a,"_single_dim_lens_projections.png"),height = 5,width = 6,units = "in",res = 600)
match(all_coordata_label_sel_arc_bind$new_label_scale, as.numeric(Time_merged_tsne $bin))-> rank_index
all_coordata_label_sel_arc_bind$Rank<- Time_merged_tsne$Rank[rank_index]
factor(all_coordata_label_sel_arc_bind$Rank[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"])->final_selection
fig.arbitrary_seg<-image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, xlab= NULL, col = grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdYlBu")))(max(Time_merged_tsne$Rank)) )
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
print(fig.arbitrary_seg)
dev.off()
}
mummichog.lib.mod<-function(lib="bta_kegg",lib.new="bta_kegg_13C",C13_number=c(3,6),C13_deltamass=1.003355,wd=getwd(),force_update_lib=F,adducts_list="all",method=c("new_cpd","new_adduct")){
filenm <- paste(wd,"/",lib, ".qs", sep="")
mum.url <- paste("https://www.metaboanalyst.ca/resources/libs/mummichog/", paste(lib, ".qs", sep=""), sep="")
if(`|`(force_update_lib,!file.exists(filenm))){
download.file(mum.url, destfile = filenm, method="libcurl", mode = "wb")
}
mummichog.lib <- qs::qread(filenm)
cmpd.map <- MetaboAnalystR:::.get.my.lib("compound_db.qs")
hit.inx <- match(tolower(mummichog.lib$cpd.lib$id), tolower(cmpd.map$kegg))
nms <- cmpd.map[hit.inx, "smiles"]
c_count <-stringr::str_count(nms,"C|c")
nms_pub <- cmpd.map[hit.inx, "pubchem_id"]
df<-data.frame(smile=nms,c_count,mw_c_ratio=mummichog.lib$cpd.lib$mw/c_count,id=mummichog.lib$cpd.lib$id,mw=mummichog.lib$cpd.lib$mw,name=mummichog.lib$cpd.lib$name)
mummichog.lib.mod<-NULL
mummichog.lib.new<-mummichog.lib
if (method[1]=="new_cpd"){
for (cnum in C13_number){
(df$c_count>=cnum)->mod.item
(df$mw>=26.09864*as.numeric(cnum))->mod.item2
mod.item[is.na(mod.item)]<-F
mod.item2[is.na(mod.item2)]<-F
mod.item_final<-as.numeric(ifelse(mod.item,mod.item,mod.item2))
mummichog.lib.mod[[cnum]]<-mummichog.lib
lapply(mummichog.lib.mod[[cnum]][["pathways"]][["cpds"]],function(x,cnum){
paste0(x,"_13C",cnum)
},cnum)->mummichog.lib.mod[[cnum]][["pathways"]][["cpds"]]
mummichog.lib.mod[[cnum]][["cpd.lib"]][["id"]]<-paste0(mummichog.lib.mod[[cnum]][["cpd.lib"]][["id"]],"_13C",cnum)
mummichog.lib.mod[[cnum]][["cpd.lib"]][["name"]]<-paste0(mummichog.lib.mod[[cnum]][["cpd.lib"]][["name"]],"_13C",cnum)
mummichog.lib.mod[[cnum]][["cpd.lib"]][["mw"]]<-mummichog.lib.mod[[cnum]][["cpd.lib"]][["mw"]] + C13_deltamass*as.numeric(cnum)*mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["dpj_positive"]]<-mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["dpj_positive"]]+ C13_deltamass*as.numeric(cnum)*mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["positive"]]<-mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["positive"]]+ C13_deltamass*as.numeric(cnum)*mod.item_final
mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["negative"]]<-mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]][["negative"]]+ C13_deltamass*as.numeric(cnum)*mod.item_final
}
for (cnum in C13_number){
lapply(1:length(mummichog.lib.new$pathways$cpds),function(x){
c(mummichog.lib.new$pathways$cpds[[x]],mummichog.lib.mod[[cnum]]$pathways$cpds[[x]])
})->mummichog.lib.new$pathways$cpds
mummichog.lib.new$cpd.lib$id<-c(mummichog.lib.new$cpd.lib$id,mummichog.lib.mod[[cnum]]$cpd.lib$id)
mummichog.lib.new$cpd.lib$name<-c(mummichog.lib.new$cpd.lib$name,mummichog.lib.mod[[cnum]]$cpd.lib$name)
mummichog.lib.new$cpd.lib$mw<-c(mummichog.lib.new$cpd.lib$mw,mummichog.lib.mod[[cnum]]$cpd.lib$mw)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$dpj_positive<-rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$dpj_positive,mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$dpj_positive)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$positive<-rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$positive,mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$positive)
mummichog.lib.new[["cpd.lib"]][["adducts"]]$negative<-rbind(mummichog.lib.new[["cpd.lib"]][["adducts"]]$negative,mummichog.lib.mod[[cnum]][["cpd.lib"]][["adducts"]]$negative)
}
}
#MetaboAnalystR:::CreateLibFromKEGG(mummichog.lib.new$cpd.lib, mummichog.lib.new$pathways, lib.new)
cpd.lib <- mummichog.lib.new$cpd.lib;
ms_modes <- c('dpj_positive', 'positive', 'negative');
adducts <- list();
for (ms_mode in ms_modes){
if(adducts_list[1]=="all"){
adducts[[ms_mode]] <- MetaboAnalystR:::Compound_function_mzlist(ms_mode, cpd.lib$mw)
}else{
resdf <- MetaboAnalystR:::Compound_function_mzlist(ms_mode, cpd.lib$mw)
if(sum(adducts_list %in% colnames(resdf))!=0){
adducts[[ms_mode]]<-resdf[,adducts_list[adducts_list %in% colnames(resdf)]]
}else{
adducts[[ms_mode]]<-resdf
}
}
}
if (method[1]=="new_adduct"){
ms_modes <- c('dpj_positive', 'positive', 'negative')
for (ms_mode in ms_modes){
new_adduct<-NULL
for (cnum in (C13_number)){
new_adduct[[cnum]]<-adducts[[ms_mode]]+C13_deltamass*as.numeric(cnum)
colnames(new_adduct[[cnum]])<-stringr::str_replace(colnames(new_adduct[[cnum]]),"^M",paste0("[M_13C",cnum,"]"))
}
new_adduct<-do.call(cbind,new_adduct)
adducts[[ms_mode]]<-cbind(adducts[[ms_mode]],new_adduct)
}
}
cpd.lib$adducts <- adducts;
# create a dictionary for look up in the range of 50-2000
# now need to create ladder (tree) for each new mz
# key is the mass 50 to 2000, values are the compounds (if any of their modified mw gives the value)
# now create cpd tree for each mass pos
# note, this can be slow, but this can be created before hand
# for each species and for each mode
# note l2 only stores the index of the cpd.lib
cpd.tree <- list();
for (ms_mode in ms_modes){
l2 <- list();
l2[[49]] <- "";
l2[[2001]] <- "";
mz.mat <- cpd.lib$adducts[[ms_mode]];
floor.mzs <- floor(mz.mat);
for(i in 1:nrow(floor.mzs)){
neighbourhood <- floor.mzs[i,];
for(n in neighbourhood){
if((n>50) & (n<2000)){
l2[[n]] <- append(l2[[n]], i);
}
}
}
cpd.tree[[ms_mode]] <- lapply(l2, unique);
}
# set up the variables
mummichog.lib.new <- list(
pathways = mummichog.lib.new$pathways,
cpd.tree = cpd.tree,
cpd.lib = cpd.lib
)
qs::qsave(mummichog.lib.new,paste0(wd,"/",lib.new,".qs"))
return(mummichog.lib.new)
}
outwd = paste0(project_dir, dim_reduction_dir,PCA_dir)
for (dim in paste0("Dim.", 1:5)){
dim.sig<-readr::read_csv(paste0(outwd, dim, "_anno_freq_top1bin.csv"))
dir.create(paste0(outwd,"/Metabo_",dim))
setwd(paste0(outwd,"/Metabo_",dim))
message(getwd())
mSet<-NULL
mSet<-InitDataObjects("mass_table", "mummichog", FALSE)
anal.type<<-mSet$analSet$type
mSet<-SetPeakFormat(mSet, "pvalue")
mSet<-UpdateInstrumentParameters(mSet, 10.0, "negative", "yes");
mSet<-SetRTincluded(mSet, "no")
mSet<-Read.TextData(mSet, paste0(project_dir, "/bin_data_time_sumspec_tb.csv"), "colu", "disc")
mSet<-SanityCheckMummichogData(mSet)
mSet<-ReplaceMin(mSet);
mSet<-SanityCheckMummichogData(mSet)
mSet<-PreparePrenormData(mSet)
mSet<-Normalization(mSet, "none", "none", "none", ratio=FALSE, ratioNum=20)
mSet<-SetPeakEnrichMethod(mSet, "mum", "v2")
mSet<-PreparePeakTable4PSEA(mSet)
feature_sig <- names(mSet[["analSet"]][["tt"]][["t.score"]]) %in% dim.sig$mz
mSet[["analSet"]][["tt"]][["p.value"]][feature_sig] <-0.01
mSet[["analSet"]][["tt"]][["p.value"]][!feature_sig] <-1
nData<-mSet$dataSet$mummi.proc
feature_sig <- nData$m.z %in% dim.sig$mz
match_mz(nData$m.z,dim.sig$mz,10,"ppm")->mapped_mz
feature_sig <- !is.na(mapped_mz)
nData[feature_sig,1] <-0.000001
nData[!feature_sig,1] <-1
nData->mSet$dataSet$mummi.proc
mSet<-SetMummichogPval(mSet, 0.05)
mummichog.lib.new<-mummichog.lib.mod(lib="bta_kegg",lib.new="bta_kegg_13C_adduct",C13_number=c(6),C13_deltamass=1.003355,wd=paste0(outwd,"/Metabo_",dim),force_update_lib=F,adducts_list=c("M+Cl[-]","M-H[-]"),method = "new_adduct")
mSet$paramSet$ContainsMS2<-F
mSet<-PerformPSEA(mSet, "bta_kegg_13C_adduct", "current", 3 , 100)
mSet<-SaveTransformedData(mSet)
mSet<-PlotPeaks2Paths(mSet, "peaks_to_paths_0_", "png", 300, width=NA)
write.csv(annotate_PSEA(mSet = mSet,mummichog.lib.new),"mSet_mummi_resmat_anno.csv")
}
df_C13<-NULL
for (p_thres in paste0("Metabo_Dim.",1:5)){
df_C13[[as.character(p_thres)]]<-read.csv(paste0(outwd,"/",p_thres,"/","/","mSet_mummi_resmat_anno.csv"))
df_C13[[as.character(p_thres)]]$source<-p_thres
}
#read KEGG library
bta_kegg<- qs::qread( paste0(outwd, "/Metabo_Dim.1/bta_kegg.qs") )
cpd.info<- data.frame(cpd.name = bta_kegg[["cpd.lib"]][["name"]], id = bta_kegg[["cpd.lib"]][["id"]])
library(reshape2)
library(dplyr)
library(stringr)
df_C13<-NULL
for (p_thres in paste0("Metabo_Dim.",1:5)){
df_C13[[as.character(p_thres)]]<-read.csv(paste0(outwd,"/",p_thres,"/","mSet_mummi_resmat_anno.csv"))
df_C13[[as.character(p_thres)]]$source<-p_thres
}
df_C13<-do.call(rbind,df_C13)
df_C13$Gamma_log<--log(df_C13$Gamma,10)
df_C13$Pathway<-as.factor(df_C13$X)
df_C13$Pval_quartile<-df_C13$source
df_C13$PCA_dim<-as.numeric(str_remove(df_C13$source,"Metabo_Dim."))
df_C13_selected<-df_C13[df_C13$X %in% df_C13$X[df_C13$FET<=0.05],]
df_C13_selected$sig_level <- gtools::stars.pval(df_C13_selected$FET)
pathway_summary<-df_C13_selected[,c("X", "PCA_dim", "sig_level","Hits.sig")]
colnames(pathway_summary)<-c("Pathway", "Dim", "Sig_level","Hits.sig")
pathway_summary$Dim<-paste0("Dim",".",pathway_summary$Dim)
pathway_summary_sig<-dcast(pathway_summary,Pathway ~ Dim, value.var="Sig_level")
pathway_summary_sig[is.na(pathway_summary_sig)]<-""
sig.cpd.all <- list()
for (dim in paste0("Dim.", 1:5)){
matched.cpd<-read.csv(paste0(outwd, "/Metabo_", dim, "/mummichog_matched_compound_all.csv"))
sig.cpd.dim <- read.csv(paste0(outwd, "/", dim, "_anno_freq_top1bin.csv"))
sig.cpd.id <- match_mz(matched.cpd$Query.Mass, sig.cpd.dim$mz, 10,"ppm")
sig.cpd.dim <- matched.cpd[!is.na(sig.cpd.id),]
sig.cpd.dim$Dim <- paste0(dim)
sig.cpd.all[[dim]] <- sig.cpd.dim
}
sig.cpd.all<- do.call(rbind, sig.cpd.all)
pathway.sig <- unique(df_C13_selected$X)
pathway.uni.id<- match ( pathway.sig, df_C13_selected$X)
pathway.sig<- df_C13_selected[pathway.uni.id,]
pathway.cpd<- list()
for (pathway in unique(df_C13_selected$X)){
pathway.cpd[[pathway]] <- unlist ( str_split(pathway.sig[which (pathway.sig$X == pathway), which(colnames(pathway.sig)=="cpd_id")],";") )
}
lapply(names(pathway.cpd), function(x){
sig.cpd.all[sig.cpd.all$Matched.Compound %in% pathway.cpd[[x]],]->df
df$Pathway<-(x)
df
})->Pathway_matched_sig
Pathway_matched_sig<-do.call(rbind,Pathway_matched_sig)
Pathway_matched_sig$CPD_name<- cpd.info$cpd.name[match(Pathway_matched_sig$Matched.Compound, cpd.info$id)]
Pathway_matched_sig$Isotype=NA
Pathway_matched_sig$Isotype[str_detect(Pathway_matched_sig$Matched.Form, "13C")] <- "SIL_C6"
Pathway_matched_sig$Isotype[!str_detect(Pathway_matched_sig$Matched.Form, "13C")] <- "Normal"
df<- Pathway_matched_sig %>% group_by(Pathway, Dim) %>% dplyr::summarise(MZ_Normal = length(unique(Query.Mass[!str_detect(Matched.Form,"M_13C")])),MZ_SIL = length(unique(Query.Mass[str_detect(Matched.Form,"M_13C")])))
df_cast_SIL<- dcast(df,Pathway ~ Dim, value.var="MZ_SIL")
df_cast_SIL$Category<- "m/z SIL"
df_cast_SIL[is.na(df_cast_SIL)]<-0
df_cast_SIL<- df_cast_SIL%>% relocate(Category, .after = Pathway)
df_cast_Normal<- dcast(df,Pathway ~ Dim, value.var="MZ_Normal")
df_cast_Normal$Category<- "m/z Normal"
df_cast_Normal[is.na(df_cast_Normal)]<-0
df_cast_Normal<- df_cast_Normal%>% relocate(Category, .after = Pathway)
df_output <- rbind(df_cast_SIL,df_cast_Normal)
pathway_summary_sig$Category<- "Sig Level"
pathway_summary_sig<- pathway_summary_sig%>% relocate(Category, .after = Pathway)
df_output <- list(df_cast_Normal,df_cast_SIL, pathway_summary_sig )
df_output<-do.call(rbind,df_output)
# df_output<- df_output %>% relocate(Dim.10, .after = Dim.9)
write.csv(df_output, paste0(outwd, "pathway_final_output_1_to_5.csv"), row.names = F)
#
# Pathway_matched_sig <- Pathway_matched_sig %>% group_by(Matched.Compound, Matched.Form) %>% dplyr::summarise(mean = mean (Query.Mass), )
Pathway_matched_sig_summary<-Pathway_matched_sig
Pathway_matched_sig_summary<-Pathway_matched_sig_summary[!duplicated(Pathway_matched_sig_summary[,c( "Matched.Compound", "Matched.Form","Dim" )]),]
library(HiTMaP)
library(stringr)
wd="V:/Bioinformatics/test_scoring_cpd_mummichog/"
raw_wd<-"G:/imzML"
#spectrum_to_match<- read.csv(paste0(wd,"raw_data_time_sumspec.csv"))
cpd_list<-read.csv(paste0(wd,"pca_anal_thesis_data_test_tol_10/Metabo_Dim.1/mummichog_matched_compound_all.csv"))
cmpd.map<-read.csv(paste0(wd,"cmpd.map_anno.csv"))
fileinfo_v2 <- read.csv(paste0(raw_wd,"/fileinfo_v2.csv"))
#SMPdb<-read.csv(paste0(wd,"SMPdb.csv"),row.names = 1)
#spectrum_to_match<-read.csv(paste0(wd,"bin_data_time_sumspec.csv"),row.names=1)
locs_file<-fileinfo_v2$filenames
locs<-paste0("G:/imzML/",locs_file," ID/")
cpd_list$formula<-cmpd.map$formula[match(cpd_list$Matched.Compound,cmpd.map$kegg_id)]
cpd_list$Isotype<-""
cpd_list<-cpd_list[!is.na(cpd_list$formula),]
cpd_list->cpd_list_org
for (score_method in c("SQRTP")){
for (timepoint in unique(locs_file)){
#for (timepoint in "20hrSILGlucose_03_20211123"){
#"16hrSILGlucose_01_20211123_trim"->timepoint
for (FDR_cutoff in c(0.1)) {
dir.create(paste0(wd,"/",score_method,"_",FDR_cutoff))
peaklist<-as.data.frame(readr::read_csv(paste0(locs<-paste0("G:/imzML/",timepoint," ID/"),"overall_Spectrum.csv")))
library(HiTMaP)
library(BiocParallel)
HiTMaP:::Cpd_spectrum_match_rescore(cpd_list,peaklist,
wd=paste0(wd,"/",score_method,"_",FDR_cutoff),
adjust_score=T,
SPECTRUM_batch=timepoint,
BPPARAM=SerialParam(),
atom_isotope_sub=NULL,
ppm=10,
score_method=score_method,
FDR_cutoff=FDR_cutoff,Decoy_search=T)
}
}
}
peak_res_sum<-NULL
peak_res_sum_2nd<-NULL
for (score_method in c("SQRTP")){
dir.create(paste0(wd,"/",score_method))
for (timepoint in unique(locs_file)){
for (FDR_cutoff in c(0.1)) {
dir.create(paste0(wd,"/",score_method,"_",FDR_cutoff))
peak_res<-read.csv(paste0(wd,"/",score_method,"_",FDR_cutoff,"/",timepoint,"/","CPD_1st_ID_score_rank_SQRTP.csv"))
peak_res2nd<-read.csv(paste0(wd,"/",score_method,"_",FDR_cutoff,"/",timepoint,"/","CPD_2nd_ID_score_rank_SQRTP.csv"))
if(nrow(peak_res)>=1) peak_res$source<-paste0(score_method,"_",FDR_cutoff,"_",timepoint)
peak_res_sum[[paste0(score_method,"_",FDR_cutoff,"_",timepoint)]]<-peak_res
if(nrow(peak_res2nd)>=1) peak_res2nd$source<-paste0(score_method,"_",FDR_cutoff,"_",timepoint)
peak_res_sum_2nd[[paste0(score_method,"_",FDR_cutoff,"_",timepoint)]]<-peak_res2nd
}
}
}
peak_res_sum_bind<-do.call(rbind,peak_res_sum)
peak_res_sum_2nd_bind<-do.call(rbind,peak_res_sum_2nd)
peak_res_sum_bind$CPD_file<-paste0(peak_res_sum_bind$formula,"_",
peak_res_sum_bind$isdecoy,"_",
peak_res_sum_bind$Isotype,"_",
peak_res_sum_bind$source)
peak_res_sum_2nd_bind$CPD_file<-paste0(peak_res_sum_2nd_bind$formula,"_",
peak_res_sum_2nd_bind$isdecoy,"_",
peak_res_sum_2nd_bind$Isotype,"_",
peak_res_sum_2nd_bind$source)
target_fm<-c("C6H12O6","C12H22O11","C15H24N2O17P2","C6H14O6","C7H15O10P","C6H13O9P1","C6H14O12P2")
target_nm<-c("Glucose","Sucrose","UDP_Glucose","Sorbitol","S7P","G6P","F16P")
peak_res_sum_bind_tgt<-peak_res_sum_bind[peak_res_sum_bind$formula_mono %in% target_fm,]
peak_res_sum_bind_tgt_meta<-match(str_split_fixed(peak_res_sum_bind_tgt$source,"_",3)[,3],fileinfo_v2$filenames)
peak_res_sum_bind_tgt<-cbind(peak_res_sum_bind_tgt,fileinfo_v2[peak_res_sum_bind_tgt_meta,])
peak_res_sum_bind_tgt$FDR_pass<-peak_res_sum_bind_tgt$CPD_file %in% unique(peak_res_sum_2nd_bind$CPD_file)
peak_res_sum_bind_tgt<-unique(peak_res_sum_bind_tgt[,c("Score" ,"adduct", "formula","formula_mono","Isotype","isdecoy","mins","bioreplicate","class" ,"FDR_pass" )])
peak_res_sum_bind_tgt$cpd<-target_nm[match(peak_res_sum_bind_tgt$formula_mono,target_fm)]
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sem = sd(x[[col]], na.rm=TRUE)/sqrt(length(x[[col]])))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
library(ggplot2)
df2 <- data_summary(peak_res_sum_bind_tgt, varname="Score",
groupnames=c("cpd","Isotype","isdecoy","class","adduct"))
df2 <- merge(df2,peak_res_sum_bind_tgt[,c("cpd","Isotype","isdecoy","class","adduct","FDR_pass")],by=c("cpd","Isotype","isdecoy","class","adduct"))
p<-ggplot(df2) +geom_point(aes(x=class,y=Score,colour=FDR_pass,size=FDR_pass)) + geom_line(aes(x=class,y=Score,color=interaction(isdecoy))) +
geom_errorbar(aes(x=class,ymin=Score-sem, ymax=Score+sem), width=.2,
position=position_dodge(.9)) +
facet_grid(rows = vars(Isotype,adduct),cols = vars(cpd))
png(paste0(wd,"/","/target_fm_summary_",FDR_cutoff,".png"),height = 2400*2,width = 1500*length(target_nm),res = 300)
print(p)
dev.off()
write.csv(peak_res_sum_bind,paste0(wd,"/",score_method,"_","peak_res_sum_bind_",FDR_cutoff,".csv"),row.names = F)
write.csv(peak_res_sum_2nd_bind,paste0(wd,"/",score_method,"_","peak_res_sum_2nd_bind_",FDR_cutoff,".csv"),row.names = F)
library(stringr)
library(readr)
FDR_wd<- paste0(project_dir, FDR_dir)
main_dir <- FDR_wd
dir_list <- list.dirs(main_dir,recursive = FALSE)
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir, "all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
match.features <- read_csv(paste0(outwd,"/Metabo_Dim.1/mummichog_matched_compound_all.csv" ))
match.features$adduct <- NA
match.features$adduct[str_detect(match.features$Matched.Form,"Cl")]<- "M+Cl"
match.features$adduct[!str_detect(match.features$Matched.Form,"Cl")]<- "M-H"
match.features$Isotype<- NA
match.features$Isotype[str_detect(match.features$Matched.Form,"13C6")]<- "SIL"
match.features$Isotype[!str_detect(match.features$Matched.Form,"13C6")]<- "Normal"
folder_name<- list.files(FDR_wd)
folder.slt <- c()
for (x in 1:length(folder_name)){
folder.exist = str_detect(unique(all_coordata_label_sel_arc_bind$run),tolower(folder_name[x]))
if (sum(folder.exist) > 0){
folder.slt<- append(folder.slt,folder_name[x])
}
else {
message("Data in this run was not included.")
}
}
cpd.list<- list()
for (dir in folder.slt){
cpd.df <- read_csv(paste0(FDR_wd,dir,"/CPD_2nd_ID_score_rank_SQRTP.csv"))
run <- str_remove(dir, paste0(main_dir,"/"))
cpd.df$run<-run
cpd.list[[run]] <- cpd.df
}
cpd.list.all<- do.call(rbind, cpd.list)
file.info<-fileinfo_v2
cpd_filter <- unique(cpd.list.all[,c("Isotype", "Metabolite.Name", "adduct")])
cpd_filter <- unique(cpd.list.all[,c("mz","Isotype", "Metabolite.Name", "adduct")])
cpd_filter$match <- paste(cpd_filter$Metabolite.Name, cpd_filter$Isotype, cpd_filter$adduct, sep = "_")
match.features$Isotype<- NA
match.features$Isotype[str_detect(match.features$Matched.Form,"13C")] <- "SIL"
match.features$Isotype[!str_detect(match.features$Matched.Form,"13C")] <- "Normal"
match.features$adduct<- NA
match.features$adduct[str_detect(match.features$Matched.Form, "Cl")] <- "M+Cl"
match.features$adduct[!str_detect(match.features$Matched.Form, "Cl")] <- "M-H"
match.features$match<- paste(match.features$Matched.Compound, match.features$Isotype, match.features$adduct, sep = "_")
match.features_HQ<-match.features[match.features$match %in% cpd_filter$match,]
cpd_filter <- unique(match.features_HQ$Query.Mass)
file.path <- outwd
for (dim in paste0("Dim.", 1:5)){
top.features <- read_csv(paste0(file.path ,dim,"_anno_freq_top1bin.csv"))
top.features.flt <- top.features[which(top.features$mz %in% cpd_filter),]
write.csv(top.features.flt, paste0(file.path, dim,"_anno_freq_top1bin_FDR_filter_0.1.csv"), row.names = FALSE)
}
df <- read.csv(paste0(project_dir,"/bin_data_time_sumspec.csv") , row.names = 1)
df_filter <- df[as.numeric(rownames(df)) %in% cpd_filter,]
label<- c(rep("Early", 5), rep("Late", 5) )
df_filter_label <-rbind(label, df_filter )
write.csv(df_filter_label, paste0(project_dir, "/bin_data_time_sumspec_FDR_0.1_tb.csv") )
write.csv(df_filter,paste0(project_dir, "/bin_data_time_sumspec_FDR_0.1.csv") )
for (dim in paste0("Dim.", 1:5)){
dim.sig<-readr::read_csv(paste0(outwd, dim, "_anno_freq_top1bin_FDR_filter_0.1.csv"))
dir.create(paste0(outwd,"/Metabo_",dim,"FDR_0.1"))
setwd(paste0(outwd,"/Metabo_",dim,"FDR_0.1"))
message(getwd())
mSet<-NULL
library(MetaboAnalystR)
mSet<-InitDataObjects("mass_table", "mummichog", FALSE)
anal.type<<-mSet$analSet$type
mSet<-SetPeakFormat(mSet, "pvalue")
mSet<-UpdateInstrumentParameters(mSet, 10.0, "negative", "yes");
mSet<-SetRTincluded(mSet, "no")
mSet<-Read.TextData(mSet, paste0(project_dir, "/bin_data_time_sumspec_FDR_0.1_tb.csv"), "colu", "disc")
mSet<-SanityCheckMummichogData(mSet)
mSet<-ReplaceMin(mSet);
mSet<-SanityCheckMummichogData(mSet)
mSet<-PreparePrenormData(mSet)
mSet<-Normalization(mSet, "none", "none", "none", ratio=FALSE, ratioNum=20)
mSet<-SetPeakEnrichMethod(mSet, "mum", "v2")
mSet<-PreparePeakTable4PSEA(mSet)
feature_sig <- names(mSet[["analSet"]][["tt"]][["t.score"]]) %in% dim.sig$mz
mSet[["analSet"]][["tt"]][["p.value"]][feature_sig] <-0.01
mSet[["analSet"]][["tt"]][["p.value"]][!feature_sig] <-1
nData<-mSet$dataSet$mummi.proc
feature_sig <- nData$m.z %in% dim.sig$mz
match_mz(nData$m.z,dim.sig$mz,10,"ppm")->mapped_mz
feature_sig <- !is.na(mapped_mz)
nData[feature_sig,1] <-0.000001
nData[!feature_sig,1] <-1
nData->mSet$dataSet$mummi.proc
mSet<-SetMummichogPval(mSet, 0.05)
mummichog.lib.new<-mummichog.lib.mod(lib="bta_kegg",lib.new="bta_kegg_13C_adduct",C13_number=c(6),C13_deltamass=1.003355,wd=paste0(outwd,"/Metabo_",dim, "FDR_0.1"),force_update_lib=F,adducts_list=c("M+Cl[-]","M-H[-]"),method = "new_adduct")
mSet$paramSet$ContainsMS2<-F
mSet<-PerformPSEA(mSet, "bta_kegg_13C_adduct", "current", 3 , 100)
mSet<-SaveTransformedData(mSet)
mSet<-PlotPeaks2Paths(mSet, "peaks_to_paths_0_", "png", 300, width=NA)
write.csv(annotate_PSEA(mSet = mSet,mummichog.lib.new),"mSet_mummi_resmat_anno.csv")
}
df_C13<-NULL
for (p_thres in paste0("Metabo_Dim.",1:5,"FDR_0.1")){
df_C13[[as.character(p_thres)]]<-read.csv(paste0(outwd,"/",p_thres,"/","/","mSet_mummi_resmat_anno.csv"))
df_C13[[as.character(p_thres)]]$source<-p_thres
}
df_C13<-do.call(rbind,df_C13)
library(reshape2)
library(dplyr)
df_C13<-NULL
for (p_thres in paste0("Metabo_Dim.",1:5)){
df_C13[[as.character(p_thres)]]<-read.csv(paste0(outwd,"/",p_thres,"FDR_0.1","/","mSet_mummi_resmat_anno.csv"))
df_C13[[as.character(p_thres)]]$source<-p_thres
}
df_C13<-do.call(rbind,df_C13)
df_C13$Gamma_log<--log(df_C13$Gamma,10)
df_C13$Pathway<-as.factor(df_C13$X)
df_C13$Pval_quartile<-df_C13$source
df_C13$PCA_dim<-as.numeric(str_remove(df_C13$source,"Metabo_Dim."))
df_C13_selected<-df_C13[df_C13$X %in% df_C13$X[df_C13$FET<=0.05],]
df_C13_selected$sig_level <- gtools::stars.pval(df_C13_selected$FET)
pathway_summary<-df_C13_selected[,c("X", "PCA_dim", "sig_level","Hits.sig")]
colnames(pathway_summary)<-c("Pathway", "Dim", "Sig_level","Hits.sig")
pathway_summary$Dim<-paste0("Dim",".",pathway_summary$Dim)
pathway_summary_sig<-dcast(pathway_summary,Pathway ~ Dim, value.var="Sig_level")
pathway_summary_sig[is.na(pathway_summary_sig)]<-""
sig.cpd.all <- list()
for (dim in paste0("Dim.", 1:5)){
matched.cpd<-read.csv(paste0(outwd, "/Metabo_", dim,"FDR_0.1", "/mummichog_matched_compound_all.csv"))
sig.cpd.dim <- read.csv(paste0(outwd, "/", dim, "_anno_freq_top1bin_FDR_filter_0.1.csv"))
sig.cpd.id <- match_mz(matched.cpd$Query.Mass, sig.cpd.dim$mz, 10,"ppm")
sig.cpd.dim <- matched.cpd[!is.na(sig.cpd.id),]
sig.cpd.dim$Dim <- paste0(dim)
sig.cpd.all[[dim]] <- sig.cpd.dim
}
sig.cpd.all<- do.call(rbind, sig.cpd.all)
pathway.sig <- unique(df_C13_selected$X)
pathway.uni.id<- match ( pathway.sig, df_C13_selected$X)
pathway.sig<- df_C13_selected[pathway.uni.id,]
pathway.cpd<- list()
for (pathway in unique(df_C13_selected$X)){
pathway.cpd[[pathway]] <- unlist ( str_split(pathway.sig[which (pathway.sig$X == pathway), which(colnames(pathway.sig)=="cpd_id")],";") )
}
lapply(names(pathway.cpd), function(x){
sig.cpd.all[sig.cpd.all$Matched.Compound %in% pathway.cpd[[x]],]->df
df$Pathway<-(x)
df
})->Pathway_matched_sig
Pathway_matched_sig<-do.call(rbind,Pathway_matched_sig)
df<- Pathway_matched_sig %>% group_by(Pathway, Dim) %>% dplyr::summarise(MZ_Normal = length(unique(Query.Mass[!str_detect(Matched.Form,"M_13C")])),MZ_SIL = length(unique(Query.Mass[str_detect(Matched.Form,"M_13C")])))
df_cast_SIL<- dcast(df,Pathway ~ Dim, value.var="MZ_SIL")
df_cast_SIL$Category<- "m/z SIL"
df_cast_SIL[is.na(df_cast_SIL)]<-0
df_cast_SIL<- df_cast_SIL%>% relocate(Category, .after = Pathway)
df_cast_Normal<- dcast(df,Pathway ~ Dim, value.var="MZ_Normal")
df_cast_Normal$Category<- "m/z Normal"
df_cast_Normal[is.na(df_cast_Normal)]<-0
df_cast_Normal<- df_cast_Normal%>% relocate(Category, .after = Pathway)
df_output <- rbind(df_cast_SIL,df_cast_Normal)
pathway_summary_sig$Category<- "Sig Level"
pathway_summary_sig<- pathway_summary_sig%>% relocate(Category, .after = Pathway)
df_output <- list(df_cast_Normal,df_cast_SIL, pathway_summary_sig )
df_output<-do.call(rbind,df_output)
# df_output<- df_output %>% relocate(Dim.10, .after = Dim.9)
write.csv(df_output, paste0(outwd, "pathway_final_output_1_to_5_FDR_0.1_remapping.csv"), row.names = F)
library(Cardinal)
library(stringr)
library(dplyr)
heximg_dir <- "/heximg/"
dir.create(paste0(project_dir, visualization_dir, heximg_dir))
run_demo_slt<-c("ff_trim_02_08042021","5minsilglucose_2nd_20201116_trim","15minsilglucose_2nd_20201116_trim","30minsilglucose_2nd_20201116_trim","1hrsilglucose_201130_trim","2hrsilglucose_2nd_20201109_trim","4hr_01_20201109_trim","8hr_01_20201109_trim","16hrsilglucose_03_20211123_trim","20hrsilglucose_02_20211123_trim")
combinedimdata_demo<-combinedimdata[,run(combinedimdata) %in% run_demo_slt]
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
pixeldf<-all_coordata_label_sel_arc_bind
library(Cardinal)
combinedimdata_full<- combinedimdata
fileinfo_v2 <- readr::read_csv(paste0(project_dir,"/fileinfo_v2.csv"))
# final_selection_df<-read.csv(paste0(wd,"/final_selection.csv"))
fileinfo_v2$filenames<-tolower(fileinfo_v2$filenames)
fileinfo_v2<-fileinfo_v2[fileinfo_v2$filenames %in% levels(run(combinedimdata_demo)),]
unique(fileinfo_v2$filenames)
coordata<-combinedimdata_demo@elementMetadata@coord@listData
coordata$run<-run(combinedimdata_demo)
coordata<-as.data.frame(coordata)
unique(coordata$run)
#hexbin all features
combinedimdata_full<-combinedimdata_demo
library(hexbin)
label_sel="slt"
merged_data_df<-NULL
coordata_label_sel_arc<-NULL
setCardinalBPPARAM(BPPARAM=SerialParam())
setCardinalVerbose(verbose=F)
for (imsrun in fileinfo_v2$filenames){
message(imsrun)
time_cls<-fileinfo_v2$class[match(imsrun,fileinfo_v2$filenames)]
combinedimdata_full@elementMetadata@coord@listData->coordata
coordata$run<-run(combinedimdata_full)
coordata<-as.data.frame(coordata)
coordata_run<-coordata[coordata$run==imsrun,]
coordata_run<-pixeldf[pixeldf$run==imsrun,]
rownames(coordata_run)<-1:nrow(coordata_run)
library(hexbin)
combinedimdata_full[,run(combinedimdata_full)==imsrun]->combinedimdata_full_sample_bin
bincell_df<-NULL
for (bincell in unique(coordata_run$new_label_scale)){
combinedimdata_full_sample_bin[,which(coordata_run$new_label_scale==bincell)] %>% summarizeFeatures( FUN = "mean") %>% process()->meanspec
meanspec@featureData@listData[["mean"]]->bincell_df[[bincell]]
}
bincell_df_bind<-do.call(cbind,bincell_df)
colnames(bincell_df_bind)<-paste(imsrun,time_cls,unique(coordata_run$new_label_scale),sep = "@")
merged_data_df[[imsrun]]<-bincell_df_bind
coordata_label_sel_arc[[imsrun]]<-coordata_run
}
coordata_label_sel_arc_bind<-do.call(rbind,coordata_label_sel_arc)
merged_data_df_bind<-do.call(cbind,merged_data_df)
rownames(merged_data_df_bind)<-combinedimdata_full@featureData@mz
#Generate hexbin data
write.csv(merged_data_df_bind,paste0(project_dir, visualization_dir , heximg_dir,label_sel,"_merged_data_df_bind",".csv"))
#Generate Master Table
write.csv(coordata_label_sel_arc_bind,paste0(project_dir, visualization_dir , heximg_dir,label_sel,"_coordata_label_sel_arc_bind",".csv"))
message(label_sel," DONE")
setwd(wd)
library(readr)
all_data_original<-read_csv(paste0(project_dir, visualization_dir , heximg_dir,"/slt_merged_data_df_bind.csv"))
all_data_original<-as.data.frame(all_data_original)
rownames(all_data_original)<-all_data_original[,1]
all_data_original[,-1]->all_data_original
library(stringr)
col.nms.split<-str_split_fixed(colnames(all_data_original),"@",3)
all_data_original_t<-t(all_data_original)
group_sum <- rowsum(all_data_original_t, group = interaction (col.nms.split[,2],col.nms.split[,3]))
group_sum_t<-as.data.frame(t(group_sum))
a = paste(col.nms.split[,2], col.nms.split[,3], sep=".")
for (col in colnames(group_sum_t)) {
group_sum_t [,col]= group_sum_t [,col] / sum(a==col)
# message(col,sum(a==col))
}
grouped_bio_rep_all_pixel_mean<-group_sum_t
grouped_bio_rep_all_pixel_mean<-as.data.frame(grouped_bio_rep_all_pixel_mean)
library(stringr)
col.nms.split_2<-str_split_fixed(colnames(grouped_bio_rep_all_pixel_mean),"\\.",2)
Label<-col.nms.split_2[,1]
grouped_bio_rep_all_pixel_mean<-rbind(Label, grouped_bio_rep_all_pixel_mean)
row.names(grouped_bio_rep_all_pixel_mean)[1]<-"Label"
# grouped_bio_rep_all_pixel_mean<-grouped_bio_rep_all_pixel_mean[,order(grouped_bio_rep_all_pixel_mean[1,])]
cls_merge<-list()
for (cls in unique(Label)){
group_sum_t[,col.nms.split_2[,1]==cls]->temp
colnames(temp)<-str_remove(colnames(temp),paste0(cls,"\\."))
row.names(temp)<-paste0(row.names(temp),"@",cls)
cls_merge[[cls]]<-(temp)
}
library(plyr)
cls_merge_bind<-rbind.fill(cls_merge)
rownms<-unlist(lapply(cls_merge,rownames))
rownames(cls_merge_bind)<-rownms
write.csv(cls_merge_bind, paste0(project_dir, visualization_dir , heximg_dir, "grouped_bio_rep_slt_pixel_mean_label_time_merge_new.csv"))
mum.url <- paste("https://www.metaboanalyst.ca/resources/libs/mummichog/bta_kegg.qs")
download.file(mum.url, paste0(project_dir, visualization_dir, heximg_dir, "bta_kegg.qs"),
mode = "wb")
bta_kegg<-qs::qread(paste0(project_dir, visualization_dir, heximg_dir, "bta_kegg.qs"))
cpd_list<- data.frame(id =bta_kegg[["cpd.lib"]][["id"]], name = bta_kegg[["cpd.lib"]][["name"]])
cpd.match <- read_csv(paste0(project_dir, dim_reduction_dir, PCA_dir, "/Metabo_Dim.1/mummichog_matched_compound_all.csv"))
cpd.match$name <- cpd_list$name[match(cpd.match$Matched.Compound, cpd_list$id)]
target_cpd<- c("C00031","C00668","C05378","C05382", "C00029", "C00794")
cpd.target<- cpd.match[which(cpd.match$Matched.Compound %in% target_cpd),]
cpd.target.sil<- cpd.target[str_detect(cpd.target$Matched.Form,"13C6"),]
library(RColorBrewer)
fun_color_range <- colorRampPalette(brewer.pal(9, "Spectral"))
my_colors <- fun_color_range(10)
library(Cardinal)
library(stringr)
library(dplyr)
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir, "all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
cls_merge_bind<-readr::read_csv(paste0(project_dir, visualization_dir, heximg_dir,"grouped_bio_rep_slt_pixel_mean_label_time_merge_new.csv"))
cls_merge_bind<-as.data.frame(cls_merge_bind)
rownames(cls_merge_bind)<-cls_merge_bind[,1]
cls_merge_bind[,1]<-NULL
cpd.target<-cpd.target[!duplicated(paste(cpd.target$Matched.Compound, cpd.target$Matched.Form, sep = ";")),]
for (mass in cpd.target$Query.Mass) {
glucose_intensity <- cls_merge_bind[stringr::str_detect(rownames(cls_merge_bind), paste0(mass)),]
glucose_intensity[is.na(glucose_intensity)]<-0
glucose_intensity<-as.data.frame (t (glucose_intensity))
colnames(glucose_intensity)<- paste("Time",str_remove(colnames(glucose_intensity),paste0(mass,"@")))
glucose_intensity<-glucose_intensity[,order(colnames(glucose_intensity))]
for (a in c(0, 0.05,0.1 , 0.12,0.15)){
for (col in 1:10){
target_class <- data.frame(intensity=glucose_intensity[,colnames(glucose_intensity)[col]], row.names = rownames(glucose_intensity) )
all_coordata_label_sel_arc_bind$intensity<-target_class$intensity[match(all_coordata_label_sel_arc_bind$new_label_scale, as.numeric(rownames(target_class)))]
all_coordata_label_sel_arc_bind$intensity[is.na(all_coordata_label_sel_arc_bind$intensity)]<-0
all_coordata_label_sel_arc_bind$signal_exist<- NA
all_coordata_label_sel_arc_bind$signal_exist[which(all_coordata_label_sel_arc_bind$intensity <= a)]<- 0
all_coordata_label_sel_arc_bind$signal_exist[is.na(all_coordata_label_sel_arc_bind$signal_exist)]<-1
factor(all_coordata_label_sel_arc_bind$signal_exist[all_coordata_label_sel_arc_bind$run %in% unique(run(example_imdata))])->final_selection
fig.arbitrary_seg<- image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, col = c("#FFFFFF00", my_colors[col]))
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
dir.create(paste0(project_dir, visualization_dir, heximg_dir,"image_time_stack_selected/"))
png(paste0(project_dir, visualization_dir, heximg_dir,"/image_time_stack_selected/", mass,"_time_",col,"_", a,".png"),height = 5 ,width = 6,units = "in",res = 300, bg="transparent")
print(fig.arbitrary_seg)
dev.off()
}
col=10:1
img_set<-magick::image_read(paste0(project_dir, visualization_dir, heximg_dir,"/image_time_stack_selected/", mass,"_time_",col,"_", a,".png"))
library(magick)
image_mosaic(img_set)-> image_stack
image_write(image_stack,paste0(project_dir, visualization_dir, heximg_dir,"/image_time_stack_selected/", mass,"_time_stack_",a,".png"))
}
}
library(stringr)
run_demo_slt<-c("ff_trim_02_08042021","5minsilglucose_2nd_20201116_trim","15minsilglucose_2nd_20201116_trim","30minsilglucose_2nd_20201116_trim","1hrsilglucose_201130_trim","2hrsilglucose_2nd_20201109_trim","4hr_01_20201109_trim","8hr_01_20201109_trim","16hrsilglucose_03_20211123_trim","20hrsilglucose_02_20211123_trim")
combinedimdata_demo<-combinedimdata[,run(combinedimdata) %in% run_demo_slt]
run(combinedimdata_demo)->run_vec
run_vec_replace<-str_extract(as.character(run_vec),"^.{0,2}min|^.{0,2}hr|ff")
run_vec_replace<-ifelse(str_detect(run_vec_replace,"min"),str_remove(run_vec_replace,"min"),ifelse(str_detect(run_vec_replace,"hr"),as.numeric(str_remove(run_vec_replace,"hr"))*60,"0"))
run(combinedimdata_demo)<-factor(run_vec_replace,level=as.character(sort(unique(as.numeric(run_vec_replace)))))
cpd_mapped <- read.csv(paste0(project_dir, dim_reduction_dir, PCA_dir, "/Metabo_Dim.1FDR_0.1/mummichog_matched_compound_all.csv"))
combinedimdata_demo[mz(combinedimdata_demo) %in% cpd_mapped$Query.Mass, ] -> combinedimdata_demo_cpd_match
saveRDS(combinedimdata_demo_cpd_match,paste0(project_dir, dim_reduction_dir, PCA_dir, "/combinedimdata_demo_cpd_match.rds"))
library(Cardinal)
readRDS(paste0(project_dir, dim_reduction_dir, PCA_dir, "/combinedimdata_demo_cpd_match.rds"))->combinedimdata_demo_cpd_match
ion_images_dir <- "/ion_images/"
dir.create(paste0(project_dir, dim_reduction_dir, PCA_dir, ion_images_dir))
outwd= paste0(project_dir, dim_reduction_dir, PCA_dir, ion_images_dir)
for (x in mz(combinedimdata_demo_cpd_match)){
png(paste0(outwd,"/",x,".png"),width = 300 * length(levels(run(combinedimdata_demo_cpd_match))),height =450,units = "px")
darkmode()
par(oma=c(0, 0, 0, 0),tcl = NA,mar=c(0, 0, 0, 0),mfrow = c(1,1),
bty="n",pty="s",xaxt="n",
yaxt="n",
no.readonly = TRUE,ann=FALSE)
p<-image(combinedimdata_demo_cpd_match, mz=as.numeric(x),superpose=F, key=F,plusminus=as.numeric(x)/100000,contrast.enhance ="suppression",normalize.image = c("none"),smooth.image = c("none"), layout=c(1,length(levels(run(combinedimdata_demo_cpd_match)))))
lapply(p[["facets"]],function(x) {
attr(x,"strip")$text<-""
return(x)
})->p[["facets"]]
print(p)
dev.off()
}
Loading Pathway database and matched compound information
bta_kegg<- qs::qread(paste0(project_dir, dim_reduction_dir, PCA_dir, "/Metabo_Dim.1FDR_0.1/bta_kegg_13C_adduct.qs"))
cpd.info<- data.frame(cpd.name = bta_kegg[["cpd.lib"]][["name"]], id = bta_kegg[["cpd.lib"]][["id"]])
pathway.db<-list()
for ( x in 1:length(bta_kegg[["pathways"]][["name"]])){
pathway.db[[x]] <- data.frame(pathway.name = bta_kegg[["pathways"]][["name"]][x], pathway.cpd= paste(bta_kegg[["pathways"]][["cpds"]][[x]], collapse = ";"))
}
pathway.db<- do.call(rbind, pathway.db)
pathway.info <- data.frame(pathway.name = bta_kegg[["pathways"]][["name"]], pathway.cpd = paste(bta_kegg[["pathways"]][["cpds"]], collapse = ";"))
Summarize the matched compound information and PCA loading information
library(tidyverse)
df_output<- read.csv(paste0(project_dir, dim_reduction_dir, PCA_dir, "/pathway_final_output_1_to_5_FDR_0.1_remapping.csv") )
cpd.high.quality <- read.csv(paste0(project_dir, dim_reduction_dir, PCA_dir, "/Metabo_Dim.1FDR_0.1/mummichog_matched_compound_all.csv"))
pathway.target<- unique(df_output$Pathway)
pathway.target <- pathway.db[which(pathway.info$pathway.name %in% pathway.target),]
rownames(pathway.target)<-pathway.target$pathway.name
pathway.target.cpddf<-pathway.target %>% group_by(pathway.name) %>% separate_longer_delim(pathway.cpd, delim = ";")
pathway.target.cpddf.matched<-merge(cpd.high.quality,pathway.target.cpddf, by.x="Matched.Compound", by.y="pathway.cpd")
outwd<- paste0(project_dir, dim_reduction_dir, PCA_dir)
sig.cpd.all <- list()
for (dim in paste0("Dim.", 1:5)){
sig.cpd.dim <- read.csv(paste0(outwd, "/", dim, "_anno_freq_top1bin_FDR_filter_0.1.csv"))
sig.cpd.dim <- cpd.high.quality[which(cpd.high.quality$Query.Mass %in% sig.cpd.dim$mz ),]
sig.cpd.dim$Dim <- paste0(dim)
sig.cpd.all[[dim]] <- sig.cpd.dim
}
sig.cpd.all<- do.call(rbind, sig.cpd.all)
pathway.sig <- unique(df_output$Pathway)
# pathway.sig<- df_C13_selected[match ( pathway.sig, df_C13_selected$X),]
pathway.cpd<- list()
for (pathway in pathway.target$pathway.name){
pathway.cpd[[pathway]] <- unlist ( stringr::str_split(pathway.target[pathway,"pathway.cpd"], ";") )
}
lapply(names(pathway.cpd), function(x){
sig.cpd.all[sig.cpd.all$Matched.Compound %in% pathway.cpd[[x]],]->df
df$Pathway<-(x)
df
})->Pathway_matched_sig
Pathway_matched_sig<-do.call(rbind,Pathway_matched_sig)
pathway.target.cpddf.matched.nonsig<- pathway.target.cpddf.matched[!pathway.target.cpddf.matched$Query.Mass %in% Pathway_matched_sig$Query.Mass,]
pathway.target.cpddf.matched.nonsig$Dim=""
pathway.target.cpddf.matched.nonsig$Pathway<-pathway.target.cpddf.matched.nonsig$pathway.name
pathway.target.cpddf.matched.nonsig$pathway.name<-NULL
Pathway_matched_all<-rbind(Pathway_matched_sig,pathway.target.cpddf.matched.nonsig)
Rendering pathway map with matched sig compound information:
outwd= paste0(project_dir, dim_reduction_dir, PCA_dir, ion_images_dir)
library(magick)
library(flextable)
library(ggplot2)
library(grid)
Pathway_matched_sig_summary<-Pathway_matched_sig
Pathway_matched_sig_summary<-Pathway_matched_sig_summary[!duplicated(Pathway_matched_sig_summary[,c( "Matched.Compound", "Matched.Form","Dim" )]),]
Pathway_matched_sig_summary$CPD_name<-cpd.info$cpd.name[match(Pathway_matched_sig_summary$Matched.Compound,cpd.info$id)]
Pathway_matched_sig_summary$CPD_name<-str_remove_all(Pathway_matched_sig_summary$CPD_name,"^Beta-|^Alpha-|^alpha-")
Pathway_matched_sig_summary$CPD_name<-str_replace_all(Pathway_matched_sig_summary$CPD_name,"yl","yl ")
Pathway_matched_sig_summary$CPD_name_width<-str_width(Pathway_matched_sig_summary$CPD_name)
Pathway_matched_sig_summary$CPD_name<-str_wrap(Pathway_matched_sig_summary$CPD_name,width = 15,whitespace_only = T)
Pathway_matched_sig_summary$Isotype<-NA
Pathway_matched_sig_summary$Isotype[str_detect(Pathway_matched_sig_summary$Matched.Form,"13C6") ]<-"SIL_C6"
Pathway_matched_sig_summary$Isotype[!str_detect(Pathway_matched_sig_summary$Matched.Form,"13C6") ]<-"Normal"
for (pathway in unique(Pathway_matched_sig_summary$Pathway)){
Pathway_matched_sig_summary_subset <- Pathway_matched_sig_summary[Pathway_matched_sig_summary$Pathway==pathway,]
# Pathway_matched_sig_summary_subset_subset<-Pathway_matched_sig_summary_subset_subset %>% group_by(Query.Mass) %>% summarize(CPD_name=paste(CPD_name,collapse = "\n\n"),Isotype=paste(Isotype,collapse = "\n\n"))
Pathway_matched_sig_summary_subset_subset <- Pathway_matched_sig_summary_subset %>% group_by(Query.Mass) %>% summarize(CPD_name = paste(unique(unlist(CPD_name)), collapse = "; "), Isotype = paste(unique(unlist(Isotype)),collapse = "\n\n"), Dim = paste(unique(Dim), collapse = ","))
Pathway_matched_sig_summary_subset_subset$Dim<-str_remove_all(Pathway_matched_sig_summary_subset_subset$Dim,"Dim.")
img_set<-magick::image_read(paste0(outwd,"/", Pathway_matched_sig_summary_subset_subset$Query.Mass,".png"))
for (Cpd in 1:nrow(Pathway_matched_sig_summary_subset_subset)){
img_info<-image_info(img_set)
CPD_name=unique(unlist(str_split(Pathway_matched_sig_summary_subset_subset$CPD_name[Cpd],"\n\n")))
Isotype=unique(unlist(str_split(Pathway_matched_sig_summary_subset_subset$Isotype[Cpd],"\n\n")))
Dim = Pathway_matched_sig_summary_subset_subset$Dim[Cpd]
tb<-data.frame(CPD_name=CPD_name,Isotype=Isotype, Dim = Dim)
#colnames(tb)<-c("CPD_name","Isotype", "Dim")
tb<-as_tibble(tb)
set_flextable_defaults(font.size =10)
}
img_set_append<-image_append(img_set,stack = T)
img_set_info<-image_info(img_set_append)
tb<-as_tibble(Pathway_matched_sig_summary_subset_subset)
set_flextable_defaults(font.size =10)
ft_raster_cpd <- tb %>%
as.data.frame(.) %>% flextable::flextable() %>% flextable::delete_part(part = "header") %>%
bg( bg = "transparent", part = "all") %>%
flextable::color(color = "white") %>% border_remove %>%
height_all(height = 1) %>%
hrule(rule = "exact")
ft_raster_cpd<-ft_raster_cpd %>%
flextable::as_raster()
legend <- image_blank(width = 1500, height=img_set_info$height, color="black")
legend <- image_ggplot(legend) + theme_void() +
annotation_custom(rasterGrob(ft_raster_cpd), xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)
legend1 <- image_graph(width = 1500, height=img_set_info$height, bg="black")
print(legend)
dev.off()
img_set_append_anno<-image_append(c(img_set_append,legend1),stack = F)
image_write(img_set_append_anno,paste0(outwd,pathway,"FDR_0.1_remapping.png"))
tb<-as_tibble(Pathway_matched_sig_summary_subset_subset[,c("CPD_name","Isotype", "Dim")])
set_flextable_defaults(font.size =10)
ft_raster_cpd <- tb %>%
as.data.frame(.) %>% flextable::flextable() %>% flextable::delete_part(part = "header") %>%
bg( bg = "transparent", part = "all") %>%
flextable::color(color = "white") %>% border_remove %>%
height_all(height = 1) %>%
hrule(rule = "exact")
ft_raster_cpd<-ft_raster_cpd %>%
flextable::as_raster()
legend <- image_blank(width = 1200, height=img_set_info$height, color="black")
legend <- image_ggplot(legend) + theme_void() +
annotation_custom(rasterGrob(ft_raster_cpd), xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)
legend1 <- image_graph(width = 1200 , height=img_set_info$height, bg="black")
print(legend)
dev.off()
img_set_append_anno<-image_append(c(img_set_append,legend1),stack = F)
image_write(img_set_append_anno,paste0(outwd,pathway,"FDR_0.1_remapping_style2.png"))
}
Rendering pathway map with matched compound information:
outwd= paste0(project_dir, dim_reduction_dir, PCA_dir, ion_images_dir)
library(magick)
library(flextable)
library(ggplot2)
library(grid)
library(stringr)
Pathway_matched_sig_summary<-Pathway_matched_all
Pathway_matched_sig_summary<-Pathway_matched_sig_summary[!duplicated(Pathway_matched_sig_summary[,c( "Matched.Compound", "Matched.Form","Dim" )]),]
Pathway_matched_sig_summary$CPD_name<-cpd.info$cpd.name[match(Pathway_matched_sig_summary$Matched.Compound,cpd.info$id)]
Pathway_matched_sig_summary$CPD_name<-str_remove_all(Pathway_matched_sig_summary$CPD_name,"^Beta-|^Alpha-|^alpha-")
Pathway_matched_sig_summary$CPD_name<-str_replace_all(Pathway_matched_sig_summary$CPD_name,"yl","yl ")
Pathway_matched_sig_summary$CPD_name_width<-str_width(Pathway_matched_sig_summary$CPD_name)
Pathway_matched_sig_summary$CPD_name<-str_wrap(Pathway_matched_sig_summary$CPD_name,width = 15,whitespace_only = T)
Pathway_matched_sig_summary$Isotype<-NA
Pathway_matched_sig_summary$Isotype[str_detect(Pathway_matched_sig_summary$Matched.Form,"13C6") ]<-"SIL_C6"
Pathway_matched_sig_summary$Isotype[!str_detect(Pathway_matched_sig_summary$Matched.Form,"13C6") ]<-"Normal"
for (pathway in unique(Pathway_matched_sig_summary$Pathway)){
Pathway_matched_sig_summary_subset <- Pathway_matched_sig_summary[Pathway_matched_sig_summary$Pathway==pathway,]
Pathway_matched_sig_summary_subset_subset <- Pathway_matched_sig_summary_subset %>% group_by(Query.Mass) %>% summarize(CPD_name = paste(unique(unlist(CPD_name)), collapse = "; "), Isotype = paste(unique(unlist(Isotype)),collapse = "\n\n"), Dim = paste(unique(Dim), collapse = ","))
Pathway_matched_sig_summary_subset_subset$Dim<-str_remove_all(Pathway_matched_sig_summary_subset_subset$Dim,"Dim.")
img_set<-magick::image_read(paste0(outwd,"/", Pathway_matched_sig_summary_subset_subset$Query.Mass,".png"))
for (Cpd in 1:nrow(Pathway_matched_sig_summary_subset_subset)){
img_info<-image_info(img_set)
CPD_name=unique(unlist(str_split(Pathway_matched_sig_summary_subset_subset$CPD_name[Cpd],"\n\n")))
Isotype=unique(unlist(str_split(Pathway_matched_sig_summary_subset_subset$Isotype[Cpd],"\n\n")))
Dim = Pathway_matched_sig_summary_subset_subset$Dim[Cpd]
tb<-data.frame(CPD_name=CPD_name,Isotype=Isotype, Dim = Dim)
#colnames(tb)<-c("CPD_name","Isotype", "Dim")
tb<-as_tibble(tb)
set_flextable_defaults(font.size =10)
}
img_set_append<-image_append(img_set,stack = T)
img_set_info<-image_info(img_set_append)
tb<-as_tibble(Pathway_matched_sig_summary_subset_subset)
set_flextable_defaults(font.size =10)
ft_raster_cpd <- tb %>%
as.data.frame(.) %>% flextable::flextable() %>% flextable::delete_part(part = "header") %>%
bg( bg = "transparent", part = "all") %>%
flextable::color(color = "white") %>% border_remove %>%
height_all(height = 1) %>%
hrule(rule = "exact")
ft_raster_cpd<-ft_raster_cpd %>%
flextable::as_raster()
legend <- image_blank(width = 1500, height=img_set_info$height, color="black")
legend <- image_ggplot(legend) + theme_void() +
annotation_custom(rasterGrob(ft_raster_cpd), xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)
legend1 <- image_graph(width = 1500, height=img_set_info$height, bg="black")
print(legend)
dev.off()
img_set_append_anno<-image_append(c(img_set_append,legend1),stack = F)
image_write(img_set_append_anno,paste0(outwd,pathway,"FDR_0.1_remapping_all.png"))
tb<-as_tibble(Pathway_matched_sig_summary_subset_subset[,c("CPD_name","Isotype", "Dim")])
set_flextable_defaults(font.size =10)
ft_raster_cpd <- tb %>%
as.data.frame(.) %>% flextable::flextable() %>% flextable::delete_part(part = "header") %>%
bg( bg = "transparent", part = "all") %>%
flextable::color(color = "white") %>% border_remove %>%
height_all(height = 1) %>%
hrule(rule = "exact")
ft_raster_cpd<-ft_raster_cpd %>%
flextable::as_raster()
legend <- image_blank(width = 1200, height=img_set_info$height, color="black")
legend <- image_ggplot(legend) + theme_void() +
annotation_custom(rasterGrob(ft_raster_cpd), xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)
legend1 <- image_graph(width = 1200 , height=img_set_info$height, bg="black")
print(legend)
dev.off()
img_set_append_anno<-image_append(c(img_set_append,legend1),stack = F)
image_write(img_set_append_anno,paste0(outwd,pathway,"FDR_0.1_remapping_style2_all.png"))
}
dir.create(paste0(project_dir, shrink_dir))
library(Cardinal)
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir, "all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
final_selection<- read.csv(paste0(project_dir, shrink_dir, "final_selection_shrink.csv"))
run(example_imdata)<-" "
lightmode()
fig.arbitrary_seg<- image(example_imdata, factor(final_selection$label[run(combinedimdata) == "4hrsilglucose_2nd_20201109_trim"] ) ~ x * y,
superpose=F,
key=F, xlab= NULL, col = c("#ff0027","#0092ff", "#800080", "grey80","#20b2aa"))
fig.arbitrary_seg<-fig.arbitrary_seg
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
png(paste0(project_dir, shrink_dir,"arbitrary_segmentation_shrink.png"),height = 3,width = 3,units = "in",res = 600)
print(fig.arbitrary_seg)
dev.off()
library(egg)
library(rsvd)
library(dplyr)
library(ggplot2)
library(wesanderson)
library(Cardinal)
load(paste0(project_dir, dim_reduction_dir , PCA_dir,"Time_merged_PCA_all_bio.rda"))
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir ,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
arbitrary_seg <-read.csv( paste0(project_dir, shrink_dir,"/final_selection_shrink.csv"))
all_coordata_label_sel_arc_bind<-cbind(all_coordata_label_sel_arc_bind,arbitrary_seg= arbitrary_seg$label)
example_data <-all_coordata_label_sel_arc_bind[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim",]
dims.pca.full <- PCA[["ind"]][["coord"]]
dims.pca.full <- as.data.frame(dims.pca.full)
dims.pca = dims.pca.full
dims.pca<- dims.pca[which(as.numeric(rownames(dims.pca))%in% example_data$new_label_scale) ,]
dims.pca$region.final<-example_data$arbitrary_seg[match(as.numeric(rownames(dims.pca)), example_data$new_label_scale)]
dims.pca.full$region.final<- dims.pca$region.final[match(rownames(dims.pca.full), rownames(dims.pca) )]
dims.pca.full$region.final [is.na(dims.pca.full$region.final)] ="NS"
dims.pca.full$region.final<-factor(dims.pca.full$region.final, levels =c("Anterior", "Core", "Equator","Posterior", "NS"))
dims.pca.full<-dims.pca.full[order(dims.pca.full$region.final, decreasing = T),]
png(paste0(project_dir, dim_reduction_dir, PCA_dir, "/PCA_Dim.1_vs_Dim.2.png"), width = 3, height = 3, units = "in", res=600)
fig<- ggplot(dims.pca.full, aes(x= Dim.1, y= Dim.2))+xlim(-200, 450)+ylim(-200, 100)+
geom_point(data = dims.pca.full[dims.pca.full$region.final=="NS", ],
stat = "identity", position = "identity", show.legend = FALSE,
colour = "grey80", size = 1
)+
theme_void()+ theme(legend.position = "none")
fig<- fig + geom_point(data = dims.pca.full[!(dims.pca.full$region.final=="NS"), ],
stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha = .6,aes(color=region.final)
)+ scale_colour_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
stat_ellipse(data = dims.pca.full[! (dims.pca.full$region.final =="NS"),], geom = "polygon",
type = "t", linetype = "twodash", aes(color = region.final, fill = region.final), level = 0.75, size = 0.5, alpha = 0.2) + scale_fill_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
theme_void()+ theme(legend.position = "none")
print(fig)
dev.off()
png(paste0(project_dir, dim_reduction_dir, PCA_dir, "/PCA_Dim.4_vs_Dim.5.png"), width = 3, height = 3, units = "in", res=600)
fig<- ggplot(dims.pca.full, aes(x= Dim.4, y= Dim.5))+xlim(-100, 150)+ylim(-200, 200)+
geom_point(data = dims.pca.full[dims.pca.full$region.final=="NS", ],
stat = "identity", position = "identity", show.legend = FALSE,
colour = "grey80", size = 1
)+
theme_void()+ theme(legend.position = "none")
fig<- fig + geom_point(data = dims.pca.full[!(dims.pca.full$region.final=="NS"), ],
stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha = .6,aes(color=region.final)
)+ scale_colour_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
stat_ellipse(data = dims.pca.full[! (dims.pca.full$region.final =="NS"),], geom = "polygon",
type = "t", linetype = "twodash", aes(color = region.final, fill = region.final), level = 0.75, size = 0.5, alpha = 0.2) +
scale_fill_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
theme_void()+ theme(legend.position = "none")
print(fig)
dev.off()
#Dim.4 vs Dim.5
set.seed(2)
pca.kmean.res<- kmeans(dims.pca.full[,c("Dim.4", "Dim.5")], 6, nstart = 25)
Time_merged_pca<- data.frame(dims.pca.full$Dim.4, dims.pca.full$Dim.5, rownames(dims.pca.full))
colnames(Time_merged_pca)<-c("Dim.4","Dim.5","bin")
pca.kmean.res.cluster<-data.frame(pca.kmean.res$cluster)
index<- match(Time_merged_pca$bin, as.numeric(rownames(pca.kmean.res.cluster)))
Time_merged_pca$K_Label<- pca.kmean.res.cluster[,1][index]
fig<- ggplot(Time_merged_pca, aes(x= Dim.4, y= Dim.5)) + xlim(-100, 150) + ylim(-200, 200) +
geom_point( stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha=.6,aes(color = as.factor(K_Label)) )+
scale_colour_manual(values= c("#760F00","#4e639e","#e54616", "#dba053", "#FF997C", "#7FBFDD"))+theme_void()
png(paste0(project_dir,dim_reduction_dir, PCA_dir, "/Scatter_K_means_Dim.4_vs_Dim.5.png"), width = 3, height = 3, units = "in", res=300)
print(fig)
dev.off()
all_coordata_label_sel_arc_bind$K_Label<-Time_merged_pca$K_Label[match(as.character(all_coordata_label_sel_arc_bind$new_label_scale),as.character(Time_merged_pca$bin))]
factor(all_coordata_label_sel_arc_bind$K_Label[which(all_coordata_label_sel_arc_bind$run =="4hrsilglucose_2nd_20201109_trim" )])->final_selection
lightmode()
fig.arbitrary_seg<- image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, col = c("#760F00","#4e639e","#e54616", "#dba053", "#FF997C", "#7FBFDD"))
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
png(paste0(project_dir,dim_reduction_dir, PCA_dir, "/Lens_K_means_Dim.4_vs_Dim.5.png"), width = 3, height = 3, units = "in", res=600)
print(fig.arbitrary_seg)
dev.off()
#Dim.1 vs Dim.2
set.seed(2)
pca.kmean.res<- kmeans(dims.pca.full[,c("Dim.1", "Dim.2")], 6, nstart = 25)
Time_merged_pca<- data.frame(dims.pca.full$Dim.1, dims.pca.full$Dim.2, rownames(dims.pca.full))
colnames(Time_merged_pca)<-c("Dim.1","Dim.2","bin")
pca.kmean.res.cluster<-data.frame(pca.kmean.res$cluster)
index<- match(Time_merged_pca$bin, as.numeric(rownames(pca.kmean.res.cluster)))
Time_merged_pca$K_Label<- pca.kmean.res.cluster[,1][index]
fig<- ggplot(Time_merged_pca, aes(x= Dim.1, y= Dim.2)) + xlim(-200, 450) + ylim(-200, 100) +
geom_point( stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha=.6, aes(color = as.factor(K_Label)) )+
scale_colour_manual(values= c("#760F00","#4e639e","#e54616", "#FF997C","#dba053", "#7FBFDD"))+theme_void()
png(paste0(project_dir,dim_reduction_dir, PCA_dir, "/Scatter_K_means_Dim.1_vs_Dim.2.png"), width = 3, height = 3, units = "in", res=300)
print(fig)
dev.off()
all_coordata_label_sel_arc_bind$K_Label<-Time_merged_pca$K_Label[match(as.character(all_coordata_label_sel_arc_bind$new_label_scale),as.character(Time_merged_pca$bin))]
factor(all_coordata_label_sel_arc_bind$K_Label[which(all_coordata_label_sel_arc_bind$run =="4hrsilglucose_2nd_20201109_trim" )])->final_selection
lightmode()
fig.arbitrary_seg<- image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, col = c("#760F00","#4e639e","#e54616", "#FF997C","#dba053", "#7FBFDD"))
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
png(paste0(project_dir,dim_reduction_dir, PCA_dir, "/Lens_K_means_Dim.1_vs_Dim.2.png"), width =3, height = 3, units = "in", res=600)
print(fig.arbitrary_seg)
dev.off()
library(rsvd)
library(dplyr)
library(ggplot2)
Time_merged_umap<- readRDS(paste0(project_dir, dim_reduction_dir, UMAP_dir, "/Time_merged_umap_all_bio.rds"))
colnames(Time_merged_umap)<- c("UMAP_1","UMAP_2","bin")
Time_merged_umap_full <- Time_merged_umap
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir ,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
arbitrary_seg <-read.csv( paste0(project_dir, shrink_dir,"/final_selection_shrink.csv"))
all_coordata_label_sel_arc_bind<-cbind(all_coordata_label_sel_arc_bind,arbitrary_seg= arbitrary_seg$label)
example_data <-all_coordata_label_sel_arc_bind[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim",]
Time_merged_umap<-Time_merged_umap[Time_merged_umap$bin %in% all_coordata_label_sel_arc_bind$new_label_scale[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"],]
Time_merged_umap$Region <- example_data$arbitrary_seg[match(as.numeric(Time_merged_umap$bin), as.numeric(example_data$new_label_scale)) ]
Time_merged_umap_full$Region <- Time_merged_umap$Region[match(Time_merged_umap_full$bin, Time_merged_umap$bin)]
Time_merged_umap_full$Region[is.na(Time_merged_umap_full$Region)] <- "NS"
Time_merged_umap$Region<- factor(Time_merged_umap$Region, levels =c("Anterior", "Core", "Equator", "Posterior", "NS") )
Time_merged_umap_full$Region<- factor(Time_merged_umap_full$Region, levels =c("Anterior", "Core", "Equator", "Posterior", "NS") )
png(paste0(project_dir, dim_reduction_dir, UMAP_dir,"//umap_arb_shrink.png"),height = 3,width = 3,units = "in",res = 600)
fig<- ggplot(Time_merged_umap_full, aes(x= UMAP_2, y= UMAP_1))+
geom_point(data = Time_merged_umap_full[Time_merged_umap_full$Region =="NS", ],
stat = "identity", position = "identity", show.legend = FALSE,
colour = "grey80", size = 1
)+
theme_void()
fig<- fig + geom_point(data = Time_merged_umap_full[! (Time_merged_umap_full$Region =="NS"),],
stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha = .6,aes(color=Region)
)+ scale_colour_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
stat_ellipse(data = Time_merged_umap_full[! (Time_merged_umap_full$Region =="NS"),],
type = "t", linetype = "twodash", geom = "polygon", aes(color = Region, fill = Region), level = 0.75, size = 0.5, alpha = 0.2) + scale_fill_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
theme_void()+ theme(legend.position = "none")
print(fig)
dev.off()
set.seed(9)
umap.kmean.res<- kmeans(Time_merged_umap_full[,c("UMAP_1", "UMAP_2")], 6, nstart = 25)
Time_merged_umap<- data.frame(UMAP_1 = Time_merged_umap_full$UMAP_1, UMAP_2 = Time_merged_umap_full$UMAP_2, bin = Time_merged_umap_full$bin, K_Label=umap.kmean.res$cluster)
fig<- ggplot(Time_merged_umap, aes(x= UMAP_2, y= UMAP_1)) +
geom_point( stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha=.6,aes(color = as.factor(K_Label)) )+
scale_colour_manual(values= c("#dba053","#4e639e","#760F00", "#7FBFDD","#e54616", "#FF997C"))+theme_void()
png(paste0(project_dir,dim_reduction_dir, UMAP_dir, "/Scatter_K_means_UMAP.png"), width = 3, height = 3, units = "in", res=600)
print(fig)
dev.off()
all_coordata_label_sel_arc_bind$K_Label<-Time_merged_umap$K_Label[match(as.character(all_coordata_label_sel_arc_bind$new_label_scale),as.character(Time_merged_umap$bin))]
factor(all_coordata_label_sel_arc_bind$K_Label[which(all_coordata_label_sel_arc_bind$run =="4hrsilglucose_2nd_20201109_trim" )])->final_selection
lightmode()
fig.arbitrary_seg<- image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, col = c("#dba053","#4e639e","#760F00", "#7FBFDD","#e54616", "#FF997C"))
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
png(paste0(project_dir,dim_reduction_dir, UMAP_dir, "/Lens_K_means_UMAP.png"), width = 3, height = 3, units = "in", res=600)
print(fig.arbitrary_seg)
dev.off()
library(rsvd)
library(dplyr)
library(ggplot2)
Time_merged_tsne<- readRDS(paste0(project_dir, dim_reduction_dir, tSNE_dir, "/Time_merged_tsne_all_bio.rds"))
colnames(Time_merged_tsne)<- c("tSNE_1","tSNE_2","bin")
Time_merged_tsne_full <- Time_merged_tsne
all_coordata_label_sel_arc_bind<-read.csv(paste0(project_dir, spatial_alignment_dir ,"all_coordata_label_sel_arc_bind2.csv"),row.names = 1)
arbitrary_seg <-read.csv( paste0(project_dir, shrink_dir,"/final_selection_shrink.csv"))
all_coordata_label_sel_arc_bind<-cbind(all_coordata_label_sel_arc_bind,arbitrary_seg= arbitrary_seg$label)
example_data <-all_coordata_label_sel_arc_bind[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim",]
Time_merged_tsne<-Time_merged_tsne[Time_merged_tsne$bin %in% all_coordata_label_sel_arc_bind$new_label_scale[all_coordata_label_sel_arc_bind$run == "4hrsilglucose_2nd_20201109_trim"],]
Time_merged_tsne$Region <- example_data$arbitrary_seg[match(as.numeric(Time_merged_tsne$bin), as.numeric(example_data$new_label_scale)) ]
Time_merged_tsne_full$Region <- Time_merged_tsne$Region[match(Time_merged_tsne_full$bin, Time_merged_tsne$bin)]
Time_merged_tsne_full$Region[is.na(Time_merged_tsne_full$Region)] <- "NS"
Time_merged_tsne$Region<- factor(Time_merged_tsne$Region, levels =c("Anterior", "Core", "Equator", "Posterior", "NS") )
Time_merged_tsne_full$Region<- factor(Time_merged_tsne_full$Region, levels =c("Anterior", "Core", "Equator", "Posterior", "NS") )
png(paste0(project_dir, dim_reduction_dir, tSNE_dir,"//tSNE_arb_shrink.png"),height = 3,width = 3,units = "in",res = 600)
fig<- ggplot(Time_merged_tsne_full, aes(x= tSNE_1, y= tSNE_2))+
geom_point(data = Time_merged_tsne_full[Time_merged_tsne_full$Region =="NS", ],
stat = "identity", position = "identity", show.legend = FALSE,
colour = "grey80", size = 1
)+
theme_void()
fig<- fig + geom_point(data = Time_merged_tsne_full[! (Time_merged_tsne_full$Region =="NS"),],
stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha = .6,aes(color=Region)
)+ scale_colour_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
stat_ellipse(data = Time_merged_tsne_full[! (Time_merged_tsne_full$Region =="NS"),],
type = "t", linetype = "twodash", geom = "polygon", aes(color = Region, fill = Region), level = 0.75, size = 0.5, alpha = 0.2) + scale_fill_manual(values= c("#ff0027","#0092ff", "#800080","#20b2aa"))+
theme_void()+ theme(legend.position = "none")
fig
print(fig)
dev.off()
set.seed(9)
tsne.kmean.res<- kmeans(Time_merged_tsne_full[,c("tSNE_1", "tSNE_2")], 6, nstart = 25)
Time_merged_tsne<- data.frame(tSNE_1 = Time_merged_tsne_full$tSNE_1, tSNE_2 = Time_merged_tsne_full$tSNE_2, bin = Time_merged_tsne_full$bin, K_Label = tsne.kmean.res$cluster)
fig<- ggplot(Time_merged_tsne, aes(x= tSNE_1, y= tSNE_2)) +
geom_point( stat = "identity", position = "identity", show.legend = FALSE,
size = 1, alpha = 0.6, aes(color = as.factor(K_Label)) )+
scale_colour_manual(values= c("#760F00", "#dba053","#4e639e","#e54616", "#7FBFDD", "#FF997C"))+theme_void()
png(paste0(project_dir,dim_reduction_dir, tSNE_dir, "/Scatter_K_means_tSNE.png"), width = 3, height = 3, units = "in", res=600)
print(fig)
dev.off()
all_coordata_label_sel_arc_bind$K_Label<-Time_merged_tsne$K_Label[match(as.character(all_coordata_label_sel_arc_bind$new_label_scale),as.character(Time_merged_tsne$bin))]
factor(all_coordata_label_sel_arc_bind$K_Label[which(all_coordata_label_sel_arc_bind$run =="4hrsilglucose_2nd_20201109_trim" )])->final_selection
lightmode()
fig.arbitrary_seg<- image(example_imdata, factor(final_selection) ~ x * y,superpose=F, key=F, col = c("#760F00", "#dba053","#4e639e","#e54616", "#7FBFDD", "#FF997C"))
fig.arbitrary_seg[["par"]][["ann"]]=F
fig.arbitrary_seg[["par"]][["bty"]]="n"
fig.arbitrary_seg[["par"]][["pty"]]="s"
fig.arbitrary_seg[["par"]][["xaxt"]]="n"
fig.arbitrary_seg[["par"]][["yaxt"]]="n"
fig.arbitrary_seg[["par"]][["fg"]]="white"
fig.arbitrary_seg[["par"]][["oma"]]=c(0, 0, 0, 0)
png(paste0(project_dir,dim_reduction_dir, tSNE_dir, "/Lens_K_means_tSNE.png"), width = 3, height = 3, units = "in", res=600)
print(fig.arbitrary_seg)
dev.off()
library(Cardinal)
library(MetaboAnalystR)
library(dplyr)
library(plotly)
library(ggplot2)
library(ggpubr)
library(egg)
library(stringr)
library(reshape2)
load(paste0(project_dir, "/large_files/Time_merged_PCA_all_bio.rda") )
library(stringr)
targetmzs=c(221.0521,223.0679, 265.0425, 295.0538, 345.0090, 571.0667)
targetcpds <- c("SIL_Glucose", "SIL_Sorbitol", "SIL_G6P", "SIL_S7P", "SIL_F16BP", "SIL_UDP_Glucose")
targetmzs_df <- data.frame(targetmzs = targetmzs,targetcpds=targetcpds )
cls_merge_mt_mz<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,1]
cls_merge_mt_time<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,2]
cls_merge_mt[,cls_merge_mt_mz %in% match_mz(targetmzs,as.numeric(cls_merge_mt_mz),10,"ppm")]->plotdf
plotdf<-melt(plotdf)
plotdf<-as.data.frame(do.call(cbind,list(plotdf$Var1,str_split_fixed(plotdf$Var2,"@",2),plotdf$value)))
colnames(plotdf)<-c("bin","mz","time","Intensity")
cls_merge_mt_mz<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,1]
cls_merge_mt_time<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,2]
cls_merge_mt[,cls_merge_mt_mz %in% match_mz(targetmzs,as.numeric(cls_merge_mt_mz),10,"ppm")]->plotdf
plotdf<-melt(plotdf)
plotdf<-as.data.frame(do.call(cbind,list(plotdf$Var1,str_split_fixed(plotdf$Var2,"@",2),plotdf$value)))
colnames(plotdf)<-c("bin","mz","time","Intensity")
targetmzs_df$targetmzs <- match_mz(targetmzs, as.numeric(plotdf$mz), 10, "ppm")
plotdf$cpd.name<- targetmzs_df$targetcpds[match(plotdf$mz, targetmzs_df$targetmzs)]
unique(plotdf[,c("mz", "cpd.name")])
set.seed(2)
pca.kmean.res<- kmeans(PCA[["ind"]][["coord"]][,4:5],6, nstart = 25)
Time_merged_pca<- data.frame(PCA[["ind"]][["coord"]][,4:5],rownames(PCA[["ind"]][["coord"]]))
colnames(Time_merged_pca)<-c("Dim.4","Dim.5","bin")
pca.kmean.res.cluster<-data.frame(pca.kmean.res$cluster)
index<- match(Time_merged_pca$bin, as.numeric(rownames(pca.kmean.res.cluster)))
Time_merged_pca$K_Label<- pca.kmean.res.cluster[,1][index]
plotdf$Intensity<-as.numeric(plotdf$Intensity)
plotdf$Label_PCA<-as.factor(Time_merged_pca$K_Label[match(plotdf$bin,Time_merged_pca$bin)])
# plotdf$Label_Arbitrary<-factor(bin_freq$final[match(plotdf$bin,bin_freq$new_label_scale)],levels=c("Anterior","Posterior", "Equator", "IC", "Core"))
plotdf<-plotdf[!is.na(plotdf$Label_PCA),]
plotdf_trim<-unique(plotdf[,c("time","bin")])
plotdf_trim<-table(plotdf_trim$bin)
names(plotdf_trim[plotdf_trim==10])->trimedbin
plotdf[plotdf$bin %in% trimedbin,]->plotdf
plotdf$mz<-as.factor(plotdf$mz)
k <- c(1:6)
Mannual_label<- c( "Noise", "Inner Cortex","Noise", "Posterior", "Anterior","Equator + Core")
df_Mannual_label<- data.frame(K_Label = k, Mannual_label= Mannual_label)
df_Mannual_label$Mannual_label<- factor(df_Mannual_label$Mannual_label, levels = c( "Anterior","Posterior", "Noise", "Inner Cortex", "Equator + Core"))
plotdf$Mannual_label <- df_Mannual_label$Mannual_label[match(plotdf$Label_PCA, df_Mannual_label$K_Label)]
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE),
sem = sd(x[[col]], na.rm=TRUE)/sqrt(length(x[[col]])))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
plotdf$Intensity <- as.numeric(plotdf$Intensity )
plotdf$Intensity[plotdf$Intensity==0]<-NA
plotdf$cpd.name<- factor(plotdf$cpd.name, levels = targetcpds)
df2 <- data_summary(plotdf, varname="Intensity",
groupnames=c("mz", "time","Mannual_label"))
df2$cpd.name <- plotdf$cpd.name[match(df2$mz, plotdf$mz)]
df2$time_incubation <- time_df$time[match(as.numeric(df2$time), time_df$class)]
df2_sub<- filter(df2, !Mannual_label == "Noise")
p<- ggplot(df2_sub, aes(x=time_incubation, y=Intensity, group=Mannual_label, color=Mannual_label)) +
geom_line() +
geom_point()+
#stat_smooth( method = lm, formula = y ~ poly(x, ploy_order), se = F) +
geom_errorbar(aes(ymin=Intensity-sem, ymax=Intensity+sem), width=.2,
position=position_dodge(0.05))+
theme_article()+
theme(legend.position="none",
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+
ggh4x::facet_grid2(cols= vars(cpd.name), rows= vars(Mannual_label), scales = "free", independent = "y")
png(paste0(wd,"PCA_label_image","_anno_SIL_cpd_","summarized","_timeseries_plot_norm.png"),width = 8500,height = 4000,res=600)
print(p)
dev.off()
targetmzs=c(221.0521,223.0679, 265.0425, 295.0538, 345.0090, 571.0667)
targetcpds <- c("SIL_Glucose", "SIL_Sorbitol", "SIL_G6P", "SIL_S7P", "SIL_F16BP", "SIL_UDP_Glucose")
targetmzs_df <- data.frame(targetmzs = targetmzs,targetcpds=targetcpds )
cls_merge_mt_mz<- stringr::str_split_fixed(colnames(cls_merge_mt),"@",2)[,1]
cls_merge_mt_time<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,2]
cls_merge_mt[,cls_merge_mt_mz %in% match_mz(targetmzs,as.numeric(cls_merge_mt_mz),10,"ppm")]->plotdf
plotdf<-melt(plotdf)
plotdf<-as.data.frame(do.call(cbind,list(plotdf$Var1,str_split_fixed(plotdf$Var2,"@",2),plotdf$value)))
colnames(plotdf)<-c("bin","mz","time","Intensity")
cls_merge_mt_mz<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,1]
cls_merge_mt_time<-str_split_fixed(colnames(cls_merge_mt),"@",2)[,2]
cls_merge_mt[,cls_merge_mt_mz %in% match_mz(targetmzs,as.numeric(cls_merge_mt_mz),10,"ppm")]->plotdf
plotdf<-melt(plotdf)
plotdf<-as.data.frame(do.call(cbind,list(plotdf$Var1,str_split_fixed(plotdf$Var2,"@",2),plotdf$value)))
colnames(plotdf)<-c("bin","mz","time","Intensity")
targetmzs_df$targetmzs <- match_mz(targetmzs, as.numeric(plotdf$mz), 10, "ppm")
plotdf$cpd.name<- targetmzs_df$targetcpds[match(plotdf$mz, targetmzs_df$targetmzs)]
unique(plotdf[,c("mz", "cpd.name")])
set.seed(2)
pca.kmean.res<- kmeans(PCA[["ind"]][["coord"]][,4:5],6, nstart = 25)
Time_merged_pca<- data.frame(PCA[["ind"]][["coord"]][,4:5],rownames(PCA[["ind"]][["coord"]]))
colnames(Time_merged_pca)<-c("Dim.4","Dim.5","bin")
pca.kmean.res.cluster<-data.frame(pca.kmean.res$cluster)
index<- match(Time_merged_pca$bin, as.numeric(rownames(pca.kmean.res.cluster)))
Time_merged_pca$K_Label<- pca.kmean.res.cluster[,1][index]
plotdf$Intensity<-as.numeric(plotdf$Intensity)
plotdf$Label_PCA<-as.factor(Time_merged_pca$K_Label[match(plotdf$bin,Time_merged_pca$bin)])
# plotdf$Label_Arbitrary<-factor(bin_freq$final[match(plotdf$bin,bin_freq$new_label_scale)],levels=c("Anterior","Posterior", "Equator", "IC", "Core"))
plotdf<-plotdf[!is.na(plotdf$Label_PCA),]
plotdf_trim<-unique(plotdf[,c("time","bin")])
plotdf_trim<-table(plotdf_trim$bin)
names(plotdf_trim[plotdf_trim==10])->trimedbin
plotdf[plotdf$bin %in% trimedbin,]->plotdf
plotdf$mz<-as.factor(plotdf$mz)
k <- c(1:6)
Mannual_label<- c( "Noise", "Inner Cortex","Noise", "Posterior", "Anterior","Equator + Core")
df_Mannual_label<- data.frame(K_Label = k, Mannual_label= Mannual_label)
df_Mannual_label$Mannual_label<- factor(df_Mannual_label$Mannual_label, levels = c( "Anterior","Posterior", "Noise", "Inner Cortex", "Equator + Core"))
plotdf$Mannual_label <- df_Mannual_label$Mannual_label[match(plotdf$Label_PCA, df_Mannual_label$K_Label)]
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE),
sem = sd(x[[col]], na.rm=TRUE)/sqrt(length(x[[col]])))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
plotdf$Intensity <- as.numeric(plotdf$Intensity )
plotdf$Intensity[plotdf$Intensity==0]<-NA
plotdf$cpd.name<- factor(plotdf$cpd.name, levels = targetcpds)
plotdf$y <- all_coordata_label_sel_arc_bind$y[match(as.numeric(plotdf$bin), all_coordata_label_sel_arc_bind$new_label_scale)]
plotdf$x <- all_coordata_label_sel_arc_bind$x[match(as.numeric(plotdf$bin), all_coordata_label_sel_arc_bind$new_label_scale)]
plotdf_cluster <-filter(plotdf, Mannual_label == "Equator + Core")
plotdf_rest<- filter(plotdf, !Mannual_label == "Equator + Core")
index_core<- which('&'(plotdf_cluster$y < 100, plotdf_cluster$y> 22))
index_equator<- which('|'(plotdf_cluster$y >= 100, plotdf_cluster$y<= 22))
plotdf_cluster$Mannual_label<-NA
plotdf_cluster$Mannual_label[index_core]<- "Core"
plotdf_cluster$Mannual_label[index_equator]<- "Equator"
plotdf<- rbind(plotdf_rest, plotdf_cluster)
plotdf$cpd.name<-factor(plotdf$cpd.name, levels = c("SIL_Glucose", "SIL_G6P", "SIL_F16BP", "SIL_S7P", "SIL_Sorbitol", "SIL_UDP_Glucose"))
df2 <- data_summary(plotdf, varname="Intensity",
groupnames=c("mz", "time","Mannual_label"))
df2$cpd.name <- plotdf$cpd.name[match(df2$mz, plotdf$mz)]
df2$time_incubation <- time_df$time[match(as.numeric(df2$time), time_df$class)]
df2_sub<- filter(df2, !Mannual_label == "Noise")
p<- ggplot(df2_sub, aes(x=time_incubation, y=Intensity, group=Mannual_label, color=Mannual_label)) +
geom_line() +
geom_point()+
#stat_smooth( method = lm, formula = y ~ poly(x, ploy_order), se = F) +
geom_errorbar(aes(ymin=Intensity-sem, ymax=Intensity+sem), width=.2,
position=position_dodge(0.05))+
theme_article()+
theme(legend.position="none",
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+
ggh4x::facet_grid2(cols= vars(cpd.name), rows= vars(Mannual_label), scales = "free", independent = "y")
png(paste0(wd,"PCA_label_image","_anno_SIL_cpd_","summarized","_timeseries_plot_norm.png"),width = 8500,height = 4000,res=600)
print(p)
dev.off()