-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDeSeq2Peak.r
94 lines (80 loc) · 3.2 KB
/
DeSeq2Peak.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#install all the required package from Bioconductor through bioManager cammand
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2", "vsn")
library(DESeq2)
library(vsn)
library(pheatmap)
library(RColorBrewer)
library(ggplot2)
library(genefilter)
#preparing count matrix as input
counts=read.csv(“practice_coronary”, sep=“”, head=T, skip=1, row.names = “Geneid”)
meta = read.table(“coldata.txt”, header=F)
counts <- counts[ -c(1:5) ]
colnames(meta) = c(“sample”, “condition”)
#meta =meta[ -c(1)]
#if you dont remove 1st column from meta, it will have 2 column one as sample and othr as condition , which we can use downstream during diffrential heatmap
rownames(meta)= colnames(counts)
meta$condition = factor(meta$condition)
dds = DESeqDataSetFromMatrix(countData = counts, colData = meta, design = ~ condition)
print(dds)
vsd = vst(dds, blind=F)
meanSdPlot(assay(vsd), ranks=F)
# Select ntop most variable peaks
ntop = 2500
rv = rowVars(assay(vsd))
selectIDX = order(rv, decreasing=T)[seq_len(min(ntop, length(rv)))]
# Sample heatmap
sampleDists = dist(t(assay(vsd)[selectIDX,]))
sampleDistMatrix = as.matrix(sampleDists)
rownames(sampleDistMatrix) = paste(vsd$name)
colnames(sampleDistMatrix) = NULL
colors = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists, col=colors)
# PCA
pca = prcomp(t(assay(vsd)[selectIDX,]))
print(summary(pca))
# Check loadings (which peaks contribute most to each PC)
loadings = abs(pca$rotation)
contribution = as.data.frame(sweep(loadings, 2, colSums(loadings), "/"))
contribution = contribution[with(contribution, order(-PC1)),]
print(head(contribution))
# Plot PCA
pcaData = as.data.frame(pca$x)
pcaData$name=rownames(pcaData)
pcaData=merge(pcaData, meta)
percentVar = round(100 * (pca$sdev^2 / sum( pca$sdev^2 ) ))
p=ggplot(data=pcaData, aes(x = PC1, y = PC2, color=condition)) + geom_point(size=3)
p=p+xlab(paste0("PC1: ", percentVar[1], "% variance"))
p=p+ylab(paste0("PC2: ", percentVar[2], "% variance"))
p
q=ggplot(data=pcaData, aes(x = PC3, y = PC4, color=condition)) + geom_point(size=3)
q=q+xlab(paste0("PC3: ", percentVar[3], "% variance"))
q=q+ylab(paste0("PC4: ", percentVar[4], "% variance"))
q
# Differential expression
dds = DESeq(dds)
# log-foldchange > 1, adjusted p-value < 5%
#res = results(dds, lfcThreshold=0.25, alpha=0.05)
res = results(dds, lfcThreshold=0, alpha=0.05)
print(mcols(res, use.names=T))
print(summary(res))
# Histogram
hist(res$pvalue[res$baseMean > 1], breaks=0:20/20, col="grey50", border="white", xlim=c(0,1), main="Histogram of p-values", xlab="p-value")
# MA-plot
plotMA(res, ylim = c(-5, 5))
# Significant results
resSig = subset(res, padj<0.05)
resSig = resSig[order(resSig$pvalue),]
# Heatmap
mat = assay(dds)[rownames(resSig),]
mat = mat - rowMeans(mat)
anno = as.data.frame(colData(dds)[, c("name", "condition")])
rownames(mat) = NULL
pheatmap(mat, annotation_col = anno, scale="row")
# Write results
resSig = as.data.frame(resSig)
resSig$id = rownames(resSig)
resSig = merge(resSig, peaks)
write.table(resSig, file="res_sig.tsv", col.names=T, row.names=F, quote=F, sep="\t")