-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_edgeR.R
62 lines (53 loc) · 2.81 KB
/
run_edgeR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env Rscript
# parse parameter ---------------------------------------------------------
library(argparser, quietly=TRUE)
# Create a parser
p <- arg_parser("Run differential expression analysis using DESeq2.")
# Add command line arguments
p <- add_argument(p, "--matrix", help="matrix of raw read counts (not normalized!)", type="character")
p <- add_argument(p, "--samples_file", help="tab-delimited text file indicating biological replicate relationships.", type="character")
p <- add_argument(p, "--min_reps", help="At least min count of replicates must have cpm values > min cpm value.", type="numeric", default = 1)
p <- add_argument(p, "--min_cpm", help="At least min count of replicates must have cpm values > min cpm value.", type="numeric", default = 1)
p <- add_argument(p, "--contrasts", help="file (tab-delimited) containing the pairs of sample comparisons to perform.", type="character")
p <- add_argument(p, "--dispersion", help = "edgeR dispersion value.", type = "numeric", default = 0.1)
# Parse the command line arguments
argv <- parse_args(p)
matrix <- argv$matrix
samples_file <- argv$samples_file
min_reps <- argv$min_reps
min_cpm <- argv$min_cpm
contrasts <- argv$contrasts
dispersion <- argv$dispersion
# test
if (FALSE) {
matrix <- "../03.Merge_result/genes.counts.matrix"
samples_file <- "../00.data/samples.txt"
min_reps <- 1
min_cpm <- 1
contrasts <- "./contrasts.txt"
shrinkage <- TRUE
}
library(edgeR)
data <- read.table(file = matrix, header = TRUE, row.names = 1, com = '')
samples <- read.table(file = samples_file, header = FALSE, sep = "\t")
colnames(samples)[1:2] <- c("group", "sample")
contrasts <- read.table(file = contrasts, header = FALSE, sep = "\t")
colnames(contrasts)[1:2] <- c("treatment", "control")
for (i in 1:nrow(contrasts)) {
treatment <- contrasts[i, 1][[1]]
control <- contrasts[i, 2][[1]]
col_ordering <- c(which(samples$group %in% treatment), which(samples$group %in% control))
rnaseqMatrix <- data[,col_ordering]
rnaseqMatrix <- round(rnaseqMatrix)
rnaseqMatrix <- rnaseqMatrix[rowSums(cpm(rnaseqMatrix) > min_cpm) >= min_reps, ]
conditions <- factor(c(rep(treatment, 1), rep(control, 1)))
exp_study <- DGEList(counts = rnaseqMatrix, group = conditions)
exp_study <- calcNormFactors(exp_study)
et <- exactTest(exp_study, pair = c(treatment, control), dispersion = 0.1)
tTags <- topTags(et, n = NULL)
result_table <- tTags$table
result_table <- data.frame(sampleA = treatment, sampleB = control, result_table)
result_table$logFC <- -1 * result_table$logFC
write.table(x = res, file = paste(paste(treatment, "vs", control, sep = "_"), "edgeR.DE_results.txt", sep = "."), sep = '\t', quote = FALSE)
write.table(x = rnaseqMatrix, file = paste(paste(treatment, "vs", control, sep = "_"), "edgeR.read_count.txt", sep = "."), sep = '\t', quote = FALSE)
}