-
Notifications
You must be signed in to change notification settings - Fork 2
/
BEDcounter_multicore.R
72 lines (54 loc) · 1.47 KB
/
BEDcounter_multicore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#Read Copy Counter and Aggregator of the Mapped Reads in BED files
#Taihsien Ouyang
#2015 Dec. 9
library("doMC")
args <- commandArgs(TRUE)
cat("== Multi-Cored BED Read Counter ==\n")
## Passing arguments to R ##
if(length(args)>0){
NUM_CORE=as.numeric(args[1])
cat("Set number of cores =", NUM_CORE ,"\n")
}else{
NUM_CORE=12
}
registerDoMC(NUM_CORE)
fileNames=dir()
## Create the annotation vector ##
annot=NULL
for(i in 1:length(fileNames)){
cat("Scanning file", i,"...\n" )
bed<-read.table(fileNames[i])
annot<-unique(c( annot, as.character(bed$V1) ))
}
## Read the BED files in parallel ##
geList<-foreach(j = 1:length(fileNames)) %dopar% {
cat("Processing file", j,"...\n" )
bed<-read.table(fileNames[j])
cat("creating UMI table", j,"...\n")
umiList<-rep("",nrow(bed))
for(i in 1:nrow(bed)){
umiList[i]=strsplit(as.character(bed$V4[i]), "_")[[1]][2]
}
geneList<-as.character(bed$V1)
geneNames<-unique(geneList)
g<-rep(0,length(annot))
names(g)<-annot
cat("Summarizing", j,"...\n")
for(i in 1:length(annot)){
g[annot[i]] =length(unique( umiList[ which(geneList==geneNames[i]) ] ))
}
cat("Done", j,"\n")
return(g)
}
## Aggregate the profiles ##
ge<-matrix(0,length(annot),length(fileNames))
rownames(ge)<-annot
colnames(ge)<-fileNames
cat("Aggregating expression profiles...")
for(i in 1:length(geList)){
ge[,i]<-geList[[i]]
}
cat("Writing outputs\n")
save(ge, file="count_matrix.rda")
write.table(ge, file="count_matrix.csv",sep=",")
cat("== Done ==\n")