-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathNoiseCorrection.R
54 lines (37 loc) · 1.18 KB
/
NoiseCorrection.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#kNN-Based Noise Correction Model for Single Cell RNA-Seq data
#Taihsien Ouyang
#2015 Dec. 9
library("impute")
# Collect only the user-supplied (trailing) command-line arguments. The first
# one is the path of an R data file that is restored with load().
args <- commandArgs(trailingOnly = TRUE)
cat("== kNN-Based Noise Correction of Single Cell RNA-Seq data ==\n")
## Passing arguments to R ##
if (length(args) == 0) {
  stop("Please provide the count matrix file.\n")
}
# load() returns the names of the objects it restored. The rest of the script
# reads an object called `ge`, so fail right away with a clear message if the
# file does not supply it, instead of failing later with "object not found"
# (or silently using a `ge` left over in a restored workspace).
loaded_objects <- load(args[1])
if (!("ge" %in% loaded_objects)) {
  stop("The file '", args[1], "' does not contain an object named 'ge'.\n")
}
## Estimate the parameters of Poisson Model ##
# Missing entries are replaced by zero so that the per-row mean and standard
# deviation below are always defined.
ge[is.na(ge)] <- 0
mu_all <- apply(ge, 1, mean)
sd_all <- apply(ge, 1, sd)

## Noise Filtering ##
## The Poisson model is based on doi:10.1038/nmeth.2772
# Largest standard deviation accepted for a row whose mean is `mu`.
noise_limit <- function(mu) {
  3.7 * mu^0.5 + 0.3
}

# Rows whose spread exceeds the limit. Integer row/column indices are used
# throughout (instead of row/column names) so that matrices with missing or
# duplicated dimnames are handled correctly; names are only used for messages.
noisy_rows <- which(sd_all > noise_limit(mu_all))
gene_labels <- rownames(ge)
if (is.null(gene_labels)) {
  gene_labels <- as.character(seq_len(nrow(ge)))
}
n_cells <- ncol(ge)

# Iterating over the index vector itself does nothing when no row is noisy
# (1:length(x) would iterate over c(1, 0) and fail on an empty selection).
for (gene_row in noisy_rows) {
  values <- ge[gene_row, ]
  kept <- seq_len(n_cells)
  # Drop the current maximum one value at a time, always keeping at least two
  # values, until the remaining values satisfy the noise limit.
  while (length(kept) > 2) {
    kept <- kept[-which.max(values[kept])]
    if (sd(values[kept]) <= noise_limit(mean(values[kept]))) {
      # Usually only 1 value is removed.
      cat(
        gene_labels[gene_row], ": remove", n_cells - length(kept),
        "value(s) out of", n_cells, " values.\n"
      )
      # Mark the dropped values as missing so they can be imputed later.
      # If the limit is never met, the row is left unchanged.
      ge[gene_row, -kept] <- NA
      break
    }
  }
}
## Impute the matrix ##
# Run impute.knn() from the "impute" package on `ge` with its default
# settings and keep only the `data` component of the returned list.
cat("Imputing the matrix\n")
ge.impute <- impute.knn(ge)[["data"]]

# Store the imputed matrix, under the name `ge.impute`, in an .rda file in
# the current working directory.
cat("Writing results\n")
save(list = "ge.impute", file = "count_matrix_imputed.rda")
cat("== Done ==\n")