-
Notifications
You must be signed in to change notification settings - Fork 0
/
FIMO_annotated.R
68 lines (68 loc) · 3.37 KB
/
FIMO_annotated.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
setwd("")
## lists file of meme matrices
All=grep(".meme$",list.files(),value=T)
fimo_function <- function(x){
setwd("/bigdata/serreslab/mreynoso/Big_Exp/results/DPI/AME/For_FIMO")
## lists file of meme matrices
All=grep(".meme$",list.files(),value=T)
## command for FIMO over the whole genome
file_ds=paste0("module load meme; module load bedtools; fimo --thresh 0.01 --max-stored-scores 1000000000 -o ",Fimo_meme[x],"_FIMO ",Fimo_meme[x],".meme Oryza_sativa.IRGSP-1.0.30.dna.genome.fa")
## call command for FIMO
system(file_ds)
## setwd to result folder
setwd(paste0("/For_FIMO/",Fimo_meme[x],"_FIMO/"))
## export bed file of results with locations
file_ss=paste0("module load bedtools; cut -f3,4,5 fimo.tsv > ", Fimo_meme[x] ,".bed; bedtools sort -i ",Fimo_meme[x],".bed > ",Fimo_meme[x],".sorted.bed; ")
## call command for FIMO
system(file_ss)
## libraries
library(ChIPpeakAnno)
library(GenomicRanges)
# reads annotation of genes GRanges
eByg <- readRDS("Genes.RDS")
# reads annotation of THSs
THS_gr <- readRDS("ATAC_GH_FIELD_35S.RDS")
# reads differential THSs
Diff_THS_gr=readRDS("Diff_THS_gr.RDS")
# reads annotation of genes
annotation=readRDS("annotation_040620.RDS")
setwd(paste0("/For_FIMO/",Fimo_meme[x],"_FIMO/"))
## reads FIMO results
file_fimo=read.delim(paste0("fimo.tsv"),comment.char = "#")
## adds colnames
colnames(file_fimo)=gsub("sequence_name","seqnames",colnames(file_fimo))
## converts to Granges
file_fimo_gr=GRanges(file_fimo)
## annotate with respect to genes
annoGR=as.data.frame(annotatePeakInBatch(file_fimo_gr,eByg))
## find overlaps with THSs locations
object=findOverlaps(file_fimo_gr,THS_gr)
## aux dataframe with overlaps
df=as.data.frame(object)
## Add names to target regions
names(file_fimo_gr)=1:length(file_fimo_gr)
## find overlaps with dTHSs locations
object2=findOverlaps(file_fimo_gr,Diff_THS_gr)
## aux dataframe with overlaps
dfDEG=as.data.frame(object2)
## addition to annotated THSs
annoGR$THS=df$subjectHits[match(annoGR$peak,df$queryHits)]
## addition of labels
annoGR$labels=Diff_THS_gr$labels[dfDEG$subjectHits[match(annoGR$peak,dfDEG$queryHits)]]
## addition to dTHSs
annoGR$dTHS=dfDEG$subjectHits[match(annoGR$peak,dfDEG$queryHits)]
## addition of description
annoGR$description=annotation$Description[match(annoGR$feature,annotation$GENEID)]
## addition of short names
annoGR$CGSNL.Gene.Symbol=annotation$CGSNL.Gene.Symbol[match(annoGR$feature,annotation$GENEID)]
## filter step to upstream closer than 2kb, either inside or downstream less than 1kb
filtered_annoGR=annoGR[(annoGR$insideFeature=="upstream"&annoGR$shortestDistance<2000)|(annoGR$insideFeature=="downstream"&annoGR$shortestDistance<1000)|(annoGR$insideFeature=="inside"),]
write.csv(filtered_annoGR,paste0("annoGR_",Fimo_meme[x],".csv"))
}
#### Run on cluster or individually locally
library(BiocParallel); library(BatchJobs)
cluster.functions <- makeClusterFunctionsSLURM("slurm.tmpl")
resources <- list(walltime="4:00:00", ntasks=length(Fimo_meme), ncpus=1, memory="8G") # note this assigns 1Gb of Ram per core. If ncpus is 4 then this will amount to 4Gb total
param <- BatchJobsParam(length(Fimo_meme), resources=resources, cluster.functions=cluster.functions)
register(param)
Funct <- bplapply(seq(along=Fimo_meme), fimo_function) # Check status with qstat