From ed7c3007d1ee6d1d882f311fc790374f0828fef5 Mon Sep 17 00:00:00 2001
From: Fengyuan Hu <huf@bi1820m.babraham.ac.uk>
Date: Wed, 6 Jan 2016 16:09:33 +0000
Subject: [PATCH] add all scripts

---
 commands/ribotaper_Gao.q              |    9 +
 commands/ribotaper_Zebr.q             |    9 +
 commands/ribotaper_Zebr.q~            |    9 +
 commands/ribotaper_this_study.q       |    9 +
 commands_executed                     |   91 ++
 scripts/CCDS_orf_finder.R             | 1070 +++++++++++++++
 scripts/NONCCDS_orf_finder.R          |  983 ++++++++++++++
 scripts/ORF_final_results.R           |  208 +++
 scripts/P_sites_RNA_sites_calc.bash   |   75 ++
 scripts/Ribotaper.sh                  |  191 +++
 scripts/Ribotaper.sh~                 |  191 +++
 scripts/Ribotaper_ORF_find.sh         |  129 ++
 scripts/analyze_multi_clust.bash      |   54 +
 scripts/annotate_exons.R              |  118 ++
 scripts/bowrrna_star.q                |   34 +
 scripts/create_annotations_files.bash |  231 ++++
 scripts/create_metaplots.bash         |   76 ++
 scripts/create_protein_db.R           |  310 +++++
 scripts/create_tracks.bash            |   75 ++
 scripts/functions.R                   | 1734 +++++++++++++++++++++++++
 scripts/genes_coor.R                  |   14 +
 scripts/gtf_to_start_stop_tr.R        |   93 ++
 scripts/include_multi_nomerge.R       |   79 ++
 scripts/metag.R                       |  133 ++
 scripts/quality_check.R               |  265 ++++
 scripts/tracks_analysis.R             |  127 ++
 26 files changed, 6317 insertions(+)
 create mode 100644 commands/ribotaper_Gao.q
 create mode 100644 commands/ribotaper_Zebr.q
 create mode 100644 commands/ribotaper_Zebr.q~
 create mode 100644 commands/ribotaper_this_study.q
 create mode 100644 commands_executed
 create mode 100755 scripts/CCDS_orf_finder.R
 create mode 100755 scripts/NONCCDS_orf_finder.R
 create mode 100755 scripts/ORF_final_results.R
 create mode 100755 scripts/P_sites_RNA_sites_calc.bash
 create mode 100755 scripts/Ribotaper.sh
 create mode 100644 scripts/Ribotaper.sh~
 create mode 100755 scripts/Ribotaper_ORF_find.sh
 create mode 100755 scripts/analyze_multi_clust.bash
 create mode 100755 scripts/annotate_exons.R
 create mode 100644 scripts/bowrrna_star.q
 create mode 100755 scripts/create_annotations_files.bash
 create mode 100755 scripts/create_metaplots.bash
 create mode 100755 scripts/create_protein_db.R
 create mode 100755 scripts/create_tracks.bash
 create mode 100755 scripts/functions.R
 create mode 100755 scripts/genes_coor.R
 create mode 100755 scripts/gtf_to_start_stop_tr.R
 create mode 100755 scripts/include_multi_nomerge.R
 create mode 100755 scripts/metag.R
 create mode 100755 scripts/quality_check.R
 create mode 100755 scripts/tracks_analysis.R

diff --git a/commands/ribotaper_Gao.q b/commands/ribotaper_Gao.q
new file mode 100644
index 0000000..196b9d6
--- /dev/null
+++ b/commands/ribotaper_Gao.q
@@ -0,0 +1,9 @@
+#!/bin/bash
+#$ -pe smp 7
+#$ -l h_vmem=8G
+#$ -e "error_ribot_new"
+#$ -o "out_ribot_new"
+#$ -cwd
+
+
+../scripts/Ribotaper.sh ../alignment_files/HEK_293_Ribo_Gao_etal_Aligned.out.sorted.bam ../alignment_files/HEK_293_RNA_Gao_etal_Aligned.out.sorted.bam ../annotation_dir_human/ 26,27,28,29 12,12,12,12 ../scripts/ ../bedtools_dir/ 7
diff --git a/commands/ribotaper_Zebr.q b/commands/ribotaper_Zebr.q
new file mode 100644
index 0000000..7ace1d3
--- /dev/null
+++ b/commands/ribotaper_Zebr.q
@@ -0,0 +1,9 @@
+#!/bin/bash
+#$ -pe smp 7
+#$ -l h_vmem=8G
+#$ -e "error_ribot_new"
+#$ -o "out_ribot_new"
+#$ -cwd
+
+
+../scripts/Ribotaper.sh  ../alignment_files/Danio_rerio_Bazzini_5hPF_Ribo_Aligned.out.sorted.bam  ../alignment_files/Danio_rerio_Bazzini_5hPF_RNA_Aligned.out.sorted.bam ../annotation_dir_zebr/ 28,29 12,12 ../scripts/ ../bedtools_dir/ 7
diff --git a/commands/ribotaper_Zebr.q~ b/commands/ribotaper_Zebr.q~
new file mode 100644
index 0000000..ba9f1b5
--- /dev/null
+++ b/commands/ribotaper_Zebr.q~
@@ -0,0 +1,9 @@
+#!/bin/bash
+#$ -pe smp 7
+#$ -l h_vmem=6G
+#$ -e "error_ribot_new"
+#$ -o "out_ribot_new"
+#$ -cwd
+
+
+../scripts/Ribotaper.sh  ../alignment_files/Danio_rerio_Bazzini_5hPF_Ribo_Aligned.out.sorted.bam  ../alignment_files/Danio_rerio_Bazzini_5hPF_RNA_Aligned.out.sorted.bam ../annotation_dir_zebr/ 28,29 12,12 ../scripts/ ../bedtools_dir/ 7
diff --git a/commands/ribotaper_this_study.q b/commands/ribotaper_this_study.q
new file mode 100644
index 0000000..670329e
--- /dev/null
+++ b/commands/ribotaper_this_study.q
@@ -0,0 +1,9 @@
+#!/bin/bash
+#$ -pe smp 7
+#$ -l h_vmem=8G
+#$ -e "error_ribot_new"
+#$ -o "out_ribot_new"
+#$ -cwd
+
+
+../scripts/Ribotaper.sh ../alignment_files/HEK_293_Ribo_This_study_Aligned.out.sorted.bam ../alignment_files/HEK_293_RNA_This_study_Aligned.out.sorted.bam ../annotation_dir_human/ 26,28,29 9,12,12 ../scripts/ ../bedtools_dir/ 7
diff --git a/commands_executed b/commands_executed
new file mode 100644
index 0000000..fa88f02
--- /dev/null
+++ b/commands_executed
@@ -0,0 +1,91 @@
+
+####----------------Here is a list of the commands executed for the RiboTaper analysis (version 1.2, November 2015)------------------
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+
+####Unpack the files provided on our website (https://ohlerlab.mdc-berlin.de/software/):
+
+#alignment_files.tar.gz
+#annotation_human_daniorerio.tar.gz
+#RiboTaper_v1.0.tar.gz
+
+
+tar -zxvf alignment_files.tar.gz
+tar -zxvf annotation_human_daniorerio.tar.gz
+tar -zxvf RiboTaper_v1.0.tar.gz
+
+
+#### create annotation files for human using ccds and appris tags (Gencode 19 + hg19 genome):
+
+scripts/create_annotations_files.bash annotation_human_daniorerio/gencode.v19.annotation.gtf annotation_human_daniorerio/hg19_genome.fa true true annotation_dir_human bedtools_dir/ scripts/
+
+
+#### create annotation files for Danio rerio without using any tags (no ccds, no appris):
+
+
+scripts/create_annotations_files.bash annotation_human_daniorerio/Danio_rerio.Zv9.76_noscaff.gtf annotation_human_daniorerio/Danio_rerio.Zv9.dna.toplevel_noscaff.fa false false annotation_dir_zebr bedtools_dir/ scripts/
+
+
+#### create new directories for the provided experiments
+
+mkdir HEK_this_study HEK_Gao Zebrafish_Bazzini_5hPF
+
+
+
+### go inside the first directory
+
+cd HEK_this_study
+
+
+### submit bash script for SGE computing, using 7 cores and 8 Gigabyte of RAM per core
+### HEK293 data for this study, Ribo-seq newly generated + RNA seq from http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE49831
+### annotation files from human 
+
+qsub ../commands/ribotaper_this_study.q
+
+
+### go inside the second directory
+
+cd ../HEK_Gao
+
+### submit bash script for SGE computing, using 7 cores and 8 Gigabyte of RAM per core
+### HEK293 data for Gao et al, cycloheximide (Ribo) and RNA-seq experiments for the "control" sample: http://www.ncbi.nlm.nih.gov/sra/SRX740748%5Baccn%5D http://www.ncbi.nlm.nih.gov/sra/SRX740751%5Baccn%5D
+
+qsub ../commands/ribotaper_Gao.q
+
+
+
+### go inside the third directory
+
+cd ../Zebrafish_Bazzini_5hPF
+
+
+### Danio Rerio data for 5h_PF, from http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE53693
+### submit bash script for SGE computing, using 7 cores and 8 Gigabyte of RAM per core
+
+qsub ../commands/ribotaper_Zebr.q
+
diff --git a/scripts/CCDS_orf_finder.R b/scripts/CCDS_orf_finder.R
new file mode 100755
index 0000000..70d8d80
--- /dev/null
+++ b/scripts/CCDS_orf_finder.R
@@ -0,0 +1,1070 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for CCDS genes ORF Finding, takes as arguments annotation dir, RiboTaper scripts dir, bedtools dir, n of cores
+
+
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 4) stop("CCDS_orf_finder.R needs 4 arguments: annotation dir, RiboTaper scripts dir, bedtools dir, n of cores", call. = FALSE)
+print(paste("--- CCDS ORF finding","---",date(),sep=" "))
+###loads functions (functions.R from the RiboTaper scripts dir, args[2])
+suppressMessages(source(paste(args[2],"functions.R",sep = "/")))
+
+###takes n of cores: command-line arguments are character strings, so convert and validate before calling registerDoMC
+n_cores <- suppressWarnings(as.integer(args[4]))
+if (is.na(n_cores) || n_cores < 1L) stop("n of cores must be a positive integer, got: ", args[4], call. = FALSE)
+registerDoMC(n_cores)
+
+###loads annotation files: CDS coordinates per transcript, from the annotation dir (args[1])
+
+annot <- file.path(args[1], "cds_coords_transcripts")
+cdss_transcripts <- read.table(annot, header = FALSE, stringsAsFactors = FALSE)
+names(cdss_transcripts) <- c("transcript_id", "annotated_start", "annotated_stop")
+
+###loads exonic results (tab-separated table with a header, read from the working directory)
+
+
+results_ccds_ORFs <- read.table("all_calculations_ccdsgenes_annot_new", header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
+all_annot_notCCDS <- results_ccds_ORFs[results_ccds_ORFs[, "type"] != "ccds", ]
+
+###calculates coordinates for sequence search: UTR rows with NA overlapping_ccds_start are all kept,
+###the remaining UTR rows only when their type is 3_utrs_st or 5_utrs_st
+
+
+fives_threes <- all_annot_notCCDS[all_annot_notCCDS[, "type"] %in% c("3_utrs_ex", "3_utrs_st", "5_utrs_ex", "5_utrs_st"), ]
+fives_threes_nonov <- fives_threes[is.na(fives_threes[, "overlapping_ccds_start"]), ]
+fives_threes_ov <- fives_threes[!is.na(fives_threes[, "overlapping_ccds_start"]), ]
+
+fives_threes_ok <- rbind(fives_threes_nonov, fives_threes_ov[fives_threes_ov[, "type"] %in% c("3_utrs_st", "5_utrs_st"), ])
+
+
+###reads data tracks and the matching exon_id indexes from data_tracks/ in the working directory
+
+
+all_tracks_ccds <- readBigText("data_tracks/Psit_Ribo_Rna_Cent_tracks_ccds")
+
+index_ccds <- read.table("data_tracks/index_tracks_ccds", header = FALSE, stringsAsFactors = FALSE)
+names(index_ccds) <- "exon_id"
+
+
+all_tracks_exonsccds <- readBigText("data_tracks/Psit_Ribo_Rna_Cent_tracks_exonsccds")
+
+index_exonsccds <- read.table("data_tracks/index_tracks_exonsccds", header = FALSE, stringsAsFactors = FALSE)
+names(index_exonsccds) <- "exon_id"
+
+tr_ex <- file.path(args[1], "transcr_exons_ccds.bed")
+
+transcr_ccds <- read.table(tr_ex, header = FALSE, stringsAsFactors = FALSE)
+names(transcr_ccds) <- c("chr", "start", "end", "transcript_id", "gene_id", "strand")
+transcr_ccds$coords_id <- with(transcr_ccds, paste(chr, start, end, sep = "_"))
+transcr_ccds$coords_ok <- with(transcr_ccds, paste(chr, start, end, strand, sep = "_"))
+results_ccds_ORFs$coords_ok <- paste(results_ccds_ORFs$coords, results_ccds_ORFs$strand.x, sep = "_")
+
+###selects transcripts whose exons sum to more than 2 P_sites_sum
+
+transcr_ccds <- unique(merge(transcr_ccds, results_ccds_ORFs[, c("coords_ok", "P_sites_sum")], by = "coords_ok", all.x = TRUE))
+transcr_sites <- aggregate(transcr_ccds$P_sites_sum, by = list(transcr_ccds$transcript_id), FUN = sum)
+names(transcr_sites) <- c("transcript_id", "n_P_sites")
+#keeps the rows with n_P_sites > 2 and reduces them to their unique transcript_id values
+transcr_sites <- unique(transcr_sites[transcr_sites[, "n_P_sites"] > 2, "transcript_id"])
+###checks for CCDS transcripts (only when transcr_exons_ccds_ccdsid.bed is present in the annotation dir)
+
+
+if ("transcr_exons_ccds_ccdsid.bed" %in% list.files(path = args[1])) {
+        tr_cc_app_ccdsid <- file.path(args[1], "transcr_exons_ccds_ccdsid.bed")
+        transcr_ccds_ccdsid <- read.table(tr_cc_app_ccdsid, header = FALSE, stringsAsFactors = FALSE)
+        names(transcr_ccds_ccdsid) <- c("chr", "start", "end", "transcript_id", "gene_id", "strand")
+        transcr_ccds_ccdsid$coords_id <- with(transcr_ccds_ccdsid, paste(chr, start, end, sep = "_"))
+        transcr_ccds_ccdsid$exon_id <- with(transcr_ccds_ccdsid, paste(coords_id, "EXONCCDS", gene_id, sep = "_"))
+        #transcripts having at least one exon_id among the retained UTR regions (fives_threes_ok)
+        transcript_ccds_transl_uORF <- unique(transcr_ccds_ccdsid[transcr_ccds_ccdsid[, "exon_id"] %in% fives_threes_ok[, "exon_id"], "transcript_id"])
+        transcript_ccds_transl_uORF <- transcript_ccds_transl_uORF[!is.na(transcript_ccds_transl_uORF)]
+        if (!("transcr_exons_ccds_appris.bed" %in% list.files(path = args[1]))) {
+                #no APPRIS file available: the transcript selection is replaced by these transcripts only
+                transcr_sites <- unique(c(transcript_ccds_transl_uORF))
+        }
+}
+
+###checks for APPRIS transcripts (only when transcr_exons_ccds_appris.bed is present in the annotation dir)
+
+
+if ("transcr_exons_ccds_appris.bed" %in% list.files(path = args[1])) {
+        tr_cc_app <- file.path(args[1], "transcr_exons_ccds_appris.bed")
+        transcr_ccds_appr <- read.table(tr_cc_app, header = FALSE, stringsAsFactors = FALSE)
+        names(transcr_ccds_appr) <- c("chr", "start", "end", "transcript_id", "gene_id", "strand")
+        transcr_ccds_appr$coords_id <- with(transcr_ccds_appr, paste(chr, start, end, sep = "_"))
+        transcr_ccds_appr$exon_id <- with(transcr_ccds_appr, paste(coords_id, "EXONnonCCDS", gene_id, sep = "_"))
+        transcr_ccds_appr$coords2 <- with(transcr_ccds_appr, paste0(chr, ":", start, "-", end, "(", strand, ")"))
+        #see prev versions to change this
+        #transcript_ccds_transl<-results_ccds_ORFs[results_ccds_ORFs[,"pval_multit_3nt_ribo"]<0.05 & results_ccds_ORFs[,"P_sites_sum"]>5 ,]
+        #default selection: every transcript of the APPRIS file with a non-missing gene_id
+        transcript_ccds_transl <- transcr_ccds_appr[!is.na(transcr_ccds_appr[, "gene_id"]), ]
+        transcr_sites <- unique(transcript_ccds_transl$transcript_id)
+        if ("transcr_exons_ccds_ccdsid.bed" %in% list.files(path = args[1])) {
+                #CCDS ids also available: keep APPRIS transcripts with an exon having pval_multit_3nt_ribo<0.05 and P_sites_sum>5, plus transcript_ccds_transl_uORF
+                transcript_ccds_transl <- results_ccds_ORFs[results_ccds_ORFs[, "pval_multit_3nt_ribo"] < 0.05 & results_ccds_ORFs[, "P_sites_sum"] > 5, ]
+                transcript_ccds_transl <- transcript_ccds_transl[!is.na(transcript_ccds_transl[, "gene_id"]), ]
+                transcript_ccds_transl <- unique(transcr_ccds_appr[transcr_ccds_appr[, "coords_id"] %in% transcript_ccds_transl[, "coords"], "transcript_id"])
+                transcript_ccds_transl <- transcript_ccds_transl[!is.na(transcript_ccds_transl)]
+                transcr_sites <- unique(c(transcript_ccds_transl, transcript_ccds_transl_uORF))
+        }
+}
+
+
+#reduce the search space to enhance speed
+#NOTE(review): the key below is the first three "_"-separated fields of exon_id, so it assumes chromosome names contain no "_" -- confirm
+#coordinate key of every track index entry, compared later against the coords_id of each exon;
+#vapply keeps the result a character vector even when an index is empty (sapply would return a list)
+index_coords_ccds<-vapply(strsplit(index_ccds$exon_id,split="_"),function(x){paste(x[1],x[2],x[3],sep="_")},character(1))
+index_coords_exonsccds<-vapply(strsplit(index_exonsccds$exon_id,split="_"),function(x){paste(x[1],x[2],x[3],sep="_")},character(1))
+
+
+#index_coords_ccds<-index_coords_ccds[index_coords_ccds%in%transcr_ccds_fin_ids]
+
+#index_coords_exonsccds<-index_coords_exonsccds[index_coords_exonsccds%in%transcr_ccds_fin_ids]
+
+if("transcr_exons_ccds_ccdsid.bed"%in%list.files(path =args[1])){
+        transcr_sites<-unique(c(transcript_ccds_transl_uORF,transcr_sites))
+}
+
+transcr_sites<-transcr_sites[!is.na(transcr_sites)]
+
+transcr_ccds_fin<-transcr_ccds[transcr_ccds[,"transcript_id"]%in%transcr_sites,]
+transcr_ccds_fin_ids<-unique(unlist(transcr_ccds_fin[,"coords_id"]))
+# 
+# all_tracks_ccds<-all_tracks_ccds[(index_coords_ccds%in%transcr_ccds_fin_ids)]
+# index_ccds<-subset(index_ccds,index_coords_ccds%in%transcr_ccds_fin_ids)
+# 
+# all_tracks_exonsccds<-all_tracks_exonsccds[(index_coords_exonsccds%in%transcr_ccds_fin_ids)]
+# index_exonsccds<-subset(index_exonsccds,index_coords_exonsccds%in%transcr_ccds_fin_ids)
+# 
+
+#One-row template of ORF results, used when a frame has no start/stop codon pair: every field is NA,
+#except the four "next start" fields, which are 0 (pval_next_start is 1).
+st_st_NA<-setNames(data.frame(matrix(NA,nrow=1,ncol=39)),c(
+        "start_pos","st2vect",
+        "ORF_frame",
+        "ORF_length",
+        "ORF_P_sites","ORF_Psit_pct_in_frame",
+        "ORF_RNA_sites","ORF_RNAsit_pct_in_frame",
+        "ORF_freq_multi_ribo",
+        "ORF_pval_multi_ribo",
+        "ORF_spec_multi_ribo",
+        "ORF_freq_multi_rna",
+        "ORF_pval_multi_rna",
+        "ORF_spec_multi_rna",
+        "ORF_freq3_fft_ribo",
+        "ORF_spec3_fft_ribo",
+        "ORF_freq3_spec_ribo",
+        "ORF_spec3_spec_ribo",
+        "ORF_freq3_fft_rna",
+        "ORF_spec3_fft_rna",
+        "ORF_freq3_spec_rna",
+        "ORF_spec3_spec_rna",
+        "ORF_ORF_score_ribo","ORF_ORF_score_rna",
+        "ORF_chisq_ribo","ORF_chisq_rna",
+        "ORF_Ribo_cov_aver","ORF_RNA_cov_aver",
+        "ORF_pept",
+        "nt_tocheck_next_start",
+        "pval_next_start",
+        "P_sites_next_start",
+        "pct_P_sites_inframe_next_start",
+        "Method",
+        "to_check",
+        "to_check_rem",
+        "ORF_id_tr",
+        "ORF_id_gen",
+        "to_check_ALL"))
+st_st_NA[c("nt_tocheck_next_start","P_sites_next_start","pct_P_sites_inframe_next_start")]<-0
+st_st_NA$pval_next_start<-1
+
+CCDS_orfs<-foreach(j=1:length(transcr_sites),.combine=rbind,.multicombine=T) %dopar%{
+        transcript<-transcr_sites[j]
+        
+        ###assembles transcript
+        
+        exons_in_transcr<-transcr_ccds[transcr_ccds[,"transcript_id"]==transcript,]
+        #order exons
+        exons_in_transcr<-exons_in_transcr[order(exons_in_transcr$start,decreasing=F),]
+        list_exons_transcr<-list()
+        list_exons_seqs<-list()
+        
+        for(k in seq(1,dim(exons_in_transcr)[1])){
+                exon_track<-c()
+                subs_ccds<-index_coords_ccds==exons_in_transcr[k,"coords_id"]
+                if(sum(subs_ccds)>0){
+                        if(sum(subs_ccds)==5){
+                                exon_track<-all_tracks_ccds[subs_ccds]
+                        }
+                        if(sum(subs_ccds)>5){
+                                exon_track<-all_tracks_ccds[which(subs_ccds)[1:5]]
+                        }
+                }
+                if(length(exon_track)==0){
+                        subs_exonsccds<-index_coords_exonsccds==exons_in_transcr[k,"coords_id"]
+                        if(sum(subs_exonsccds)==5){
+                                
+                                exon_track<-all_tracks_exonsccds[subs_exonsccds]
+                        }
+                        if(sum(subs_exonsccds)>5){
+                                exon_track<-all_tracks_exonsccds[which(subs_exonsccds)[1:5]]
+                                
+                        }
+                        
+                        
+                }
+                
+                withsep<-strsplit(exon_track,split=" ")
+                x<-t(data.frame(withsep))
+                
+                strand<-x[1,2]
+                tracks<-t(x[,-c(1:2)])
+                
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent","Seq")
+                seq<-tracks[,5]
+                tracks<-tracks[,1:4]
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                list_exons_transcr[[k]]<-tracks
+                list_exons_seqs[[k]]<-seq
+                
+        }
+        
+        merged_tracks<-do.call(what=rbind,list_exons_transcr)
+        
+        if(strand=="-"){
+                merged_tracks<-cbind(rev(merged_tracks[,1]),rev(merged_tracks[,2]),rev(merged_tracks[,3]),rev(merged_tracks[,4]))
+        }
+        
+        tracks<-merged_tracks
+        length<-dim(tracks)[1]
+        
+        if(strand=="+"){
+                seq_transcr<-unlist(list_exons_seqs)
+        }
+        if(strand=="-"){
+                
+                seq_transcr<-unlist(list_exons_seqs)
+                seq_transcr<-comp(rev((seq_transcr)),forceToLower=F)
+        }
+        transcr_data<-data.frame(transcript_id=transcript,stringsAsFactors=F)
+        transcr_data$gene_id<-unique(transcr_ccds[transcr_ccds[,"transcript_id"]==transcript,"gene_id"])[1]
+        transcr_data$annotation<-unique(results_ccds_ORFs[results_ccds_ORFs[,"gene_id"]==transcr_data$gene_id,"annotation",])[1]
+        transcr_data$gene_symbol<-unique(results_ccds_ORFs[results_ccds_ORFs[,"gene_id"]==transcr_data$gene_id,"gene_symbol",])[1]
+        
+        P_sites_sum<-sum(tracks[,1])
+        RNA_sites_sum<-sum(tracks[,4])
+        transcr_data$strand<-strand
+        transcr_data$length<-length
+        transcr_data$n_exons<-dim(exons_in_transcr)[1]
+        transcr_data$P_sites_sum<-P_sites_sum
+        transcr_data$RNA_sites<-RNA_sites_sum
+        transcr_data$Ribo_cov_aver<-mean(tracks[,2])
+        transcr_data$RNA_cov_aver<-mean(tracks[,3])
+        
+        transcr_data$freq_multit_3nt<-NA
+        transcr_data$pval_multit_3nt<-NA
+        transcr_data$spec_multit_3nt<-NA
+        if(P_sites_sum>2 & length>5){
+                if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                vals_mtm<-take_freqs_Fvalues_all_around_3nt_spec(n_tapers=24,time_bw=12,tracks[,1],slepians_values=slepians)[c(1,6,7)]
+                transcr_data$freq_multit_3nt<-vals_mtm[1]
+                transcr_data$pval_multit_3nt<-vals_mtm[2]
+                transcr_data$spec_multit_3nt<-vals_mtm[3]
+                
+        }
+        
+        Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+        Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+        Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+        
+        transcr_data$chisq_noccds_psit<-NA
+        if(P_sites_sum>15){
+                transcr_data$chisq_noccds_psit<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+        if(P_sites_sum<16 & P_sites_sum>0){
+                transcr_data$chisq_noccds_psit<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }    
+        pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+        pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+        pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+        
+        Centered_sites_sum<-round(sum(tracks[,4]),digits=6)
+        
+        Phase_Centered_sites_frame<-sum(tracks[seq(1,length,by=3),4])
+        Phase_Centered_sites_frame_1<-sum(tracks[seq(2,length,by=3),4])
+        Phase_Centered_sites_frame_2<-sum(tracks[seq(3,length,by=3),4])
+        
+        pctPhaseCentered_frame<-Phase_Centered_sites_frame/Centered_sites_sum
+        pctPhaseCentered_frame_1<-Phase_Centered_sites_frame_1/Centered_sites_sum
+        pctPhaseCentered_frame_2<-Phase_Centered_sites_frame_2/Centered_sites_sum
+        
+        transcr_data$chisq_noccds_rna<-NA
+        if(Centered_sites_sum>15){
+                chisq_rna<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+        if(Centered_sites_sum<16 & Centered_sites_sum>0){
+                chisq_rna<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }
+        
+        
+        MAXPhase_frame<-max(c(pctPhase_frame,pctPhase_frame_1,pctPhase_frame_2))
+        FRAME_MAX_phase<-max.col(t(c(pctPhase_frame,pctPhase_frame_1,pctPhase_frame_2)))-1
+        
+        MAXPhaseCentered_frame<-max(c(pctPhaseCentered_frame,pctPhaseCentered_frame_1,pctPhaseCentered_frame_2))
+        FRAME_MAX_phaseCentered<-max.col(t(c(pctPhaseCentered_frame,pctPhaseCentered_frame_1,pctPhaseCentered_frame_2)))-1
+        
+        frame_start_pred<-FRAME_MAX_phase
+        frame_end_pred<-(length-(FRAME_MAX_phase+1))%%3
+        
+        ###Finds ORFs on the 3 different frames
+        
+        all_sign_frames<-list()
+        for(u in 0:2){
+                
+                pept<-NA
+                pept<-unlist(getTrans(seq_transcr,sens="F",frame=u))
+                
+                starts<-pept=="M"
+                
+                stops<-pept=="*"
+                transcr_data$orf_position<-"undetected"
+                
+                start_pos<-((1:length(pept))[starts])*3
+                if(length(start_pos)>0){
+                        start_pos<-start_pos+u-2
+                } else {start_pos<-NA}
+                
+                stop_pos<-((1:length(pept))[stops])*3
+                if(length(stop_pos)>0){
+                        stop_pos<-stop_pos+u-2
+                } else {stop_pos<-NA}
+                
+                #NAs
+                if(sum(!is.na(start_pos))==0 | sum(!is.na(stop_pos))==0){
+                        st_st<-st_st_NA
+                        transcr_data_fr_sORFs<-cbind(transcr_data,st_st_NA)
+                }
+                
+                if(sum(!is.na(start_pos))>0 & sum(!is.na(stop_pos))>0){
+                        st2vect<-c()
+                        for(h in 1:length(start_pos)){
+                                st1<-start_pos[h]
+                                diff<-stop_pos-st1
+                                diff<-diff[diff>0]
+                                if(length(diff)>0){st2<-st1+min(diff)}
+                                if(length(diff)==0){st2<-NA}
+                                st2vect[h]<-st2
+                                
+                        }
+                        st_st<-data.frame(cbind(start_pos,st2vect))
+                        
+                        st_st<-st_st[!is.na(st_st[,"st2vect"]),]
+                        if(dim(st_st)[1]>0){
+                                if(dim(st_st)[1]==1){
+                                        list_coords=list()
+                                        list_coords[[1]]<-st_st[,1]:st_st[,2]
+                                }
+                                if(dim(st_st)[1]>1){
+                                        list_coords<-apply(st_st,FUN=function(x){x[1]:x[2]},1)
+                                }
+                                
+                                max_period<-NA
+                                start_pos<-NA
+                                stop_pos<-NA
+                                pval_max_period<-NA
+                        }
+                        if(dim(st_st)[1]>0){
+                                st_st$ORF_frame<-u
+                                st_st$ORF_length<-NA
+                                st_st$ORF_P_sites<-NA
+                                st_st$ORF_Psit_pct_in_frame<-NA
+                                st_st$ORF_RNA_sites<-NA
+                                st_st$ORF_RNAsit_pct_in_frame<-NA
+                                st_st$ORF_freq_multi_ribo<-NA
+                                st_st$ORF_pval_multi_ribo<-NA
+                                st_st$ORF_spec_multi_ribo<-NA
+                                st_st$ORF_freq_multi_rna<-NA
+                                st_st$ORF_pval_multi_rna<-NA
+                                st_st$ORF_spec_multi_rna<-NA
+                                
+                                st_st$ORF_freq3_fft_ribo<-NA
+                                st_st$ORF_spec3_fft_ribo<-NA
+                                st_st$ORF_freq3_spec_ribo<-NA
+                                st_st$ORF_spec3_spec_ribo<-NA
+                                st_st$ORF_freq3_fft_rna<-NA
+                                st_st$ORF_spec3_fft_rna<-NA
+                                st_st$ORF_freq3_spec_rna<-NA
+                                st_st$ORF_spec3_spec_rna<-NA
+                                st_st$ORF_ORF_score_ribo<-NA
+                                st_st$ORF_ORF_score_rna<-NA
+                                st_st$ORF_chisq_ribo<-NA
+                                st_st$ORF_chisq_rna<-NA
+                                st_st$ORF_Ribo_cov_aver<-NA
+                                st_st$ORF_RNA_cov_aver<-NA
+                                st_st$ORF_pept<-NA
+                                st_st$Method<-NA
+                                st_st$to_check<-NA
+                                st_st$to_check_rem<-NA
+                                st_st$ORF_id_tr<-NA
+                                st_st$ORF_id_gen<-NA
+                                st_st$to_check_ALL<-NA
+                                for(r in 1:dim(st_st)[1]){
+                                        tracks_stst<-tracks[st_st[r,1]:st_st[r,2],]
+                                        length<-dim(tracks_stst)[1]
+                                        P_sites_sum<-sum(tracks_stst[,1])
+                                        RNA_sites_sum<-sum(tracks_stst[,4])
+                                        st_st[r,"ORF_length"]<-length-1
+                                        st_st[r,"ORF_P_sites"]<-P_sites_sum
+                                        st_st[r,"ORF_RNA_sites"]<-RNA_sites_sum
+                                        st_st[r,"ORF_Ribo_cov_aver"]<-mean(tracks_stst[,2])
+                                        st_st[r,"ORF_RNA_cov_aver"]<-mean(tracks_stst[,3])
+                                        if(P_sites_sum>5 & length>5){
+                                                Phase_P_sites_frame<-sum(tracks_stst[seq(1,length,by=3),1])
+                                                Phase_P_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),1])
+                                                Phase_P_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),1])
+                                                st_st[r,"ORF_Psit_pct_in_frame"]<-Phase_P_sites_frame/P_sites_sum
+                                                if((Phase_P_sites_frame/P_sites_sum)>0.5){
+                                                        score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        
+                                                        orfsc<-log2(score1+score2+score3+1)
+                                                        st_st[r,"ORF_ORF_score_ribo"]<-orfsc
+                                                        if(Phase_P_sites_frame<=Phase_P_sites_frame_1 | Phase_P_sites_frame<=Phase_P_sites_frame_2){
+                                                                st_st[r,"ORF_ORF_score_ribo"]<--orfsc
+                                                        }
+                                                        
+                                                        if(max(tracks_stst[,1])>(P_sites_sum*.7)){
+                                                                new_track<-tracks_stst
+                                                                new_track[which(new_track[,1]==max(new_track[,1]))]<-0
+                                                                st_st[r,"ORF_ORF_score_ribo"]<-NA
+                                                                if(sum(new_track[,1])>2){
+                                                                        Phase_P_sites_frame_corr<-sum(new_track[seq(1,length,by=3),1])
+                                                                        Phase_P_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),1])
+                                                                        Phase_P_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),1])
+                                                                        score1<-((Phase_P_sites_frame_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        score2<-((Phase_P_sites_frame_1_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        score3<-((Phase_P_sites_frame_2_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        st_st[r,"ORF_ORF_score_ribo"]<-log2(score1+score2+score3+1)
+                                                                        if(Phase_P_sites_frame_corr<=Phase_P_sites_frame_1_corr | Phase_P_sites_frame<=Phase_P_sites_frame_2_corr){
+                                                                                st_st[r,"ORF_ORF_score_ribo"]<--log2(score1+score2+score3+1)
+                                                                        }                                                                }
+                                                        }
+                                                        
+                                                        if(P_sites_sum>15){
+                                                                st_st[r,"ORF_chisq_ribo"]<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+                                                        if(P_sites_sum<16 & P_sites_sum>0){
+                                                                st_st[r,"ORF_chisq_ribo"]<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                                                        }  
+                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                        values_mtm_orf<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                                                        st_st[r,"ORF_freq_multi_ribo"]<-values_mtm_orf[1]
+                                                        
+                                                        st_st[r,"ORF_pval_multi_ribo"]<-values_mtm_orf[2]
+                                                        st_st[r,"ORF_spec_multi_ribo"]<-values_mtm_orf[3]
+                                                        fft_sp<-take_maxfreq_and_power_FFT_Spec(tracks_stst[,1])
+                                                        st_st[,"ORF_freq3_fft_ribo"]<-fft_sp[1]
+                                                        st_st[,"ORF_spec3_fft_ribo"]<-fft_sp[2]
+                                                        st_st[,"ORF_freq3_spec_ribo"]<-fft_sp[3]
+                                                        st_st[,"ORF_spec3_spec_ribo"]<-fft_sp[4]
+                                                        
+                                                        pept<-unlist(getTrans(seq_transcr[st_st[r,1]:st_st[r,2]],sens="F"))
+                                                        st_st[r,"ORF_pept"]<-paste(pept,sep="",collapse="")
+                                                }
+                                                if(RNA_sites_sum>5 & (Phase_P_sites_frame/P_sites_sum)>0.5){
+                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                        values_mtm_orf_rna<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,4],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                                                        
+                                                        st_st[r,"ORF_freq_multi_rna"]<-values_mtm_orf_rna[1]
+                                                        st_st[r,"ORF_pval_multi_rna"]<-values_mtm_orf_rna[2]
+                                                        st_st[r,"ORF_spec_multi_rna"]<-values_mtm_orf_rna[3]
+                                                        
+                                                        fft_sp<-take_maxfreq_and_power_FFT_Spec(tracks_stst[,4])
+                                                        st_st[,"ORF_freq3_fft_rna"]<-fft_sp[1]
+                                                        st_st[,"ORF_spec3_fft_rna"]<-fft_sp[2]
+                                                        st_st[,"ORF_freq3_spec_rna"]<-fft_sp[3]
+                                                        st_st[,"ORF_spec3_spec_rna"]<-fft_sp[4]
+                                                        
+                                                        Phase_Centered_sites_frame<-sum(tracks_stst[seq(1,length,by=3),4])
+                                                        Phase_Centered_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),4])
+                                                        Phase_Centered_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),4])
+                                                        st_st[r,"ORF_RNAsit_pct_in_frame"]<-Phase_Centered_sites_frame/RNA_sites_sum
+                                                        score1<-((Phase_Centered_sites_frame-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        score2<-((Phase_Centered_sites_frame_1-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        score3<-((Phase_Centered_sites_frame_2-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        
+                                                        orfsc<-log2(score1+score2+score3+1)
+                                                        st_st[r,"ORF_ORF_score_rna"]<-orfsc
+                                                        if(Phase_Centered_sites_frame<=Phase_Centered_sites_frame_1 | Phase_Centered_sites_frame<=Phase_Centered_sites_frame_2){
+                                                                st_st[r,"ORF_ORF_score_rna"]<--orfsc
+                                                        }
+                                                        
+                                                        if(max(tracks_stst[,4])>(RNA_sites_sum*.7)){
+                                                                new_track<-tracks_stst
+                                                                new_track[which(new_track[,4]==max(new_track[,4]))]<-0
+                                                                st_st[r,"ORF_ORF_score_rna"]<-NA
+                                                                if(sum(new_track[,4])>2){
+                                                                        Phase_Centered_sites_frame_corr<-sum(new_track[seq(1,length,by=3),4])
+                                                                        Phase_Centered_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),4])
+                                                                        Phase_Centered_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),4])
+                                                                        score1<-((Phase_Centered_sites_frame_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        score2<-((Phase_Centered_sites_frame_1_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        score3<-((Phase_Centered_sites_frame_2_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        st_st[r,"ORF_ORF_score_rna"]<-log2(score1+score2+score3+1)
+                                                                        if(Phase_Centered_sites_frame_corr<=Phase_Centered_sites_frame_1_corr | Phase_Centered_sites_frame_corr<=Phase_Centered_sites_frame_2_corr){
+                                                                                st_st[r,"ORF_ORF_score_rna"]<--log2(score1+score2+score3+1)
+                                                                        }
+                                                                }
+                                                        }
+                                                        
+                                                        if(RNA_sites_sum>15){
+                                                                st_st[r,"ORF_chisq_rna"]<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+                                                        if(RNA_sites_sum<16 & RNA_sites_sum>0){
+                                                                st_st[r,"ORF_chisq_rna"]<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                                                        }
+                                                        
+                                                }
+                                        }
+                                }
+                                if(dim(st_st)[1]>0){st_st<-st_st[!is.na(st_st[,"ORF_pval_multi_ribo"]),]}
+                                if(dim(st_st)[1]>0){st_st<-st_st[st_st[,"ORF_Psit_pct_in_frame"]>0.5,]}
+                                if(dim(st_st)[1]>0){
+                                        st_st$nt_tocheck_next_start<-0
+                                        st_st$pval_next_start<-1
+                                        st_st$P_sites_next_start<-0
+                                        st_st$pct_P_sites_inframe_next_start<-0
+                                        #find starts per each stop codon
+                                        list_stopsorfs<-split.data.frame(x=st_st,f=st_st[,2],drop=T)
+                                        
+                                        transcr_data_fr<-transcr_data
+                                        
+                                        list_sORFs_frame_moretap<-list()
+                                        list_sORFs_frame_bestperiod<-list()
+                                        list_sORFs_frame_maxsit<-list()
+                                        
+                                        for(g in 1:length(list_stopsorfs)){
+                                                
+                                                stoplist<-list_stopsorfs[[g]]
+                                                max_period<-stoplist[stoplist[,"ORF_pval_multi_ribo"]==min(stoplist[,"ORF_pval_multi_ribo"]),]
+                                                list_sORFs_frame_bestperiod[[g]]<-max_period 
+                                                stoplists_period<-stoplist[stoplist[,"ORF_pval_multi_ribo"]<0.05,]
+                                                if(dim(stoplists_period)[1]>0){
+                                                        stoplists_period<-stoplists_period[!is.na(stoplists_period[,"ORF_pval_multi_ribo"]),]
+                                                }
+                                                if(dim(stoplists_period)[1]>1){
+                                                        
+                                                        for(b in 1:(dim(stoplists_period)[1]-1)){
+                                                                stoplists_period[b,"nt_tocheck_next_start"]<-stoplists_period[b+1,"start_pos"]-stoplist[b,"start_pos"]
+                                                                tracks_stst<-tracks[stoplists_period[b,"start_pos"]:stoplists_period[b+1,"start_pos"],]
+                                                                length<-dim(tracks_stst)[1]
+                                                                P_sites_sum<-sum(tracks_stst[,1])
+                                                                pval_to_next<-1                                                              
+                                                                
+                                                                Phase_P_sites_frame<-sum(tracks_stst[seq(1,length,by=3),1])
+                                                                Phase_P_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),1])
+                                                                Phase_P_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),1])
+                                                                
+                                                                pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+                                                                pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+                                                                pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+                                                                
+                                                                if(P_sites_sum>5){
+                                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                                        
+                                                                        pval_to_next<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[6]
+                                                                }
+                                                                stoplists_period[b,"P_sites_next_start"]<-P_sites_sum
+                                                                
+                                                                stoplists_period[b,"pct_P_sites_inframe_next_start"]<-pctPhase_frame
+                                                                
+                                                                stoplists_period[b,"pval_next_start"]<-pval_to_next
+                                                        }
+                                                        
+                                                        max_sit<-stoplists_period[which(stoplists_period[,"P_sites_next_start"]>5 & stoplists_period[,"pct_P_sites_inframe_next_start"]>0.5)[1],]
+                                                        max_sit<-max_sit[!is.na(max_sit[,"ORF_length"]),]
+                                                        
+                                                        if(dim(max_sit)[1]==0){
+                                                                max_sit<-max_period
+                                                        }
+                                                        list_sORFs_frame_maxsit[[g]]<-max_sit   
+                                                        
+                                                        more_tap<-stoplists_period[which(stoplists_period[,"pval_next_start"]<0.05)[1],]
+                                                        more_tap<-more_tap[!is.na(more_tap[,"ORF_length"]),]
+                                                        if(dim(more_tap)[1]==0){
+                                                                more_tap<-max_period
+                                                        }
+                                                        list_sORFs_frame_moretap[[g]]<-more_tap
+                                                        
+                                                }
+                                                if(dim(stoplists_period)[1]<2){
+                                                        list_sORFs_frame_maxsit[[g]]<-max_period
+                                                        list_sORFs_frame_moretap[[g]]<-max_period
+                                                        
+                                                }
+                                                
+                                                
+                                        }
+                                        sORFs_frame_moretap<-do.call(what=rbind.data.frame,args=list_sORFs_frame_moretap)
+                                        sORFs_frame_moretap$Method<-"more_tapers"
+                                        sORFs_frame_maxsit<-do.call(what=rbind.data.frame,args=list_sORFs_frame_maxsit)
+                                        sORFs_frame_maxsit$Method<-"max_P_sites"
+                                        sORFs_frame_bestperiod<-do.call(what=rbind.data.frame,args=list_sORFs_frame_bestperiod)
+                                        sORFs_frame_bestperiod$Method<-"best_periodicity"
+                                        sORFs_frames<-rbind(sORFs_frame_moretap,sORFs_frame_maxsit,sORFs_frame_bestperiod)
+                                        
+                                        for(w in 1:dim(sORFs_frames)[1]){
+                                                transcr_data_fr[w,]<-transcr_data_fr[1,]
+                                        }
+                                        
+                                        transcr_data_fr_sORFs<-cbind(transcr_data_fr,sORFs_frames)
+                                        transcr_data_fr_sORFs$orf_position<-"detected"
+                                }
+                        }
+                        
+                        if(dim(st_st)[1]==0){
+                                st_st<-st_st_NA
+                                transcr_data_fr_sORFs<-cbind(transcr_data,st_st_NA)
+                        }
+                }
+                
+                
+                all_sign_frames[[u+1]]<-transcr_data_fr_sORFs
+        }
+        all_sign_frames<-do.call(what=rbind.data.frame,args=all_sign_frames)
+        transcr_all_frames_res<-unique(all_sign_frames)
+        transcr_all_frames_res$ORF_id_tr<-paste(transcr_all_frames_res$transcript_id,transcr_all_frames_res$start_pos,transcr_all_frames_res$st2vect,sep="_")
+        transcr_all_frames_ok<-transcr_all_frames_res[!is.na(transcr_all_frames_res$ORF_pept),]
+        if(dim(transcr_all_frames_ok)[1]>0){
+                all_orfs<-unique(transcr_all_frames_ok[,c("transcript_id","length","strand","start_pos","st2vect","ORF_length","gene_id")])
+                transcr<-all_orfs$transcript_id[1]
+                trascr_length<-all_orfs$length[1]
+                orf_strand<-all_orfs$strand[1]                
+                ex_intr_coords<-exons_in_transcr$coords_id
+                if(orf_strand=="-"){ex_intr_coords<-rev(ex_intr_coords)}
+                
+                exons_in_transcr_data<-results_ccds_ORFs[results_ccds_ORFs[,"coords"]%in%ex_intr_coords,]
+                exons_in_transcr_data<-exons_in_transcr_data[match(ex_intr_coords,exons_in_transcr_data$coords),]
+                cumsumexons<-cumsum(exons_in_transcr_data$length.x)
+                
+                list_orfas<-list()
+                for(z in 1:dim(all_orfs)[1]){
+                        orfa<-all_orfs[z,]
+                        
+                        transcr_data<-data.frame(transcript_id=transcr)
+                        
+                        orf_start<-orfa$start_pos
+                        orf_end<-orfa$st2vect
+                        
+                        st_ex<-which((cumsumexons-orf_start)==min(cumsumexons[cumsumexons>orf_start]-orf_start))
+                        end_ex<-which((cumsumexons-orf_end)==min(cumsumexons[cumsumexons>=orf_end]-orf_end))
+                        in_betw_ex<-st_ex:end_ex
+                        in_betw_ex<-in_betw_ex[!in_betw_ex%in%c(st_ex,end_ex)>0]
+                        exon_inbetween_data<-exons_in_transcr_data[in_betw_ex,]
+                        
+                        
+                        coord_start<-NA
+                        coord_end<-NA
+                        nt_to_rem<-NA
+                        rem_len<-0
+                        if(st_ex>1){rem_len<-cumsumexons[st_ex-1]}
+                        if(orfa$strand=="+"){coord_start<-exons_in_transcr_data[st_ex,"start"] + (orf_start-rem_len)}
+                        if(orfa$strand=="-"){coord_start<-exons_in_transcr_data[st_ex,"end"] - (orf_start-rem_len)}
+                        
+                        if(length(in_betw_ex)==0){
+                                if(st_ex==end_ex){nt_to_rem<-0}
+                                if(st_ex!=end_ex){if(orfa$strand=="+"){
+                                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                                }
+                                                  if(orfa$strand=="-"){
+                                                          nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                                                  }
+                                }
+                        }
+                        
+                        if(length(in_betw_ex)>0){
+                                nt_in_betw<-sum(exons_in_transcr_data[in_betw_ex,"length.x"])
+                                if(orfa$strand=="+"){
+                                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                                }
+                                if(orfa$strand=="-"){
+                                        nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                                }
+                                nt_to_rem<-nt_to_rem+nt_in_betw
+                        }
+                        
+                        if(st_ex==end_ex & orfa$strand=="+"){coord_end<-coord_start+orfa$ORF_length+1}
+                        if(st_ex==end_ex & orfa$strand=="-"){coord_end<-coord_start-orfa$ORF_length+1}
+                        
+                        if(st_ex!=end_ex & orfa$strand=="+"){coord_end<-exons_in_transcr_data[end_ex,"start"] + (orfa$ORF_length-nt_to_rem)+1}
+                        if(st_ex!=end_ex & orfa$strand=="-"){coord_end<-exons_in_transcr_data[end_ex,"end"] - (orfa$ORF_length-nt_to_rem)+1}
+                        
+                        if(orfa$strand=="-"){
+                                coord_start2<-coord_start
+                                coord_start<-coord_end
+                                coord_end<-coord_start2
+                        }
+                        
+                        
+                        if(st_ex!=end_ex & orfa$strand=="+"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,exons_in_transcr_data[st_ex,"end"],"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],exons_in_transcr_data[end_ex,"start"],coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check<-paste(to_check_st,to_check_end,sep=";")
+                                                             
+                        }
+                        if(st_ex!=end_ex & orfa$strand=="-"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],exons_in_transcr_data[st_ex,"start"],coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],coord_start,exons_in_transcr_data[end_ex,"end"],"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check<-paste(to_check_st,to_check_end,sep=";")
+                        }
+                        
+                        if(st_ex==end_ex){to_check<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")}
+                        orfa$to_check<-to_check
+                        orfa$to_check_rem<-NA
+                        if(length(in_betw_ex)>0){
+                                orfa$to_check_rem<-paste(exon_inbetween_data$exon_id,collapse=";")
+                                
+                        }
+                        orfa$ORF_id_tr<-paste(transcr_data$transcript_id,orf_start,orf_end,sep="_")
+                        orfa$ORF_id_gen<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,sep="_")
+                        orfa$to_check_ALL<-paste(orfa$to_check,orfa$to_check_rem,sep=";")
+                        list_orfas[[z]]<-orfa
+                        
+                        
+                }
+                list_orfas<-do.call(rbind.data.frame,args=list_orfas)
+                transcr_all_frames_ok$ORF_id_gen<-NULL
+                transcr_all_frames_ok$to_check<-NULL
+                transcr_all_frames_ok$to_check_rem<-NULL
+                transcr_all_frames_ok$to_check_ALL<-NULL
+                
+                transcr_all_frames_ok<-merge(transcr_all_frames_ok,list_orfas[,c("ORF_id_tr","ORF_id_gen","to_check","to_check_rem","to_check_ALL")],by="ORF_id_tr")
+                #reconcile and maybe add the rest
+                return(transcr_all_frames_ok)
+        }
+        if(dim(transcr_all_frames_ok)[1]==0){return(transcr_all_frames_res)}
+        
+        
+}
+# Keep only the rows for which an ORF peptide sequence was produced.
+CCDS_orfs_found <- CCDS_orfs[!is.na(CCDS_orfs[, "ORF_pept"]), ]
+
+# Left join on transcript_id: detected ORFs without a match in cdss_transcripts are kept.
+CCDS_orfs <- merge(CCDS_orfs_found, cdss_transcripts, by = "transcript_id", all.x = TRUE)
+
+write.table(CCDS_orfs, file = "orfs_found", quote = FALSE, row.names = FALSE, sep = "\t", col.names = TRUE)
+
+# Strongly penalise scientific notation so numbers pasted into ids below print in full.
+options(scipen = 999)
+
+# Transcript-level ids: start plus (stop + 2), and the annotated_start/annotated_stop pair.
+CCDS_orfs$ORF_id_tr_minus2 <- paste(CCDS_orfs$transcript_id, CCDS_orfs$start_pos, CCDS_orfs$st2vect + 2, sep = "_")
+CCDS_orfs$ORF_id_tr_annotated <- paste(CCDS_orfs$transcript_id, CCDS_orfs$annotated_start, CCDS_orfs$annotated_stop, sep = "_")
+
+#nonccds_res<-results_ccds_ORFs
+CCDS_orfs_uniq <- CCDS_orfs
+print(paste("--- checking CCDS ORF coverage and multi-mapping ratio,", date(), sep = " "))
+all_sORFs_CCDS_multi <- CCDS_orfs_uniq
+
+
+# Collect the unique region ids listed in the ";"-separated `to_check` column
+# and write them out as a six-column, tab-separated BED-like file.
+ex_to_check <- strsplit(as.character(all_sORFs_CCDS_multi$to_check), split = ";")
+ex_to_check <- unique(as.character(unlist(ex_to_check)))
+
+# Each id is "_"-joined in the order chr, start, end, type, gene_id, strand.
+# NOTE(review): a chromosome or gene name containing "_" would shift the
+# fields; nothing here guards against that -- confirm with the annotation used.
+ex_to_check_spl <- strsplit(ex_to_check, split = "_")
+
+# Build the table in one pass instead of growing it row by row inside
+# `for (h in 1:length(...))`, which iterates over c(1, 0) for an empty list.
+# Ids with fewer than six fields are padded with NA, exactly as before.
+bed_columns <- c("chr", "start", "end", "type", "gene_id", "strand")
+n_bed_columns <- length(bed_columns)
+bed_fields <- vapply(ex_to_check_spl, function(piece) piece[seq_len(n_bed_columns)], character(n_bed_columns))
+bedfiles_to_check <- as.data.frame(matrix(bed_fields, ncol = n_bed_columns, byrow = TRUE), stringsAsFactors = FALSE)
+names(bedfiles_to_check) <- bed_columns
+
+# Written without header, quoting or row names.
+write.table(bedfiles_to_check, file = "bed_tocheck_ccds.bed", quote = FALSE, row.names = FALSE, sep = "\t", col.names = FALSE)
+
+# Run the two helper scripts located in args[2] on bed_tocheck_ccds.bed; args[3]
+# is passed on to the bash helper. A failing helper now raises a warning.
+scr <- paste(args[2], "analyze_multi_clust.bash", sep = "/")
+syst_scr <- paste(scr, "bed_tocheck_ccds.bed bed_tocheck_ccds", args[3], sep = " ")
+if (system(syst_scr) != 0) warning("non-zero exit status from: ", syst_scr, call. = FALSE)
+
+scr <- paste(args[2], "include_multi_nomerge.R", sep = "/")
+syst_scr <- paste(scr, "bed_tocheck_ccds", sep = " ")
+if (system(syst_scr) != 0) warning("non-zero exit status from: ", syst_scr, call. = FALSE)
+
+res_to_check <- read.table(file = "multi_table_bed_tocheck_ccds", header = TRUE, stringsAsFactors = FALSE)
+
+# Move every intermediate file into tmp_ccds/ and keep working from there.
+dir.create("tmp_ccds", showWarnings = FALSE)
+system("mv *tocheck_ccds* tmp_ccds/")
+setwd("tmp_ccds")
+
+
+# Ids listed in `to_check_rem` are looked up in results_ccds_ORFs (matched on
+# exon_id), and the selected rows are stacked on top of res_to_check.
+ex_rem <- unlist(strsplit(as.character(all_sORFs_CCDS_multi$to_check_rem), split = ";"))
+ex_rem <- unique(ex_rem)
+ex_rem <- ex_rem[!is.na(ex_rem)]
+
+whole_exon_columns <- c("exon_id", "strand.x", "length.y", "reads_ribo", "reads_multi_ribo", "pct_region_covered_ribo", "pct_covered_onlymulti_ribo", "reads_rna", "reads_multi_rna", "pct_region_covered_rna", "pct_covered_onlymulti_rna")
+is_listed_exon <- results_ccds_ORFs[, "exon_id"] %in% ex_rem
+res_ex_rem <- results_ccds_ORFs[is_listed_exon, whole_exon_columns]
+names(res_ex_rem) <- names(res_to_check)  # positional rename so both tables can be stacked
+res_all_multi <- rbind.data.frame(res_ex_rem, res_to_check)
+res_all_multi$exon_id_2 <- paste(res_all_multi$exon_id, res_all_multi$strand, sep = "_")
+
+
+# For every ORF, pool reads and coverage over all res_all_multi rows matched via exon_id_2 or
+# exon_id; coverage is a length-weighted mean. Row 1 of the match supplies the other columns.
+all_sORFs_CCDS_multi_final <- foreach(g = seq_len(nrow(all_sORFs_CCDS_multi)), .combine = rbind, .multicombine = TRUE) %dopar% {
+        s <- all_sORFs_CCDS_multi[g, ]
+        list_ex <- strsplit(s$to_check_ALL, split = ";")[[1]]
+        to_take <- unique(c(which(res_all_multi[, "exon_id_2"] %in% list_ex), which(res_all_multi[, "exon_id"] %in% list_ex)))
+        res_multi <- res_all_multi[to_take, ]
+        region_len <- res_multi$length.y
+        total_len <- sum(region_len)
+        res_multi$reads_ribo <- sum(res_multi$reads_ribo)
+        res_multi$reads_multi_ribo <- sum(res_multi$reads_multi_ribo)
+        res_multi$pct_region_covered_ribo_ALL <- res_multi$pct_region_covered_ribo * region_len
+        res_multi$pct_covered_onlymulti_ribo_ALL <- res_multi$pct_covered_onlymulti_ribo * region_len
+        res_multi$pct_region_covered_ribo <- sum(res_multi$pct_region_covered_ribo_ALL) / total_len
+        res_multi$pct_covered_onlymulti_ribo <- sum(res_multi$pct_covered_onlymulti_ribo_ALL) / total_len
+        res_multi$reads_rna <- sum(res_multi$reads_rna)
+        res_multi$reads_multi_rna <- sum(res_multi$reads_multi_rna)
+        res_multi$pct_region_covered_rna_ALL <- res_multi$pct_region_covered_rna * region_len
+        res_multi$pct_covered_onlymulti_rna_ALL <- res_multi$pct_covered_onlymulti_rna * region_len
+        res_multi$pct_region_covered_rna <- sum(res_multi$pct_region_covered_rna_ALL) / total_len
+        res_multi$pct_covered_onlymulti_rna <- sum(res_multi$pct_covered_onlymulti_rna_ALL) / total_len
+        cbind(s, res_multi[1, ])
+}
+
+print(paste("--- Selecting best transcript per CCDS ORF,", date(), sep = " "))
+
+write.table(all_sORFs_CCDS_multi_final, file = "orfs_bef_ag", quote = FALSE, row.names = FALSE, sep = "\t", col.names = TRUE)
+
+# Per (gene_id, ORF_pept, Method): keep the entries with the highest RNA_sites,
+# break ties by the largest `length`, then filter the table by ORF_id_tr_minus2.
+orf_group_cols <- c("gene_id", "ORF_pept", "Method")
+orf_pick_cols <- c("ORF_id_tr_minus2", "length", "gene_id", "ORF_pept", "Method", "RNA_sites")
+agg <- aggregate(x = all_sORFs_CCDS_multi_final[, "RNA_sites"], by = list(all_sORFs_CCDS_multi_final[, "gene_id"], all_sORFs_CCDS_multi_final[, "ORF_pept"], all_sORFs_CCDS_multi_final[, "Method"]), FUN = max)
+names(agg) <- c(orf_group_cols, "RNA_sites")
+agg2 <- merge(x = all_sORFs_CCDS_multi_final[, orf_pick_cols], agg, by = c(orf_group_cols, "RNA_sites"))
+agg3 <- aggregate(x = agg2[, "length"], by = list(agg2[, "gene_id"], agg2[, "ORF_pept"], agg2[, "Method"], agg2[, "RNA_sites"]), FUN = max)
+names(agg3) <- c(orf_group_cols, "RNA_sites", "length")
+agg4 <- merge(x = all_sORFs_CCDS_multi_final[, orf_pick_cols], agg3, by = c(orf_group_cols, "length", "RNA_sites"))
+all_sORFs_CCDS_multi_final <- all_sORFs_CCDS_multi_final[all_sORFs_CCDS_multi_final[, "ORF_id_tr_minus2"] %in% agg4[, "ORF_id_tr_minus2"], ]
+
+
+# Keep ORFs with ORF_pval_multi_ribo below 0.05. Comparing against NA yields
+# all-NA rows, which the transcript_id check then removes; that second filter
+# previously restarted from the unfiltered table and discarded the p-value cut.
+all_sORFs_CCDS_periodic <- all_sORFs_CCDS_multi_final[all_sORFs_CCDS_multi_final[, "ORF_pval_multi_ribo"] < 0.05, ]
+all_sORFs_CCDS_periodic <- all_sORFs_CCDS_periodic[!is.na(all_sORFs_CCDS_periodic[, "transcript_id"]), ]
+all_sORFs_CCDS_periodic$n_exons_ORF <- vapply(strsplit(all_sORFs_CCDS_periodic$to_check_ALL, split = ";"), function(x) sum(x != "NA"), integer(1))
+
+
+print(paste("--- Checking CCDS ORFs intersections with annotated CDS regions,", date(), sep = " "))
+
+# `ex_to_check` keeps the per-ORF lists of region ids from `to_check_ALL`;
+# `ex_to_check_spl` holds the unique ids split into their "_"-separated fields.
+ex_to_check <- strsplit(as.character(all_sORFs_CCDS_periodic$to_check_ALL), split = ";")
+ex_to_check_spl <- unique(as.character(unlist(ex_to_check)))
+ex_to_check_spl <- strsplit(ex_to_check_spl, split = "_")
+
+# Build the six-column table in one pass instead of growing it row by row
+# inside `for (h in 1:length(...))`, which iterates over c(1, 0) for an empty
+# list. Ids with fewer than six fields are padded with NA, exactly as before.
+bed_columns <- c("chr", "start", "end", "type", "gene_id", "strand")
+n_bed_columns <- length(bed_columns)
+bed_fields <- vapply(ex_to_check_spl, function(piece) piece[seq_len(n_bed_columns)], character(n_bed_columns))
+bedfiles_to_check <- as.data.frame(matrix(bed_fields, ncol = n_bed_columns, byrow = TRUE), stringsAsFactors = FALSE)
+names(bedfiles_to_check) <- bed_columns
+
+# Drop placeholder rows: `to_check_ALL` is pasted from `to_check_rem`, which is
+# NA for ORFs without inner exons, so the literal string "NA" shows up as an id.
+bedfiles_to_check <- bedfiles_to_check[!is.na(bedfiles_to_check[, "chr"]), ]
+bedfiles_to_check <- bedfiles_to_check[bedfiles_to_check[, "chr"] != "NA", ]
+
+# Written unsorted, without header, quoting or row names.
+# NOTE(review): ids whose chromosome or gene name contains "_" would be mis-split -- confirm.
+write.table(bedfiles_to_check, file = "sORFs_totest", quote = FALSE, row.names = FALSE, sep = "\t", col.names = FALSE)
+
+# sort the exon table by chromosome, then numerically by start, and read the sorted copy back in
+system("sort -k1,1 -k2,2n sORFs_totest > sORFs_totest.bed")
+
+bedfiles_to_check<-read.table("sORFs_totest.bed",stringsAsFactors=F,header=F)
+names(bedfiles_to_check)<-c("chr","start","end","type","gene_id","strand")
+bedfiles_to_check<-bedfiles_to_check[!is.na(bedfiles_to_check$chr),]
+
+# rows whose strand was read back as NA are given "+" before the file is rewritten
+bedfiles_to_check[is.na(bedfiles_to_check[,"strand"]),"strand"]<-"+"
+write.table(bedfiles_to_check,file="sORFs_totest.bed",quote=F,row.names=F,sep="\t",col.names=F)
+
+# intersectBed -v keeps the exons with no overlap in all_cds.bed; args[3] is used as the directory holding intersectBed, args[1] as the one holding all_cds.bed
+fhalf_scr<-paste(args[3],"intersectBed -v -a sORFs_totest.bed -b",sep = "/")
+shalf_scr<-paste(args[1],"all_cds.bed > sORFs_totest_nocds.bed",sep = "/")
+system(paste(fhalf_scr,shalf_scr))
+
+
+# Read back the exons that intersectBed reported as not overlapping any annotated CDS.
+# The line count comes from readLines() rather than from parsing `wc -l`: wc implementations that
+# left-pad the count make the first space-separated token empty, which turned into NA and broke the if().
+lines_in_file<-length(readLines("sORFs_totest_nocds.bed"))
+
+if(lines_in_file>0){
+        results_nonoverlapcdss<-read.table("sORFs_totest_nocds.bed",stringsAsFactors=F,header=F)
+        names(results_nonoverlapcdss)<-names(bedfiles_to_check)
+        # rebuild the underscore-joined exon identifier so it can be matched against the entries of to_check_ALL
+        results_nonoverlapcdss[,"exon_id"]<-paste(results_nonoverlapcdss[,"chr"],results_nonoverlapcdss[,"start"],results_nonoverlapcdss[,"end"],results_nonoverlapcdss[,"type"],results_nonoverlapcdss[,"gene_id"],results_nonoverlapcdss[,"strand"],sep="_")
+        # rows without a strand get an identifier joined from the first five fields only (vectorised, no per-row loop)
+        NA_str<-which(is.na(results_nonoverlapcdss[,"strand"]))
+        if(length(NA_str)>0){
+                results_nonoverlapcdss[NA_str,"exon_id"]<-paste(results_nonoverlapcdss[NA_str,"chr"],results_nonoverlapcdss[NA_str,"start"],results_nonoverlapcdss[NA_str,"end"],results_nonoverlapcdss[NA_str,"type"],results_nonoverlapcdss[NA_str,"gene_id"],sep="_")
+        }
+} else {
+        # empty result file: placeholder table so the %in% lookup on exon_id still works
+        results_nonoverlapcdss<-data.frame(exon_id=NA,stringsAsFactors=F)
+}
+
+
+# TRUE for an ORF when at least one of its exons (ignoring "NA" placeholders) is absent from the CDS-free exon set
+overl_cds<-vapply(ex_to_check,FUN=function(exon_ids){
+        exon_ids<-exon_ids[exon_ids!="NA"]
+        any(!exon_ids%in%results_nonoverlapcdss$exon_id)
+},FUN.VALUE=logical(1),USE.NAMES=F)
+
+# ORFs whose exons all avoid annotated CDS
+all_sORFs_CCDS_periodic_nocds<-all_sORFs_CCDS_periodic[!overl_cds,]
+
+# keep rows with p-value < 0.05 and a transcript_id; rows with an NA p-value are dropped
+nocds_keep<-which(all_sORFs_CCDS_periodic_nocds[,"ORF_pval_multi_ribo"]<0.05 & !is.na(all_sORFs_CCDS_periodic_nocds[,"transcript_id"]))
+all_sORFs_CCDS_periodic_nocds<-all_sORFs_CCDS_periodic_nocds[nocds_keep,]
+write.table(all_sORFs_CCDS_periodic_nocds,file="orfs_before_u_dorfs",quote=F,row.names=F,sep="\t",col.names=T)
+# Label each CDS-free ORF as uORF / dORF / Overl_dORF / Overl_uORF relative to the annotated start and stop (later tests override earlier ones).
+# Every coordinate is coerced to numeric once: two comparisons used raw start_pos / st2vect, which compare as strings if those columns are character.
+if(dim(all_sORFs_CCDS_periodic_nocds)[1]>0){
+        all_sORFs_CCDS_periodic_nocds$type<-NA
+        for(r in seq_len(dim(all_sORFs_CCDS_periodic_nocds)[1])){
+                x<-all_sORFs_CCDS_periodic_nocds[r,]
+                type<-NA
+                ann_start<-as.numeric(x[,"annotated_start"])
+                ann_stop<-as.numeric(x[,"annotated_stop"])
+                orf_start<-as.numeric(x[,"start_pos"])
+                orf_stop<-as.numeric(x[,"st2vect"])
+                if(!is.na(ann_start) & !is.na(ann_stop)){
+                        if(orf_start<ann_start){type<-"uORF"}
+                        if(orf_start>ann_stop){type<-"dORF"}
+                        if(orf_start>ann_start & orf_start<ann_stop & orf_stop>ann_stop){type<-"Overl_dORF"}
+                        if(orf_start<ann_start & orf_stop>ann_start){type<-"Overl_uORF"}
+                }
+                all_sORFs_CCDS_periodic_nocds[r,"type"]<-type
+        }
+} else {
+        print("Warning! No u/dORFs found ! all ORFs overlap annotated CDS exons")
+        all_sORFs_CCDS_periodic_nocds[1,]<-NA
+        all_sORFs_CCDS_periodic_nocds$type<-NA
+}
+
+# CDS-free ORFs: keep rows where multi-mapping-only coverage is below 30% of the covered region, more than 30% of the region is covered, and transcript_id is set
+nocds_tab<-all_sORFs_CCDS_periodic_nocds
+keep_nocds<-which((nocds_tab$pct_covered_onlymulti_ribo/nocds_tab$pct_region_covered_ribo)<0.3 & nocds_tab$pct_region_covered_ribo>0.3 & !is.na(nocds_tab[,"transcript_id"]))
+all_sORFs_CCDS_periodic_nocds_filtered_multi<-nocds_tab[keep_nocds,]
+# CDS-overlapping ORFs: keep an unfiltered copy, then apply the same multi-mapping ratio cut
+all_sORFs_CCDS_periodic<-all_sORFs_CCDS_periodic[overl_cds,]
+all_sORFs_CCDS_periodic_nofilt<-all_sORFs_CCDS_periodic
+all_sORFs_CCDS_periodic<-all_sORFs_CCDS_periodic[which((all_sORFs_CCDS_periodic$pct_covered_onlymulti_ribo/all_sORFs_CCDS_periodic$pct_region_covered_ribo)<0.3 & !is.na(all_sORFs_CCDS_periodic[,"transcript_id"])),]
+
+
+# move up one directory; the result folders are created there
+setwd("../")
+
+# top-level result folder plus one sub-folder per ORF-selection method (existing folders are left alone silently)
+for(out_dir in c("ORFs_CCDS","ORFs_CCDS/best_periodicity","ORFs_CCDS/max_P_sites","ORFs_CCDS/more_tapers")){
+        dir.create(out_dir, showWarnings = FALSE)
+}
+
+
+# ---- per-method result tables ----
+# For each ORF-selection method five tab-separated tables are written into ORFs_CCDS/<method>/:
+#   sORFs_sign_filtered_cds        rows of all_sORFs_CCDS_periodic_nocds
+#   sORFs_sign_filtered_cds_multi  rows of all_sORFs_CCDS_periodic_nocds_filtered_multi
+#   ORFs_sign_filtered_multi       rows of all_sORFs_CCDS_periodic
+#   ORFs_sign_notfiltered_multi    rows of all_sORFs_CCDS_periodic_nofilt
+#   ORFs_all                       rows of CCDS_orfs that have a transcript_id
+# The methods run in the original order, so the variables left behind hold the more_tapers tables.
+
+
+# rows of `tab` whose Method equals `method`; an NA Method yields an all-NA row, as with direct indexing
+rows_for_method<-function(tab,method){
+        tab[tab[,"Method"]==method,]
+}
+
+# writes `tab` to ORFs_CCDS/<method>/<file_name>: tab-separated, header line, no quoting, no row names
+write_orf_table<-function(tab,file_name,method){
+        write.table(tab,file=paste("ORFs_CCDS",method,file_name,sep="/"),quote=F,row.names=F,sep="\t",col.names=T)
+}
+
+
+for(method in c("best_periodicity","max_P_sites","more_tapers")){
+        # ORFs whose exons avoid annotated CDS
+        sORFs_sign_filtered_cds<-rows_for_method(all_sORFs_CCDS_periodic_nocds,method)
+        write_orf_table(sORFs_sign_filtered_cds,"sORFs_sign_filtered_cds",method)
+
+        # same, after the multi-mapping / coverage filters
+        sORFs_sign_filtered_cds_multi<-rows_for_method(all_sORFs_CCDS_periodic_nocds_filtered_multi,method)
+        write_orf_table(sORFs_sign_filtered_cds_multi,"sORFs_sign_filtered_cds_multi",method)
+
+        # CDS-overlapping ORFs after the multi-mapping filter
+        ORFs_sign_filtered_multi<-rows_for_method(all_sORFs_CCDS_periodic,method)
+        write_orf_table(ORFs_sign_filtered_multi,"ORFs_sign_filtered_multi",method)
+
+        # CDS-overlapping ORFs before that filter
+        ORFs_sign_notfiltered_multi<-rows_for_method(all_sORFs_CCDS_periodic_nofilt,method)
+        write_orf_table(ORFs_sign_notfiltered_multi,"ORFs_sign_notfiltered_multi",method)
+
+        # every ORF recorded for this method, dropping rows without a transcript_id
+        ORFs_all<-rows_for_method(CCDS_orfs,method)
+        ORFs_all<-ORFs_all[!is.na(ORFs_all[,"transcript_id"]),]
+        write_orf_table(ORFs_all,"ORFs_all",method)
+}
+
+
+print(paste("--- CCDS ORF finding Done!","---",date(),sep=" "))
+
+
+
+
+
+
+
diff --git a/scripts/NONCCDS_orf_finder.R b/scripts/NONCCDS_orf_finder.R
new file mode 100755
index 0000000..92ac6b5
--- /dev/null
+++ b/scripts/NONCCDS_orf_finder.R
@@ -0,0 +1,983 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for NONCCDS genes ORF Finding, takes as arguments annotation dir, RiboTaper scripts dir, bedtools dir, n of cores
+
+
+# positional arguments: 1 = annotation dir, 2 = RiboTaper scripts dir, 3 = bedtools dir, 4 = n of cores
+args <- commandArgs(trailingOnly = TRUE)
+
+print(paste("--- non-CCDS ORF finding","---",date(),sep=" "))
+
+###loads the shared functions (messages emitted while sourcing are silenced)
+functions_file <- paste(args[2],"functions.R",sep = "/")
+suppressMessages(source(functions_file))
+
+###registers the parallel backend with the requested n of cores
+registerDoMC(cores = args[4])
+
+###loads annotation files
+
+
+tr_ex<-paste(args[1],"transcr_exons_nonccds.bed",sep = "/")
+transcr_nonccds<-read.table(tr_ex,stringsAsFactors=F,header=F)
+colnames(transcr_nonccds)<-c("chr","start","end","transcript_id","gene_id","strand")
+# identifiers derived from the coordinates: chr_start_end, chr_start_end_EXONnonCCDS_<gene_id> and chr:start-end(strand)
+transcr_nonccds$coords_id<-paste(transcr_nonccds$chr,transcr_nonccds$start,transcr_nonccds$end,sep="_")
+transcr_nonccds$exon_id<-paste(transcr_nonccds$coords_id,"EXONnonCCDS",transcr_nonccds$gene_id,sep="_")
+transcr_nonccds$coords2<-paste0(transcr_nonccds$chr,":",transcr_nonccds$start,"-",transcr_nonccds$end,"(",transcr_nonccds$strand,")")
+
+###loads exonic results; chr / start / end are the first three underscore-separated fields of exon_id
+results_nonccds_ORFs<-read.table("results_nonccds_annot",sep="\t",stringsAsFactors=F,header=T,quote = "")
+exon_id_fields<-strsplit(results_nonccds_ORFs$exon_id,split="_")
+results_nonccds_ORFs$chr<-sapply(exon_id_fields,"[[",1)
+results_nonccds_ORFs$start<-sapply(exon_id_fields,"[[",2)
+results_nonccds_ORFs$end<-sapply(exon_id_fields,"[[",3)
+
+###working copies of the exonic results table (no coordinates are computed at this point)
+
+results_nonccds_ORFs_copy<-results_nonccds_ORFs
+nonccds_res<-results_nonccds_ORFs
+
+###loads data tracks (readBigText is not defined in this part of the script; presumably it comes from the sourced functions.R - confirm)
+
+all_tracks_nonccds<-readBigText("data_tracks/Psit_Ribo_Rna_Cent_tracks_nonccds")
+# exon_id index for the tracks: the script later subsets all_tracks_nonccds with a logical vector computed on this column
+index_nonccds<-read.table("data_tracks/index_tracks_nonccds",stringsAsFactors=F,header=F)
+colnames(index_nonccds)<-"exon_id"
+
+#Subset the annotation and the tracks to transcripts having at least one exon with more than 5 P-sites
+# alternative left by the original author for reference (per-transcript P-site sum), not used:
+# transcr_nonccds<-unique(merge(transcr_nonccds,results_nonccds_ORFs[,c("exon_id","P_sites_sum")],by="exon_id",all.x=T))
+# transcr_sites<-aggregate(transcr_nonccds$P_sites_sum,by=list(transcr_nonccds$transcript_id),FUN=sum)
+# colnames(transcr_sites)<-c("transcript_id","n_P_sites")
+#transcr_sites<-transcr_sites[transcr_sites[,"n_P_sites"]>2,]
+
+# exon ids with P_sites_sum > 5 (NA sums and NA ids are discarded)
+translated_exon_ids<-results_nonccds_ORFs[which(results_nonccds_ORFs[,"P_sites_sum"]>5),"exon_id"]
+translated_exon_ids<-translated_exon_ids[!is.na(translated_exon_ids)]
+
+# transcripts containing at least one of those exons
+transcript_nonccds_transl<-unique(transcr_nonccds[transcr_nonccds[,"exon_id"]%in%translated_exon_ids,"transcript_id"])
+transcript_nonccds_transl<-transcript_nonccds_transl[!is.na(transcript_nonccds_transl)]
+transcr_sites<-transcript_nonccds_transl
+
+# all annotated exons of the retained transcripts
+transcr_nonccds_fin<-transcr_nonccds[transcr_nonccds[,"transcript_id"]%in%transcr_sites,]
+transcr_nonccds_fin_ids<-unique(transcr_nonccds_fin[,"exon_id"])
+# keep only the track entries, and the matching index rows, that belong to those exons
+keep_tracks<-index_nonccds[,"exon_id"]%in%transcr_nonccds_fin_ids
+all_tracks_nonccds<-all_tracks_nonccds[keep_tracks]
+index_nonccds<-index_nonccds[keep_tracks,,drop=F]
+
+
+# One-row template that is bound to the transcript data when a frame has no start or no stop codon.
+# Every ORF column is NA except the four "next start" statistics, which are 0 / 1 / 0 / 0.
+# Columns are added in a fixed order so the template keeps the original layout.
+st_st_NA<-data.frame(start_pos=NA,st2vect=NA)
+
+# columns that start as NA, in output order
+na_cols_first<-c(
+        # frame and size
+        "ORF_frame","ORF_length",
+        # P-site / RNA-site counts and in-frame fractions
+        "ORF_P_sites","ORF_Psit_pct_in_frame","ORF_RNA_sites","ORF_RNAsit_pct_in_frame",
+        # multitaper statistics (ribo, rna)
+        "ORF_freq_multi_ribo","ORF_pval_multi_ribo","ORF_spec_multi_ribo",
+        "ORF_freq_multi_rna","ORF_pval_multi_rna","ORF_spec_multi_rna",
+        # FFT / spectrum statistics (ribo, rna)
+        "ORF_freq3_fft_ribo","ORF_spec3_fft_ribo","ORF_freq3_spec_ribo","ORF_spec3_spec_ribo",
+        "ORF_freq3_fft_rna","ORF_spec3_fft_rna","ORF_freq3_spec_rna","ORF_spec3_spec_rna",
+        # ORF score and chi-squared columns
+        "ORF_ORF_score_ribo","ORF_ORF_score_rna","ORF_chisq_ribo","ORF_chisq_rna",
+        # average coverage and peptide sequence
+        "ORF_Ribo_cov_aver","ORF_RNA_cov_aver","ORF_pept"
+)
+for(col_name in na_cols_first){
+        st_st_NA[[col_name]]<-NA
+}
+
+# "next start" statistics carry numeric defaults instead of NA
+st_st_NA$nt_tocheck_next_start<-0
+st_st_NA$pval_next_start<-1
+st_st_NA$P_sites_next_start<-0
+st_st_NA$pct_P_sites_inframe_next_start<-0
+
+# remaining columns, again NA
+na_cols_last<-c("Method","to_check","to_check_rem","ORF_id_tr","ORF_id_gen","to_check_ALL")
+for(col_name in na_cols_last){
+        st_st_NA[[col_name]]<-NA
+}
+
+
+NONCCDS_orfs<-foreach(j=1:length(transcr_sites),.combine=rbind,.multicombine=T) %dopar%{
+        transcript<-transcr_sites[j]
+        
+        ###assembles transcript
+        
+        exons_in_transcr<-transcr_nonccds[transcr_nonccds[,"transcript_id"]==transcript,]
+        exons_in_transcr<-exons_in_transcr[order(exons_in_transcr$start,decreasing=F),]
+        
+        list_exons_transcr<-list()
+        list_exons_seqs<-list()
+        
+        for(k in seq(1,dim(exons_in_transcr)[1])){
+                exon_track<-c()
+                subs_nonccds<-index_nonccds[,"exon_id"]==exons_in_transcr[k,"exon_id"]
+                
+                if(sum(subs_nonccds)>0){
+                        if(sum(subs_nonccds)==5){
+                                exon_track<-all_tracks_nonccds[subs_nonccds]
+                        }
+                        if(sum(subs_nonccds)>5){
+                                exon_track<-all_tracks_nonccds[which(subs_nonccds)[1:4]]
+                        }
+                }                                
+                
+                
+                withsep<-strsplit(exon_track,split=" ")
+                x<-t(data.frame(withsep))
+                
+                strand<-x[1,2]
+                tracks<-t(x[,-c(1:2)])
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent","Seq")
+                seq<-tracks[,5]
+                tracks<-tracks[,1:4]
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                list_exons_transcr[[k]]<-tracks
+                list_exons_seqs[[k]]<-seq
+        }
+        
+        merged_tracks<-do.call(what=rbind,list_exons_transcr)
+        
+        if(strand=="-"){
+                merged_tracks<-cbind(rev(merged_tracks[,1]),rev(merged_tracks[,2]),rev(merged_tracks[,3]),rev(merged_tracks[,4]))
+        }
+        
+        tracks<-merged_tracks
+        length<-dim(tracks)[1]
+        if(strand=="+"){
+                seq_transcr<-unlist(list_exons_seqs)
+        }
+        if(strand=="-"){
+                
+                seq_transcr<-unlist(list_exons_seqs)
+                seq_transcr<-comp(rev((seq_transcr)),forceToLower=F)
+        }
+        
+        transcr_data<-data.frame(transcript_id=transcript,stringsAsFactors=F)
+        transcr_data$gene_id<-unique(transcr_nonccds[transcr_nonccds[,"transcript_id"]==transcript,"gene_id"])[1]
+        transcr_data$annotation<-unique(results_nonccds_ORFs[results_nonccds_ORFs[,"gene_id"]==transcr_data$gene_id,"annotation",])[1]
+        transcr_data$gene_symbol<-unique(results_nonccds_ORFs[results_nonccds_ORFs[,"gene_id"]==transcr_data$gene_id,"gene_symbol",])[1]
+        
+        P_sites_sum<-sum(tracks[,1])
+        RNA_sites_sum<-sum(tracks[,4])
+        transcr_data$strand<-strand
+        transcr_data$length<-length
+        transcr_data$n_exons<-dim(exons_in_transcr)[1]
+        transcr_data$P_sites_sum<-P_sites_sum
+        transcr_data$RNA_sites<-RNA_sites_sum
+        transcr_data$Ribo_cov_aver<-mean(tracks[,2])
+        transcr_data$RNA_cov_aver<-mean(tracks[,3])
+        
+        transcr_data$freq_multit_3nt<-NA
+        transcr_data$pval_multit_3nt<-NA
+        transcr_data$spec_multit_3nt<-NA
+        if(P_sites_sum>2 & length>5){
+                if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                vals_mtm<-take_freqs_Fvalues_all_around_3nt_spec(n_tapers=24,time_bw=12,tracks[,1],slepians_values=slepians)[c(1,6,7)]
+                transcr_data$freq_multit_3nt<-vals_mtm[1]
+                transcr_data$pval_multit_3nt<-vals_mtm[2]
+                transcr_data$spec_multit_3nt<-vals_mtm[3]
+                
+        }
+        Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+        Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+        Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+        
+        transcr_data$chisq_noccds_psit<-NA
+        if(P_sites_sum>15){
+                transcr_data$chisq_noccds_psit<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+        if(P_sites_sum<16 & P_sites_sum>0){
+                transcr_data$chisq_noccds_psit<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }    
+        pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+        pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+        pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+        
+        Centered_sites_sum<-round(sum(tracks[,4]),digits=6)
+        
+        Phase_Centered_sites_frame<-sum(tracks[seq(1,length,by=3),4])
+        Phase_Centered_sites_frame_1<-sum(tracks[seq(2,length,by=3),4])
+        Phase_Centered_sites_frame_2<-sum(tracks[seq(3,length,by=3),4])
+        
+        pctPhaseCentered_frame<-Phase_Centered_sites_frame/Centered_sites_sum
+        pctPhaseCentered_frame_1<-Phase_Centered_sites_frame_1/Centered_sites_sum
+        pctPhaseCentered_frame_2<-Phase_Centered_sites_frame_2/Centered_sites_sum
+        
+        transcr_data$chisq_noccds_rna<-NA
+        if(Centered_sites_sum>15){
+                chisq_rna<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+        if(Centered_sites_sum<16 & Centered_sites_sum>0){
+                chisq_rna<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }
+        
+        
+        MAXPhase_frame<-max(c(pctPhase_frame,pctPhase_frame_1,pctPhase_frame_2))
+        FRAME_MAX_phase<-max.col(t(c(pctPhase_frame,pctPhase_frame_1,pctPhase_frame_2)))-1
+        
+        MAXPhaseCentered_frame<-max(c(pctPhaseCentered_frame,pctPhaseCentered_frame_1,pctPhaseCentered_frame_2))
+        FRAME_MAX_phaseCentered<-max.col(t(c(pctPhaseCentered_frame,pctPhaseCentered_frame_1,pctPhaseCentered_frame_2)))-1
+        
+        frame_start_pred<-FRAME_MAX_phase
+        frame_end_pred<-(length-(FRAME_MAX_phase+1))%%3
+        
+        ###Finds ORFs on the 3 different frames
+        
+        all_sign_frames<-list()
+        for(u in 0:2){
+                
+                pept<-NA
+                pept<-unlist(getTrans(seq_transcr,sens="F",frame=u))
+                
+                starts<-pept=="M"
+                
+                stops<-pept=="*"
+                transcr_data$orf_position<-"undetected"
+                
+                start_pos<-((1:length(pept))[starts])*3
+                if(length(start_pos)>0){
+                        start_pos<-start_pos+u-2
+                } else {start_pos<-NA}
+                
+                stop_pos<-((1:length(pept))[stops])*3
+                if(length(stop_pos)>0){
+                        stop_pos<-stop_pos+u-2
+                } else {stop_pos<-NA}
+                
+                
+                if(sum(!is.na(start_pos))==0 | sum(!is.na(stop_pos))==0){
+                        st_st<-st_st_NA
+                        transcr_data_fr_sORFs<-cbind(transcr_data,st_st_NA)
+                }
+                
+                if(sum(!is.na(start_pos))>0 & sum(!is.na(stop_pos))>0){
+                        st2vect<-c()
+                        for(h in 1:length(start_pos)){
+                                st1<-start_pos[h]
+                                diff<-stop_pos-st1
+                                diff<-diff[diff>0]
+                                if(length(diff)>0){st2<-st1+min(diff)}
+                                if(length(diff)==0){st2<-NA}
+                                st2vect[h]<-st2
+                                
+                        }
+                        st_st<-data.frame(cbind(start_pos,st2vect))
+                        
+                        st_st<-st_st[!is.na(st_st[,"st2vect"]),]
+                        if(dim(st_st)[1]>0){
+                                if(dim(st_st)[1]==1){
+                                        list_coords=list()
+                                        list_coords[[1]]<-st_st[,1]:st_st[,2]
+                                }
+                                if(dim(st_st)[1]>1){
+                                        list_coords<-apply(st_st,FUN=function(x){x[1]:x[2]},1)
+                                }
+                                
+                                max_period<-NA
+                                start_pos<-NA
+                                stop_pos<-NA
+                                pval_max_period<-NA
+                        }
+                        if(dim(st_st)[1]>0){
+                                st_st$ORF_frame<-u
+                                st_st$ORF_length<-NA
+                                st_st$ORF_P_sites<-NA
+                                st_st$ORF_Psit_pct_in_frame<-NA
+                                st_st$ORF_RNA_sites<-NA
+                                st_st$ORF_RNAsit_pct_in_frame<-NA
+                                st_st$ORF_freq_multi_ribo<-NA
+                                st_st$ORF_pval_multi_ribo<-NA
+                                st_st$ORF_spec_multi_ribo<-NA
+                                st_st$ORF_freq_multi_rna<-NA
+                                st_st$ORF_pval_multi_rna<-NA
+                                st_st$ORF_spec_multi_rna<-NA
+                                
+                                st_st$ORF_freq3_fft_ribo<-NA
+                                st_st$ORF_spec3_fft_ribo<-NA
+                                st_st$ORF_freq3_spec_ribo<-NA
+                                st_st$ORF_spec3_spec_ribo<-NA
+                                st_st$ORF_freq3_fft_rna<-NA
+                                st_st$ORF_spec3_fft_rna<-NA
+                                st_st$ORF_freq3_spec_rna<-NA
+                                st_st$ORF_spec3_spec_rna<-NA
+                                st_st$ORF_ORF_score_ribo<-NA
+                                st_st$ORF_ORF_score_rna<-NA
+                                st_st$ORF_chisq_ribo<-NA
+                                st_st$ORF_chisq_rna<-NA
+                                st_st$ORF_Ribo_cov_aver<-NA
+                                st_st$ORF_RNA_cov_aver<-NA
+                                st_st$ORF_pept<-NA
+                                st_st$Method<-NA
+                                st_st$to_check<-NA
+                                st_st$to_check_rem<-NA
+                                st_st$ORF_id_tr<-NA
+                                st_st$ORF_id_gen<-NA
+                                st_st$to_check_ALL<-NA
+                                
+                                for(r in 1:dim(st_st)[1]){
+                                        tracks_stst<-tracks[st_st[r,1]:st_st[r,2],]
+                                        length<-dim(tracks_stst)[1]
+                                        P_sites_sum<-sum(tracks_stst[,1])
+                                        RNA_sites_sum<-sum(tracks_stst[,4])
+                                        st_st[r,"ORF_length"]<-length-1
+                                        st_st[r,"ORF_P_sites"]<-P_sites_sum
+                                        st_st[r,"ORF_RNA_sites"]<-RNA_sites_sum
+                                        st_st[r,"ORF_Ribo_cov_aver"]<-mean(tracks_stst[,2])
+                                        st_st[r,"ORF_RNA_cov_aver"]<-mean(tracks_stst[,3])
+                                        if(P_sites_sum>5 & length>5){
+                                                Phase_P_sites_frame<-sum(tracks_stst[seq(1,length,by=3),1])
+                                                Phase_P_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),1])
+                                                Phase_P_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),1])
+                                                st_st[r,"ORF_Psit_pct_in_frame"]<-Phase_P_sites_frame/P_sites_sum
+                                                if((Phase_P_sites_frame/P_sites_sum)>0.5){
+                                                        score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                                                        
+                                                        orfsc<-log2(score1+score2+score3+1)
+                                                        st_st[r,"ORF_ORF_score_ribo"]<-orfsc
+                                                        if(Phase_P_sites_frame<=Phase_P_sites_frame_1 | Phase_P_sites_frame<=Phase_P_sites_frame_2){
+                                                                st_st[r,"ORF_ORF_score_ribo"]<--orfsc
+                                                        }
+                                                        
+                                                        if(max(tracks_stst[,1])>(P_sites_sum*.7)){
+                                                                new_track<-tracks_stst
+                                                                new_track[which(new_track[,1]==max(new_track[,1]))]<-0
+                                                                st_st[r,"ORF_ORF_score_ribo"]<-NA
+                                                                if(sum(new_track[,1])>2){
+                                                                        Phase_P_sites_frame_corr<-sum(new_track[seq(1,length,by=3),1])
+                                                                        Phase_P_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),1])
+                                                                        Phase_P_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),1])
+                                                                        score1<-((Phase_P_sites_frame_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        score2<-((Phase_P_sites_frame_1_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        score3<-((Phase_P_sites_frame_2_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                                                        st_st[r,"ORF_ORF_score_ribo"]<-log2(score1+score2+score3+1)
+                                                                        if(Phase_P_sites_frame_corr<=Phase_P_sites_frame_1_corr | Phase_P_sites_frame<=Phase_P_sites_frame_2_corr){
+                                                                                st_st[r,"ORF_ORF_score_ribo"]<--log2(score1+score2+score3+1)
+                                                                        }
+                                                                }
+                                                        }
+                                                        
+                                                        if(P_sites_sum>15){
+                                                                st_st[r,"ORF_chisq_ribo"]<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+                                                        if(P_sites_sum<16 & P_sites_sum>0){
+                                                                st_st[r,"ORF_chisq_ribo"]<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                                                        }  
+                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                        values_mtm_orf<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                                                        
+                                                        st_st[r,"ORF_freq_multi_ribo"]<-values_mtm_orf[1]
+                                                        st_st[r,"ORF_pval_multi_ribo"]<-values_mtm_orf[2]
+                                                        st_st[r,"ORF_spec_multi_ribo"]<-values_mtm_orf[3]
+                                                        
+                                                        fft_sp<-take_maxfreq_and_power_FFT_Spec(tracks_stst[,1])
+                                                        st_st[,"ORF_freq3_fft_ribo"]<-fft_sp[1]
+                                                        st_st[,"ORF_spec3_fft_ribo"]<-fft_sp[2]
+                                                        st_st[,"ORF_freq3_spec_ribo"]<-fft_sp[3]
+                                                        st_st[,"ORF_spec3_spec_ribo"]<-fft_sp[4]
+                                                        
+                                                        pept<-unlist(getTrans(seq_transcr[st_st[r,1]:st_st[r,2]],sens="F"))
+                                                        st_st[r,"ORF_pept"]<-paste(pept,sep="",collapse="")
+                                                }
+                                                if(RNA_sites_sum>5 & (Phase_P_sites_frame/P_sites_sum)>0.5){
+                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                        values_mtm_orf_rna<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,4],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                                                        
+                                                        st_st[r,"ORF_freq_multi_rna"]<-values_mtm_orf_rna[1]
+                                                        st_st[r,"ORF_pval_multi_rna"]<-values_mtm_orf_rna[2]
+                                                        st_st[r,"ORF_spec_multi_rna"]<-values_mtm_orf_rna[3]
+                                                        
+                                                        
+                                                        fft_sp<-take_maxfreq_and_power_FFT_Spec(tracks_stst[,4])
+                                                        st_st[,"ORF_freq3_fft_rna"]<-fft_sp[1]
+                                                        st_st[,"ORF_spec3_fft_rna"]<-fft_sp[2]
+                                                        st_st[,"ORF_freq3_spec_rna"]<-fft_sp[3]
+                                                        st_st[,"ORF_spec3_spec_rna"]<-fft_sp[4]
+                                                        
+                                                        Phase_Centered_sites_frame<-sum(tracks_stst[seq(1,length,by=3),4])
+                                                        Phase_Centered_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),4])
+                                                        Phase_Centered_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),4])
+                                                        st_st[r,"ORF_RNAsit_pct_in_frame"]<-Phase_Centered_sites_frame/RNA_sites_sum
+                                                        score1<-((Phase_Centered_sites_frame-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        score2<-((Phase_Centered_sites_frame_1-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        score3<-((Phase_Centered_sites_frame_2-P_sites_sum/3)^2)/(RNA_sites_sum/3)
+                                                        
+                                                        orfsc<-log2(score1+score2+score3+1)
+                                                        st_st[r,"ORF_ORF_score_rna"]<-orfsc
+                                                        if(Phase_Centered_sites_frame<=Phase_Centered_sites_frame_1 | Phase_Centered_sites_frame<=Phase_Centered_sites_frame_2){
+                                                                st_st[r,"ORF_ORF_score_rna"]<--orfsc
+                                                        }
+                                                        
+                                                        if(max(tracks_stst[,4])>(RNA_sites_sum*.7)){
+                                                                new_track<-tracks_stst
+                                                                new_track[which(new_track[,4]==max(new_track[,4]))]<-0
+                                                                st_st[r,"ORF_ORF_score_rna"]<-NA
+                                                                if(sum(new_track[,4])>2){
+                                                                        Phase_Centered_sites_frame_corr<-sum(new_track[seq(1,length,by=3),4])
+                                                                        Phase_Centered_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),4])
+                                                                        Phase_Centered_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),4])
+                                                                        score1<-((Phase_Centered_sites_frame_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        score2<-((Phase_Centered_sites_frame_1_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        score3<-((Phase_Centered_sites_frame_2_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                                                        st_st[r,"ORF_ORF_score_rna"]<-log2(score1+score2+score3+1)
+                                                                        if(Phase_Centered_sites_frame_corr<=Phase_Centered_sites_frame_1_corr | Phase_Centered_sites_frame_corr<=Phase_Centered_sites_frame_2_corr){
+                                                                                st_st[r,"ORF_ORF_score_rna"]<--log2(score1+score2+score3+1)
+                                                                        }
+                                                                }
+                                                        }
+                                                        
+                                                        if(RNA_sites_sum>15){
+                                                                st_st[r,"ORF_chisq_rna"]<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+                                                        if(RNA_sites_sum<16 & RNA_sites_sum>0){
+                                                                st_st[r,"ORF_chisq_rna"]<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                                                        }
+                                                        
+                                                }
+                                        }
+                                }
+                                if(dim(st_st)[1]>0){st_st<-st_st[!is.na(st_st[,"ORF_pval_multi_ribo"]),]}
+                                if(dim(st_st)[1]>0){st_st<-st_st[st_st[,"ORF_Psit_pct_in_frame"]>0.5,]}
+                                if(dim(st_st)[1]>0){
+                                        st_st$nt_tocheck_next_start<-0
+                                        st_st$pval_next_start<-1
+                                        st_st$P_sites_next_start<-0
+                                        st_st$pct_P_sites_inframe_next_start<-0
+                                        #find starts per each stop codon
+                                        list_stopsorfs<-split.data.frame(x=st_st,f=st_st[,2],drop=T)
+                                        
+                                        transcr_data_fr<-transcr_data
+                                        
+                                        list_sORFs_frame_moretap<-list()
+                                        list_sORFs_frame_bestperiod<-list()
+                                        list_sORFs_frame_maxsit<-list()
+                                        
+                                        for(g in 1:length(list_stopsorfs)){
+                                                
+                                                stoplist<-list_stopsorfs[[g]]
+                                                max_period<-stoplist[stoplist[,"ORF_pval_multi_ribo"]==min(stoplist[,"ORF_pval_multi_ribo"]),]
+                                                list_sORFs_frame_bestperiod[[g]]<-max_period 
+                                                stoplists_period<-stoplist[stoplist[,"ORF_pval_multi_ribo"]<0.05,]
+                                                if(dim(stoplists_period)[1]>0){
+                                                        stoplists_period<-stoplists_period[!is.na(stoplists_period[,"ORF_pval_multi_ribo"]),]
+                                                }
+                                                if(dim(stoplists_period)[1]>1){
+                                                        
+                                                        for(b in 1:(dim(stoplists_period)[1]-1)){
+                                                                stoplists_period[b,"nt_tocheck_next_start"]<-stoplists_period[b+1,"start_pos"]-stoplist[b,"start_pos"]
+                                                                tracks_stst<-tracks[stoplists_period[b,"start_pos"]:stoplists_period[b+1,"start_pos"],]
+                                                                length<-dim(tracks_stst)[1]
+                                                                P_sites_sum<-sum(tracks_stst[,1])
+                                                                pval_to_next<-1                                                              
+                                                                
+                                                                Phase_P_sites_frame<-sum(tracks_stst[seq(1,length,by=3),1])
+                                                                Phase_P_sites_frame_1<-sum(tracks_stst[seq(2,length,by=3),1])
+                                                                Phase_P_sites_frame_2<-sum(tracks_stst[seq(3,length,by=3),1])
+                                                                
+                                                                pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+                                                                pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+                                                                pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+                                                                
+                                                                if(P_sites_sum>5){
+                                                                        if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                                                                        if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                                                                        
+                                                                        pval_to_next<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks_stst[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[6]
+                                                                }
+                                                                stoplists_period[b,"P_sites_next_start"]<-P_sites_sum
+                                                                
+                                                                stoplists_period[b,"pct_P_sites_inframe_next_start"]<-pctPhase_frame
+                                                                
+                                                                stoplists_period[b,"pval_next_start"]<-pval_to_next
+                                                        }
+                                                        
+                                                        max_sit<-stoplists_period[which(stoplists_period[,"P_sites_next_start"]>5 & stoplists_period[,"pct_P_sites_inframe_next_start"]>0.5)[1],]
+                                                        max_sit<-max_sit[!is.na(max_sit[,"ORF_length"]),]
+                                                        if(dim(max_sit)[1]==0){
+                                                                max_sit<-max_period
+                                                        }
+                                                        list_sORFs_frame_maxsit[[g]]<-max_sit   
+                                                        
+                                                        more_tap<-stoplists_period[which(stoplists_period[,"pval_next_start"]<0.05)[1],]
+                                                        more_tap<-more_tap[!is.na(more_tap[,"ORF_length"]),]
+                                                        if(dim(more_tap)[1]==0){
+                                                                more_tap<-max_period
+                                                        }
+                                                        list_sORFs_frame_moretap[[g]]<-more_tap
+                                                        
+                                                }
+                                                if(dim(stoplists_period)[1]<2){
+                                                        list_sORFs_frame_maxsit[[g]]<-max_period
+                                                        list_sORFs_frame_moretap[[g]]<-max_period
+                                                        
+                                                }
+                                                
+                                                
+                                        }
+                                        sORFs_frame_moretap<-do.call(what=rbind.data.frame,args=list_sORFs_frame_moretap)
+                                        sORFs_frame_moretap$Method<-"more_tapers"
+                                        sORFs_frame_maxsit<-do.call(what=rbind.data.frame,args=list_sORFs_frame_maxsit)
+                                        sORFs_frame_maxsit$Method<-"max_P_sites"
+                                        sORFs_frame_bestperiod<-do.call(what=rbind.data.frame,args=list_sORFs_frame_bestperiod)
+                                        sORFs_frame_bestperiod$Method<-"best_periodicity"
+                                        sORFs_frames<-rbind(sORFs_frame_moretap,sORFs_frame_maxsit,sORFs_frame_bestperiod)
+                                        
+                                        for(w in 1:dim(sORFs_frames)[1]){
+                                                transcr_data_fr[w,]<-transcr_data_fr[1,]
+                                        }
+                                        
+                                        transcr_data_fr_sORFs<-cbind(transcr_data_fr,sORFs_frames)
+                                        transcr_data_fr_sORFs$orf_position<-"detected"
+                                }
+                        }
+                        
+                        if(dim(st_st)[1]==0){
+                                st_st<-st_st_NA
+                                transcr_data_fr_sORFs<-cbind(transcr_data,st_st_NA)
+                        }
+                }
+                
+                
+                all_sign_frames[[u+1]]<-transcr_data_fr_sORFs
+        }
+        all_sign_frames<-do.call(what=rbind.data.frame,args=all_sign_frames)
+        transcr_all_frames_res<-unique(all_sign_frames)
+        transcr_all_frames_res$ORF_id_tr<-paste(transcr_all_frames_res$transcript_id,transcr_all_frames_res$start_pos,transcr_all_frames_res$st2vect,sep="_")
+        transcr_all_frames_ok<-transcr_all_frames_res[!is.na(transcr_all_frames_res$ORF_pept),]
+        if(dim(transcr_all_frames_ok)[1]>0){
+                all_orfs<-unique(transcr_all_frames_ok[,c("transcript_id","length","strand","start_pos","st2vect","ORF_length","gene_id")])
+                transcr<-all_orfs$transcript_id[1]
+                trascr_length<-all_orfs$length[1]
+                orf_strand<-all_orfs$strand[1]                
+                ex_intr_coords<-exons_in_transcr$exon_id
+                if(orf_strand=="-"){ex_intr_coords<-rev(ex_intr_coords)}
+                
+                exons_in_transcr_data<-results_nonccds_ORFs[results_nonccds_ORFs[,"exon_id"]%in%ex_intr_coords,]
+                exons_in_transcr_data<-exons_in_transcr_data[match(ex_intr_coords,exons_in_transcr_data$exon_id),]
+                cumsumexons<-cumsum(exons_in_transcr_data$length.x)
+                
+                list_orfas<-list()
+                for(z in 1:dim(all_orfs)[1]){
+                        orfa<-all_orfs[z,]
+                        
+                        transcr_data<-data.frame(transcript_id=transcr)
+                        
+                        orf_start<-orfa$start_pos
+                        orf_end<-orfa$st2vect
+                        
+                        st_ex<-which((cumsumexons-orf_start)==min(cumsumexons[cumsumexons>orf_start]-orf_start))
+                        end_ex<-which((cumsumexons-orf_end)==min(cumsumexons[cumsumexons>=orf_end]-orf_end))
+                        in_betw_ex<-st_ex:end_ex
+                        in_betw_ex<-in_betw_ex[!in_betw_ex%in%c(st_ex,end_ex)>0]
+                        exon_inbetween_data<-exons_in_transcr_data[in_betw_ex,]
+                        
+                        
+                        coord_start<-NA
+                        coord_end<-NA
+                        nt_to_rem<-NA
+                        rem_len<-0
+                        if(st_ex>1){rem_len<-cumsumexons[st_ex-1]}
+                        if(orfa$strand=="+"){coord_start<-as.numeric(exons_in_transcr_data[st_ex,"start"]) + (orf_start-rem_len)}
+                        if(orfa$strand=="-"){coord_start<-as.numeric(exons_in_transcr_data[st_ex,"end"]) - (orf_start-rem_len)}
+                        
+                        if(length(in_betw_ex)==0){
+                                if(st_ex==end_ex){nt_to_rem<-0}
+                                if(st_ex!=end_ex){if(orfa$strand=="+"){
+                                        nt_to_rem<-as.numeric(exons_in_transcr_data[st_ex,"end"])-coord_start
+                                }
+                                                  if(orfa$strand=="-"){
+                                                          nt_to_rem<-coord_start-as.numeric(exons_in_transcr_data[st_ex,"start"])
+                                                  }
+                                }
+                        }
+                        
+                        if(length(in_betw_ex)>0){
+                                nt_in_betw<-sum(exons_in_transcr_data[in_betw_ex,"length.x"])
+                                if(orfa$strand=="+"){
+                                        nt_to_rem<-as.numeric(exons_in_transcr_data[st_ex,"end"])-coord_start
+                                }
+                                if(orfa$strand=="-"){
+                                        nt_to_rem<-coord_start-as.numeric(exons_in_transcr_data[st_ex,"start"])
+                                }
+                                nt_to_rem<-nt_to_rem+nt_in_betw
+                        }
+                        
+                        if(st_ex==end_ex & orfa$strand=="+"){coord_end<-coord_start+orfa$ORF_length+1}
+                        if(st_ex==end_ex & orfa$strand=="-"){coord_end<-coord_start-orfa$ORF_length+1}
+                        
+                        if(st_ex!=end_ex & orfa$strand=="+"){coord_end<-as.numeric(exons_in_transcr_data[end_ex,"start"]) + (orfa$ORF_length-nt_to_rem)+1}
+                        if(st_ex!=end_ex & orfa$strand=="-"){coord_end<-as.numeric(exons_in_transcr_data[end_ex,"end"]) - (orfa$ORF_length-nt_to_rem)+1}
+                        
+                        if(orfa$strand=="-"){
+                                coord_start2<-coord_start
+                                coord_start<-coord_end
+                                coord_end<-coord_start2
+                        }
+                        
+                        
+                        if(st_ex!=end_ex & orfa$strand=="+"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,exons_in_transcr_data[st_ex,"end"],"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],exons_in_transcr_data[end_ex,"start"],coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check<-paste(to_check_st,to_check_end,sep=";")
+                                                             
+                        }
+                        if(st_ex!=end_ex & orfa$strand=="-"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],exons_in_transcr_data[st_ex,"start"],coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],coord_start,exons_in_transcr_data[end_ex,"end"],"CCDS",orfa$gene_id,orfa$strand,sep="_")
+                                                             to_check<-paste(to_check_st,to_check_end,sep=";")
+                        }
+                        
+                        if(st_ex==end_ex){to_check<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,"CCDS",orfa$gene_id,orfa$strand,sep="_")}
+                        orfa$to_check<-to_check
+                        orfa$to_check_rem<-NA
+                        if(length(in_betw_ex)>0){
+                                orfa$to_check_rem<-paste(exon_inbetween_data$exon_id,collapse=";")
+                                
+                        }
+                        orfa$ORF_id_tr<-paste(transcr_data$transcript_id,orf_start,orf_end,sep="_")
+                        orfa$ORF_id_gen<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,sep="_")
+                        orfa$to_check_ALL<-paste(orfa$to_check,orfa$to_check_rem,sep=";")
+                        list_orfas[[z]]<-orfa
+                        
+                        
+                }
+                list_orfas<-do.call(rbind.data.frame,args=list_orfas)
+                transcr_all_frames_ok$ORF_id_gen<-NULL
+                transcr_all_frames_ok$to_check<-NULL
+                transcr_all_frames_ok$to_check_rem<-NULL
+                transcr_all_frames_ok$to_check_ALL<-NULL
+                
+                transcr_all_frames_ok<-merge(transcr_all_frames_ok,list_orfas[,c("ORF_id_tr","ORF_id_gen","to_check","to_check_rem","to_check_ALL")],by="ORF_id_tr")
+                #reconcile and maybe add the rest
+                return(transcr_all_frames_ok)
+        }
+        if(dim(transcr_all_frames_ok)[1]==0){return(transcr_all_frames_res)}
+        
+        
+        
+}
+
+NONCCDS_orfs<-NONCCDS_orfs[!is.na(NONCCDS_orfs[,"ORF_pept"]),]
+# Only rows with a non-NA ORF_pept are kept; the table is written tab-separated with a header row.
+write.table(NONCCDS_orfs,file="orfs_found_nonccds",quote=F,row.names=F,sep="\t",col.names=T)
+# scipen=999 biases R toward fixed notation when numbers are printed or pasted from this point on.
+options(scipen=999)
+
+
+print(paste("--- checking non-CCDS ORF coverage and multi-mapping ratio,",date(),sep=" "))
+# Section goal: write the regions listed in to_check as a six-column, tab-separated table (bed_tocheck_nonccds.bed).
+
+
+# Key built from transcript_id, start_pos and st2vect+2; the column name says "minus2" although 2 is added.
+NONCCDS_orfs$ORF_id_tr_minus2<-paste(NONCCDS_orfs$transcript_id,NONCCDS_orfs$start_pos,NONCCDS_orfs$st2vect+2,sep="_")
+
+all_sORFs_noncod_multi<-NONCCDS_orfs
+
+# to_check holds one or more ";"-separated region ids per ORF; split, flatten and de-duplicate them.
+ex_to_check<-strsplit(all_sORFs_noncod_multi$to_check,split=";")
+
+ex_to_check<-unique(unlist(ex_to_check))
+# Splitting an id on "_" is expected to yield chr, start, end, type, gene_id, strand, in that order.
+ex_to_check_spl<-strsplit(ex_to_check,split="_")
+# NOTE(review): a chr or gene_id containing "_" would shift the fields - confirm against the annotation in use.
+bedfiles_to_check<-data.frame(chr=NA,start=NA,end=NA,type=NA,gene_id=NA,strand=NA)
+for(h in 1:length(ex_to_check_spl)){  # NOTE(review): 1:length() yields c(1,0) if the list is empty
+        to_bed<-ex_to_check_spl[[h]]
+        bedfiles_to_check[h,"chr"]<-to_bed[1]
+        bedfiles_to_check[h,"start"]<-to_bed[2]
+        bedfiles_to_check[h,"end"]<-to_bed[3]
+        bedfiles_to_check[h,"type"]<-to_bed[4]
+        bedfiles_to_check[h,"gene_id"]<-to_bed[5]
+        bedfiles_to_check[h,"strand"]<-to_bed[6]
+        # positions beyond the end of to_bed index to NA, so short ids leave NA in the remaining columns
+}
+# written without header, row names or quoting
+write.table(bedfiles_to_check,file="bed_tocheck_nonccds.bed",quote=F,row.names=F,sep="\t",col.names=F)
+
+
+scr<-paste(args[2],"analyze_multi_clust.bash",sep="/")  # args[2] is joined by "/" to the helper script names, i.e. it is the scripts directory
+syst_scr<-paste(scr,"bed_tocheck_nonccds.bed bed_tocheck_nonccds",args[3],sep = " ")  # helper receives: the bed file, the name bed_tocheck_nonccds, and args[3]
+system(syst_scr)  # the exit status returned by system() is not checked
+# Second helper is run with the same bed_tocheck_nonccds name; presumably it produces the multi_table_* file read below - TODO confirm.
+scr<-paste(args[2],"include_multi_nomerge.R",sep="/")
+syst_scr<-paste(scr,"bed_tocheck_nonccds",sep = " ")
+
+system(syst_scr)
+# Per-region statistics table with a header row; strings are kept as character.
+res_to_check<-read.table(file="multi_table_bed_tocheck_nonccds",header=T,stringsAsFactors=F)
+
+dir.create("tmp_nonccds", showWarnings = FALSE)
+# Shell glob: every file in the working directory whose name contains tocheck_nonccds is moved into tmp_nonccds/.
+system("mv *tocheck_nonccds* tmp_nonccds/")
+# NOTE: changes the process working directory; the script returns with setwd("../") before writing final outputs.
+setwd("tmp_nonccds")
+
+print(paste("--- Selecting best transcript per non-CCDS ORF,",date(),sep=" "))
+# Build res_all_multi: one row of read/coverage statistics per region that any ORF refers to.
+# to_check_rem is a ";"-separated list of exon ids for an ORF, or NA when it has none.
+if(sum(!is.na(all_sORFs_noncod_multi$to_check_rem))>0){
+ex_rem<-strsplit(all_sORFs_noncod_multi$to_check_rem,split=";")
+
+ex_rem<-unique(unlist(ex_rem))
+ex_rem<-ex_rem[!is.na(ex_rem)]
+# Statistics for these exons are looked up in the existing nonccds_res table by exon_id.
+# The 11 selected columns are then renamed positionally - assumes res_to_check has the same 11 columns in this order, TODO confirm.
+res_ex_rem<-nonccds_res[nonccds_res[,"exon_id"]%in%ex_rem,c(c("exon_id","strand.x","length.y","reads_ribo","reads_multi_ribo","pct_region_covered_ribo","pct_covered_onlymulti_ribo","reads_rna","reads_multi_rna","pct_region_covered_rna","pct_covered_onlymulti_rna"))]
+names(res_ex_rem)<-names(res_to_check)
+
+res_all_multi<-rbind.data.frame(res_ex_rem,res_to_check)
+}
+# No ORF has a to_check_rem entry: the table read from the helper output is used unchanged.
+if(sum(!is.na(all_sORFs_noncod_multi$to_check_rem))==0){
+        res_all_multi<-res_to_check
+}
+# exon_id_2 is exon_id with "_" and the strand appended; both spellings are matched against each ORF's region list.
+res_all_multi$exon_id_2<-paste(res_all_multi$exon_id,res_all_multi$strand,sep="_")
+# paste() renders a missing to_check_rem as the literal text NA, which is why later code compares against the string "NA".
+all_sORFs_noncod_multi$to_check_ALL<-paste(all_sORFs_noncod_multi$to_check,all_sORFs_noncod_multi$to_check_rem,sep=";")
+
+all_sORFs_noncod_multi_final<-foreach(g=1:(dim(all_sORFs_noncod_multi)[1]),.combine=rbind,.multicombine=T) %dopar%{  # one task per ORF row; per-ORF results are row-bound
+        s<-all_sORFs_noncod_multi[g,]
+        list_ex<-strsplit(s$to_check_ALL,split=";")[[1]]  # region ids of this ORF (may contain the literal "NA")
+        with_exon2<-which(res_all_multi[,"exon_id_2"]%in%list_ex)  # match on the id with strand suffix ...
+        with_exon1<-which(res_all_multi[,"exon_id"]%in%list_ex)  # ... or on the plain id
+        to_take<-unique(c(with_exon2,with_exon1))
+        res_multi<-res_all_multi[to_take,]
+        res_multi$reads_ribo<-sum(res_multi$reads_ribo)  # the scalar total is recycled into every row
+        res_multi$reads_multi_ribo<-sum(res_multi$reads_multi_ribo)
+        res_multi$pct_region_covered_ribo_ALL<-res_multi$pct_region_covered_ribo*res_multi$length.y  # per-region fraction weighted by length.y
+        res_multi$pct_covered_onlymulti_ribo_ALL<-res_multi$pct_covered_onlymulti_ribo*res_multi$length.y
+        res_multi$pct_region_covered_ribo<-sum(res_multi$pct_region_covered_ribo_ALL)/(sum(res_multi$length.y))  # length-weighted mean over the ORF's regions
+        res_multi$pct_covered_onlymulti_ribo<-sum(res_multi$pct_covered_onlymulti_ribo_ALL)/(sum(res_multi$length.y))
+        res_multi$reads_rna<-sum(res_multi$reads_rna)  # same aggregation for the RNA columns
+        res_multi$reads_multi_rna<-sum(res_multi$reads_multi_rna)
+        res_multi$pct_region_covered_rna_ALL<-res_multi$pct_region_covered_rna*res_multi$length.y
+        res_multi$pct_covered_onlymulti_rna_ALL<-res_multi$pct_covered_onlymulti_rna*res_multi$length.y
+        res_multi$pct_region_covered_rna<-sum(res_multi$pct_region_covered_rna_ALL)/sum(res_multi$length.y)
+        res_multi$pct_covered_onlymulti_rna<-sum(res_multi$pct_covered_onlymulti_rna_ALL)/sum(res_multi$length.y)
+        # the aggregated columns are identical in every row, so row 1 is appended to the ORF record; other columns of row 1 stay region-specific
+        s<-cbind(s,res_multi[1,])
+        s
+}
+
+
+
+# For every (gene_id, ORF_pept, Method) choose one transcript: largest RNA_sites first, then smallest length.
+agg<-aggregate(x=all_sORFs_noncod_multi_final[,"RNA_sites"],by=list(all_sORFs_noncod_multi_final[,"gene_id"],all_sORFs_noncod_multi_final[,"ORF_pept"],all_sORFs_noncod_multi_final[,"Method"]),FUN=max)
+names(agg)<-c("gene_id","ORF_pept","Method","RNA_sites")
+agg2<-merge(x=all_sORFs_noncod_multi_final[,c("ORF_id_tr_minus2","length","gene_id","ORF_pept","Method","RNA_sites")],agg,by=c("gene_id","ORF_pept","Method","RNA_sites"))
+# Among the rows tied at the maximum RNA_sites, take the minimum length.
+agg3<-aggregate(x=agg2[,"length"],by=list(agg2[,"gene_id"],agg2[,"ORF_pept"],agg2[,"Method"],agg2[,"RNA_sites"]),FUN=min)
+
+names(agg3)<-c("gene_id","ORF_pept","Method","RNA_sites","length")
+agg4<-merge(x=all_sORFs_noncod_multi_final[,c("ORF_id_tr_minus2","length","gene_id","ORF_pept","Method","RNA_sites")],agg3,by=c("gene_id","ORF_pept","Method","length","RNA_sites"))
+all_sORFs_noncod_multi_final<-all_sORFs_noncod_multi_final[all_sORFs_noncod_multi_final[,"ORF_id_tr_minus2"]%in%agg4[,"ORF_id_tr_minus2"],]  # NOTE(review): filter is by id only, so an id picked for one Method keeps its rows for all Methods; rows tied on both criteria are all kept
+
+# Keep ORFs with ORF_pval_multi_ribo below 0.05; an NA p-value yields an all-NA row, removed by the transcript_id check.
+all_sORFs_noncod_periodic<-all_sORFs_noncod_multi_final[all_sORFs_noncod_multi_final[,"ORF_pval_multi_ribo"]<0.05,]
+all_sORFs_noncod_periodic<-all_sORFs_noncod_periodic[!is.na(all_sORFs_noncod_periodic[,"transcript_id"]),]
+
+# noncod_all: every ORF with a peptide (no periodicity or transcript selection); noncod_found: the selected periodic ORFs.
+noncod_all<-NONCCDS_orfs
+noncod_found<-all_sORFs_noncod_periodic
+noncod_found$n_exons_ORF<-sapply(strsplit(noncod_found$to_check_ALL,split=";"),FUN=function(x){sum(x!="NA")})  # number of entries in to_check_ALL other than the literal "NA"
+
+
+print(paste("--- Checking non-CCDS ORFs intersections with annotated CDS regions,",date(),sep=" "))
+# Section goal: write all regions of the selected ORFs as a coordinate-sorted table, sORFs_totest.bed.
+
+# ex_to_check stays a per-ORF list (reused when ORFs are classified); ex_to_check_spl is the flattened, unique set.
+ex_to_check<-strsplit(noncod_found$to_check_ALL,split=";")
+
+ex_to_check_spl<-unique(unlist(ex_to_check))
+
+ex_to_check_spl<-strsplit(ex_to_check_spl,split="_")
+# The literal "NA" entry splits into a single field, producing a row with chr "NA" and NA in the other columns.
+bedfiles_to_check<-data.frame(chr=NA,start=NA,end=NA,type=NA,gene_id=NA,strand=NA)
+for(h in 1:length(ex_to_check_spl)){  # NOTE(review): 1:length() yields c(1,0) if the list is empty
+        to_bed<-ex_to_check_spl[[h]]
+        bedfiles_to_check[h,"chr"]<-to_bed[1]
+        bedfiles_to_check[h,"start"]<-to_bed[2]
+        bedfiles_to_check[h,"end"]<-to_bed[3]
+        bedfiles_to_check[h,"type"]<-to_bed[4]
+        bedfiles_to_check[h,"gene_id"]<-to_bed[5]
+        bedfiles_to_check[h,"strand"]<-to_bed[6]
+        # ids with fewer than six fields leave NA in the trailing columns
+}
+
+write.table(bedfiles_to_check,file="sORFs_totest",quote=F,row.names=F,sep="\t",col.names=F)
+# External sort: by column 1 (chromosome), then numerically by column 2 (start).
+system("sort -k1,1 -k2,2n sORFs_totest > sORFs_totest.bed")
+# Re-read so the placeholder row parses as missing (read.table treats "NA" as NA by default) and can be dropped before rewriting.
+bedfiles_to_check<-read.table("sORFs_totest.bed",stringsAsFactors=F,header=F)
+colnames(bedfiles_to_check)<-c("chr","start","end","type","gene_id","strand")
+bedfiles_to_check<-bedfiles_to_check[!is.na(bedfiles_to_check[,"chr"]),]
+write.table(bedfiles_to_check,file="sORFs_totest.bed",quote=F,row.names=F,sep="\t",col.names=F)
+
+# Keep only candidate regions with no overlap in all_cds.bed (intersectBed -v reports -a entries lacking any overlap in -b).
+# args[3] is joined by "/" to the intersectBed command and args[1] to all_cds.bed, i.e. both are used as directory paths.
+fhalf_scr<-paste(args[3],"intersectBed -v -a sORFs_totest.bed -b",sep = "/")
+
+shalf_scr<-paste(args[1],"all_cds.bed > sORFs_totest_nocds.bed",sep = "/")
+
+system(paste(fhalf_scr,shalf_scr,sep = " "))
+# Count the surviving lines; the count is extracted by regex so a left-padded number (as BSD wc prints) still parses.
+command<-paste("wc -l","sORFs_totest_nocds.bed")
+lines_in_file<-system(command,intern=TRUE)
+lines_in_file<-as.numeric(sub("^[[:space:]]*([0-9]+).*$","\\1",lines_in_file[1]))
+if(is.na(lines_in_file)){stop("could not determine the number of lines in sORFs_totest_nocds.bed")}
+if(lines_in_file>0){
+        results_nonoverlapcdss<-read.table("sORFs_totest_nocds.bed",stringsAsFactors=FALSE,header=FALSE)
+        names(results_nonoverlapcdss)<-names(bedfiles_to_check)  # same six columns as the table that was intersected
+        results_nonoverlapcdss[,"exon_id"]<-paste(results_nonoverlapcdss[,"chr"],results_nonoverlapcdss[,"start"],results_nonoverlapcdss[,"end"],results_nonoverlapcdss[,"type"],results_nonoverlapcdss[,"gene_id"],results_nonoverlapcdss[,"strand"],sep="_")  # rebuild the "_"-joined id so regions can be matched by string
+        NA_str<-which(is.na(results_nonoverlapcdss[,"strand"]))
+        if(length(NA_str)>0){
+                for(o in NA_str){
+                        results_nonoverlapcdss[o,"exon_id"]<-paste(results_nonoverlapcdss[o,"chr"],results_nonoverlapcdss[o,"start"],results_nonoverlapcdss[o,"end"],results_nonoverlapcdss[o,"type"],results_nonoverlapcdss[o,"gene_id"],sep="_")
+                        # rows whose strand was read as NA get a five-field id, without a trailing "_NA"
+                }
+        }
+}
+# No region survived: a single NA exon_id means no region id can match this table,
+# so every ORF with at least one region is subsequently classified as overlapping CDS.
+if(lines_in_file==0){
+        results_nonoverlapcdss<-data.frame(exon_id=NA,stringsAsFactors=FALSE)
+        print("Warning! No ncORFs found! all ORFs overlap annotated CDS exons ")
+}
+
+overl_cds<-c()  # overl_cds[i] becomes TRUE when at least one region of ORF i is absent from results_nonoverlapcdss
+for(i in 1:length(ex_to_check)){
+        a<-ex_to_check[[i]]
+        a<-a[a!="NA"]
+        overl_cds[i]<-sum(!a%in%results_nonoverlapcdss$exon_id)>0
+        # a region missing from the no-overlap table either intersected all_cds.bed or failed to match by id string
+}
+# Split into ORFs flagged as touching annotated CDS (noncod_found_overl*) and ORFs with no flagged region (noncod_found).
+noncod_found_overl<-noncod_found[overl_cds,]
+noncod_found_overl_sign<-noncod_found_overl[noncod_found_overl[,"ORF_pval_multi_ribo"]<0.05,]
+noncod_found_overl_sign<-noncod_found_overl_sign[!is.na(noncod_found_overl_sign[,"transcript_id"]),]
+noncod_found_overl_sign_nomultifilt<-noncod_found_overl_sign  # snapshot taken before the multi-mapping and coverage filters
+noncod_found_overl_sign<-noncod_found_overl_sign[(noncod_found_overl_sign$pct_covered_onlymulti_ribo/noncod_found_overl_sign$pct_region_covered_ribo)<0.3,]  # only-multi coverage must be under 30% of total coverage
+noncod_found_overl_sign<-noncod_found_overl_sign[noncod_found_overl_sign$pct_region_covered_ribo>0.3,]  # and pct_region_covered_ribo must exceed 0.3
+noncod_found_overl_sign<-noncod_found_overl_sign[!is.na(noncod_found_overl_sign[,"transcript_id"]),]  # drops all-NA rows created by NA comparisons
+# ORFs with no flagged region. NOTE(review): only the multi-mapping ratio filter is applied here, not the >0.3 coverage filter used above - confirm intended.
+noncod_found<-noncod_found[!overl_cds,]
+noncod_found_nofiltmult<-noncod_found
+noncod_found<-noncod_found[(noncod_found$pct_covered_onlymulti_ribo/noncod_found$pct_region_covered_ribo)<0.3,]
+noncod_found<-noncod_found[!is.na(noncod_found$transcript_id),]
+
+setwd("../")
+for(out_dir in c("ORFs_NONCCDS","ORFs_NONCCDS/best_periodicity","ORFs_NONCCDS/max_P_sites","ORFs_NONCCDS/more_tapers")) dir.create(out_dir, showWarnings = FALSE)
+
+# Local helpers: by_method() keeps the rows of x whose "Method" column equals m; write_tab() exports tab-separated with a header and without quotes or row names.
+by_method<-function(x,m) x[x[,"Method"]==m,]
+write_tab<-function(x,path) write.table(x,file=path,quote=FALSE,row.names=FALSE,sep="\t",col.names=TRUE)
+
+# --- Method "best_periodicity" ---
+ORFs_sign_filtered_multi<-by_method(noncod_found_overl_sign,"best_periodicity")
+ORFs_sign<-by_method(noncod_found_overl_sign_nomultifilt,"best_periodicity")
+
+write_tab(ORFs_sign_filtered_multi,"ORFs_NONCCDS/best_periodicity/ORFs_sign_nocds_filtered_multi")
+write_tab(ORFs_sign,"ORFs_NONCCDS/best_periodicity/ORFs_sign_nocds_nofilter")
+
+noncod_overl_cds<-by_method(noncod_found,"best_periodicity")
+write_tab(noncod_overl_cds,"ORFs_NONCCDS/best_periodicity/ORFs_sign_cds_filtered_multi")
+
+
+noncod_overl_cds_nofilt<-by_method(noncod_found_nofiltmult,"best_periodicity")
+write_tab(noncod_overl_cds_nofilt,"ORFs_NONCCDS/best_periodicity/ORFs_sign_cds_notfiltered_multi")
+
+noncod_all_meth<-by_method(noncod_all,"best_periodicity")
+# Entries of this method whose gene_id is absent from noncod_found, with a transcript_id and more than 10 P-sites.
+noncod_noORF<-noncod_all_meth[!(noncod_all_meth[,"gene_id"]%in%noncod_found[,"gene_id"]),]
+noncod_noORF<-noncod_noORF[!is.na(noncod_noORF[,"transcript_id"]),]
+noncod_noORF<-noncod_noORF[noncod_noORF[,"P_sites_sum"]>10,]
+write_tab(noncod_noORF,"ORFs_NONCCDS/best_periodicity/noncod_noORF")
+ORF_all<-by_method(NONCCDS_orfs,"best_periodicity")
+write_tab(ORF_all,"ORFs_NONCCDS/best_periodicity/ORFs_all")
+
+# --- Method "max_P_sites" ---
+ORFs_sign<-by_method(noncod_found_overl_sign_nomultifilt,"max_P_sites")
+ORFs_sign_filtered_multi<-by_method(noncod_found_overl_sign,"max_P_sites")
+write_tab(ORFs_sign_filtered_multi,"ORFs_NONCCDS/max_P_sites/ORFs_sign_nocds_filtered_multi")
+write_tab(ORFs_sign,"ORFs_NONCCDS/max_P_sites/ORFs_sign_nocds_nofilter")
+
+noncod_overl_cds_nofilt<-by_method(noncod_found_nofiltmult,"max_P_sites")
+write_tab(noncod_overl_cds_nofilt,"ORFs_NONCCDS/max_P_sites/ORFs_sign_cds_notfiltered_multi")
+
+
+noncod_overl_cds<-by_method(noncod_found,"max_P_sites")
+write_tab(noncod_overl_cds,"ORFs_NONCCDS/max_P_sites/ORFs_sign_cds_filtered_multi")
+
+noncod_all_meth<-by_method(noncod_all,"max_P_sites")
+noncod_noORF<-noncod_all_meth[!(noncod_all_meth[,"gene_id"]%in%noncod_found[,"gene_id"]),]
+noncod_noORF<-noncod_noORF[!is.na(noncod_noORF[,"transcript_id"]),]
+noncod_noORF<-noncod_noORF[noncod_noORF[,"P_sites_sum"]>10,]
+write_tab(noncod_noORF,"ORFs_NONCCDS/max_P_sites/noncod_noORF")
+ORF_all<-by_method(NONCCDS_orfs,"max_P_sites")
+write_tab(ORF_all,"ORFs_NONCCDS/max_P_sites/ORFs_all")
+
+# --- Method "more_tapers" ---
+ORFs_sign<-by_method(noncod_found_overl_sign_nomultifilt,"more_tapers")
+
+ORFs_sign_filtered_multi<-by_method(noncod_found_overl_sign,"more_tapers")
+write_tab(ORFs_sign_filtered_multi,"ORFs_NONCCDS/more_tapers/ORFs_sign_nocds_filtered_multi")
+write_tab(ORFs_sign,"ORFs_NONCCDS/more_tapers/ORFs_sign_nocds_nofilter")
+
+noncod_overl_cds_nofilt<-by_method(noncod_found_nofiltmult,"more_tapers")
+write_tab(noncod_overl_cds_nofilt,"ORFs_NONCCDS/more_tapers/ORFs_sign_cds_notfiltered_multi")
+
+
+noncod_overl_cds<-by_method(noncod_found,"more_tapers")
+write_tab(noncod_overl_cds,"ORFs_NONCCDS/more_tapers/ORFs_sign_cds_filtered_multi")
+
+noncod_all_meth<-by_method(noncod_all,"more_tapers")
+noncod_noORF<-noncod_all_meth[!(noncod_all_meth[,"gene_id"]%in%noncod_found[,"gene_id"]),]
+noncod_noORF<-noncod_noORF[!is.na(noncod_noORF[,"transcript_id"]),]
+noncod_noORF<-noncod_noORF[noncod_noORF[,"P_sites_sum"]>10,]
+
+write_tab(noncod_noORF,"ORFs_NONCCDS/more_tapers/noncod_noORF")
+ORF_all<-by_method(NONCCDS_orfs,"more_tapers")
+write_tab(ORF_all,"ORFs_NONCCDS/more_tapers/ORFs_all")
+
+
+print(paste("--- non-CCDS ORF finding Done!","---",date(),sep=" "))
+
+
+
+
diff --git a/scripts/ORF_final_results.R b/scripts/ORF_final_results.R
new file mode 100755
index 0000000..21f6870
--- /dev/null
+++ b/scripts/ORF_final_results.R
@@ -0,0 +1,208 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for plotting general results about the identified ORFs, takes no arguments
+
+
+print(paste("--- plotting ORF finding results ---", date(), sep = " "))
+ORFs <- read.table("ORFs_max_filt", header = TRUE, stringsAsFactors = FALSE, quote = "")
+ORFs_all <- read.table("ORFs_max", header = TRUE, stringsAsFactors = FALSE, quote = "")
+
+# Counts the rows of d per (category, annotation) pair, names the count column n_col
+# and drops the pairs that never occur.
+count_by_class <- function(d, n_col) {
+        out <- data.frame(table(d$category, d$annotation))
+        names(out) <- c("category", "annotation", n_col)
+        out[out[, n_col] > 0, ]
+}
+gene_cols <- c("category", "annotation", "gene_id")
+
+# ORF counts and distinct-gene counts, for the unfiltered (ORFs_all) and filtered (ORFs) tables.
+n_orfs_all <- count_by_class(ORFs_all, "n_ORFs")
+n_genes_all <- count_by_class(unique(ORFs_all[, gene_cols]), "n_genes")
+n_orfs_filt <- count_by_class(ORFs, "n_ORFs_filtered")
+n_genes_filt <- count_by_class(unique(ORFs[, gene_cols]), "n_genes_filtered")
+
+
+# Left-join the other counts onto the unfiltered ORF counts, then sort by n_ORFs, largest first.
+keys <- c("category", "annotation")
+summary_tab <- merge(n_orfs_all, n_orfs_filt, by = keys, all.x = TRUE)
+
+summary_tab <- merge(summary_tab, n_genes_all, by = keys, all.x = TRUE)
+summary_tab <- merge(summary_tab, n_genes_filt, by = keys, all.x = TRUE)
+summary_tab <- summary_tab[order(summary_tab$n_ORFs, decreasing = TRUE), ]
+
+
+
+write.table(summary_tab, file = "ORFs_genes_found", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
+# Filtered ORFs, "protein_coding" rows: rank categories by count and pool everything beyond the top 4 as "other_coding".
+ORFs_coding<-ORFs[ORFs[,"annotation"]=="protein_coding",]
+if(dim(ORFs_coding)[1]>0){
+        tb<-as.data.frame(table(ORFs_coding$category),stringsAsFactors=F)
+        names(tb)<-c("category","counts")
+        tb<-tb[order(tb$counts,decreasing=T),]
+        if(dim(tb)[1]>4){
+                tb_ok<-tb[1:4,]
+                tb_more<-tb[5:dim(tb)[1],]
+                tb_other<-data.frame(counts=sum(tb_more$counts),category="other_coding",stringsAsFactors=F)
+                tb<-rbind.data.frame(tb_ok,tb_other)
+                ORFs_coding[ORFs_coding[,"category"]%in%tb_more$category,"category"]<-"other_coding"
+                # tb is now the top 4 plus "other_coding"; its row order becomes the factor level order
+                ORFs_coding$category<-factor(ORFs_coding$category,levels=tb$category)
+                
+                
+        }
+        if(dim(tb)[1]>=4){
+                ORFs_coding$category<-factor(ORFs_coding$category,levels=tb$category)
+        }
+        # NOTE(review): with fewer than 4 categories, category is not converted to a factor here
+        
+}
+ncORFs<-ORFs[ORFs[,"category"]=="ncORFS",]
+# Same top-4 pooling for the "ncORFS" rows, ranked by annotation; pooled rows are relabelled "other_ncORFs".
+if(dim(ncORFs)[1]>0){
+        tb<-as.data.frame(table(ncORFs$annotation),stringsAsFactors=F)
+        names(tb)<-c("annotation","counts")
+        tb<-tb[order(tb$counts,decreasing=T),]
+        if(dim(tb)[1]>4){
+                tb_ok<-tb[1:4,]
+                tb_more<-tb[5:dim(tb)[1],]
+                tb_other<-data.frame(counts=sum(tb_more$counts),annotation="other_ncORFs",stringsAsFactors=F)
+                tb<-rbind.data.frame(tb_ok,tb_other)
+                ncORFs[ncORFs[,"annotation"]%in%tb_more$annotation,"annotation"]<-"other_ncORFs"
+                # category is overwritten with the (pooled) annotation, so these rows are grouped by annotation in the plots
+                ncORFs$category<-factor(ncORFs$annotation,levels=tb$annotation)
+                
+                
+        }
+        if(dim(tb)[1]>=4){
+                ncORFs$category<-factor(ncORFs$annotation,levels=tb$annotation)
+        }
+        # NOTE(review): with fewer than 4 annotations, category keeps its original "ncORFS" value
+        
+}
+# Plotting table for the filtered set; copied to all_filt because `all` is reassigned further down from ORFs_all.
+
+all<-rbind.data.frame(ORFs_coding[,c("category","ORF_length","ORF_P_sites")],ncORFs[,c("category","ORF_length","ORF_P_sites")])
+all_filt<-all
+# Unfiltered ORFs (ORFs_all): the same pooling as for the filtered table, overwriting ORFs_coding, ncORFs, tb and all.
+
+ORFs_coding<-ORFs_all[ORFs_all[,"annotation"]=="protein_coding",]
+if(dim(ORFs_coding)[1]>0){
+        tb<-as.data.frame(table(ORFs_coding$category),stringsAsFactors=F)
+        names(tb)<-c("category","counts")
+        tb<-tb[order(tb$counts,decreasing=T),]
+        if(dim(tb)[1]>4){
+                tb_ok<-tb[1:4,]
+                tb_more<-tb[5:dim(tb)[1],]
+                tb_other<-data.frame(counts=sum(tb_more$counts),category="other_coding",stringsAsFactors=F)
+                tb<-rbind.data.frame(tb_ok,tb_other)
+                ORFs_coding[ORFs_coding[,"category"]%in%tb_more$category,"category"]<-"other_coding"
+                # tb is now the top 4 plus "other_coding"; its row order becomes the factor level order
+                ORFs_coding$category<-factor(ORFs_coding$category,levels=tb$category)
+                
+                
+        }
+        if(dim(tb)[1]>=4){
+                ORFs_coding$category<-factor(ORFs_coding$category,levels=tb$category)
+        }
+        # NOTE(review): with fewer than 4 categories, category is not converted to a factor here
+        
+}
+ncORFs<-ORFs_all[ORFs_all[,"category"]=="ncORFS",]
+# "ncORFS" rows: ranked by annotation, everything beyond the top 4 relabelled "other_ncORFs".
+if(dim(ncORFs)[1]>0){
+        tb<-as.data.frame(table(ncORFs$annotation),stringsAsFactors=F)
+        names(tb)<-c("annotation","counts")
+        tb<-tb[order(tb$counts,decreasing=T),]
+        if(dim(tb)[1]>4){
+                tb_ok<-tb[1:4,]
+                tb_more<-tb[5:dim(tb)[1],]
+                tb_other<-data.frame(counts=sum(tb_more$counts),annotation="other_ncORFs",stringsAsFactors=F)
+                tb<-rbind.data.frame(tb_ok,tb_other)
+                ncORFs[ncORFs[,"annotation"]%in%tb_more$annotation,"annotation"]<-"other_ncORFs"
+                # category is overwritten with the (pooled) annotation, so these rows are grouped by annotation in the plots
+                ncORFs$category<-factor(ncORFs$annotation,levels=tb$annotation)
+                
+                
+        }
+        if(dim(tb)[1]>=4){
+                ncORFs$category<-factor(ncORFs$annotation,levels=tb$annotation)
+        }
+        # NOTE(review): with fewer than 4 annotations, category keeps its original "ncORFS" value
+        
+}
+
+# Plotting table for the unfiltered set (category, ORF_length, ORF_P_sites of coding rows followed by ncORF rows).
+all<-rbind.data.frame(ORFs_coding[,c("category","ORF_length","ORF_P_sites")],ncORFs[,c("category","ORF_length","ORF_P_sites")])
+
+# Fill colours shared by every bar/box plot below (recycled or truncated by the plotting functions as needed).
+orf_cols<-c("red","dark red","yellow","orange","grey","dark blue","blue","cornflowerblue","cyan4","grey")
+pdf(file="Final_ORF_results.pdf",width=7,height=10,onefile=TRUE,title="ORFs_results")
+par(mar=c(10,4,4,4))
+par(mfrow=c(2,2))
+barplot(table(all_filt$category),col=orf_cols,ylab="ORFs_filtered",las=2)
+grid(lwd=1.2,col="black")
+barplot(log10(table(all_filt$category)),col=orf_cols,ylab="ORFs_filtered(logscale)",yaxt="n",las=2)
+axis(side=2,at=0:4,labels=10^(0:4))
+grid(lwd=1.2,col="black")
+
+barplot(table(all$category),col=orf_cols,ylab="ORFs_all",las=2)
+grid(lwd=1.2,col="black")
+barplot(log10(table(all$category)),col=orf_cols,ylab="ORFs_all(logscale)",yaxt="n",las=2)
+axis(side=2,at=0:4,labels=10^(0:4))
+grid(lwd=1.2,col="black")
+
+# The "ORFs_filtered" boxplots must read all_filt; reading `all` here would just duplicate the "ORFs_all" page that follows.
+
+par(mfrow=c(3,1))
+
+boxplot(log10(all_filt$ORF_P_sites)~all_filt$category,col=orf_cols,ylab="ORF_P_sites",yaxt="n",main="ORFs_filtered",las=2)
+axis(side=2,at=1:4,labels=10^(1:4),las=2)
+grid(lwd=1.2,col="black")
+boxplot(log10(all_filt$ORF_length)~all_filt$category,col=orf_cols,ylab="ORF_length",yaxt="n",las=2)
+axis(side=2,at=1:4,labels=10^(1:4),las=2)
+grid(lwd=1.2,col="black")
+boxplot((all_filt$ORF_P_sites/(all_filt$ORF_length/3))~all_filt$category,col=orf_cols,ylab="ORF_P_sites_per_codon",ylim=c(0,6),las=2)
+grid(lwd=1.2,col="black")
+
+par(mfrow=c(3,1))
+
+boxplot(log10(all$ORF_P_sites)~all$category,col=orf_cols,ylab="ORF_P_sites",yaxt="n",main="ORFs_all",las=2)
+axis(side=2,at=1:4,labels=10^(1:4),las=2)
+grid(lwd=1.2,col="black")
+boxplot(log10(all$ORF_length)~all$category,col=orf_cols,ylab="ORF_length",yaxt="n",las=2)
+axis(side=2,at=1:4,labels=10^(1:4),las=2)
+grid(lwd=1.2,col="black")
+boxplot((all$ORF_P_sites/(all$ORF_length/3))~all$category,col=orf_cols,ylab="ORF_P_sites_per_codon",ylim=c(0,6),las=2)
+grid(lwd=1.2,col="black")
+
+
+dev.off()
+
+
+print(paste("--- ORF finding results Done! ---",date(),sep= " "))
+
diff --git a/scripts/P_sites_RNA_sites_calc.bash b/scripts/P_sites_RNA_sites_calc.bash
new file mode 100755
index 0000000..a72716c
--- /dev/null
+++ b/scripts/P_sites_RNA_sites_calc.bash
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+##This script calculates P-sites and RNA sites positions, it uses as arguments a comma-separated list of read lengths to be used, a comma-separated list of offsets, the bedtools exec directory
+
+if [ $# -ne 3 ]; then
+        echo  "------------Usage: P_sites_RNA_sites_calc.bash <read_lengths> <offsets> <bedtools_dir> "
+        exit 1
+fi
+
+# Comma-separated read lengths ($1) and offsets ($2), split into parallel arrays.
+lengths=(${1//,/ })
+offsets=(${2//,/ })
+bedtools_dir=$3
+
+# awk program shared by the Ribo-seq and RNA-seq passes. It moves the BED start ($2) to the base env_var nt into the read ("+": counted from the start; "-": $NF-env_var-1), stepping through up to four BED12 blocks (sizes $11, starts $12); $NF is the summed block size appended by the preceding awk stage.
+# The 3-block "+" test compares against env_var (it was a literal 12) and the 4-block "-" case is keyed on "-" with the same -1 as the other "-" cases. NOTE(review): a "+" read with c[1]==env_var and more than one block is still placed at start+env_var - confirm intended.
+shift_prog='{split($11,c,","); split($12,d,","); if($6=="+" && c[1]<env_var && (c[1]+c[2])>env_var) $2=($2)+d[2]+(env_var-c[1]); if($6=="+" && c[1]>=env_var) $2=($2)+env_var; if($6=="+" && c[1]<env_var && (c[1]+c[2])<=env_var && (c[1]+c[2]+c[3])>env_var) $2=($2)+d[3]+(env_var-c[1]-c[2]); if($6=="+" && (c[1]+c[2]+c[3])<env_var) $2=($2)+d[4]+(env_var-c[1]-c[2]-c[3]); if($6=="-" && c[1]>=$NF-env_var) $2=$2+$NF-env_var-1; if($6=="-" && c[1]<($NF-env_var) && (c[1]+c[2])>=($NF-env_var)) $2=($2)+d[2]+(($NF-env_var-1)-c[1]); if($6=="-" && c[1]<($NF-env_var) && (c[1]+c[2])<($NF-env_var) && (c[1]+c[2]+c[3])>=($NF-env_var)) $2=($2)+d[3]+(($NF-env_var-1)-c[1]-c[2]); if($6=="-" && (c[1]+c[2]+c[3])<($NF-env_var)) $2=($2)+d[4]+(($NF-env_var-1)-c[1]-c[2]-c[3]); print $0}'
+
+if [ ${#lengths[@]} -ne ${#offsets[@]} ]; then
+	echo ${#lengths[@]}
+	echo ${#offsets[@]}
+        echo  "------------The number of read lengths and offsets differ! Insert comma-separated value for read lengths and offsets e.g. 28,29 11,12 Usage: P_sites_RNA_sites_calc.bash <read_lengths> <offsets> <bedtools_dir> "
+        exit 1
+
+fi
+n_frag=${#lengths[@]}
+
+for (( i=0; i<${n_frag}; i++ ));
+do
+  len=${lengths[$i]}
+  offs=${offsets[$i]}
+  echo "------------processing" $len "nt reads with offset of +" $offs
+  # Keep reads whose first three block sizes sum to $len; the shell redirect creates tmp_align_len even when nothing matches.
+  "$bedtools_dir/bamToBed" -cigar -bed12 -i RIBO_best.bam | awk -v env_var="$len" -F"\t" '{split($11,c,","); if((c[1]+c[2]+c[3])==env_var) print $0 "\t" c[1]+c[2]+c[3]}' > tmp_align_len
+  awk -F"\t" '{split($11,c,","); print $0 "\t" c[1]+c[2]+c[3]+c[4]+c[5] }' tmp_align_len | awk -v env_var="$offs" "$shift_prog" OFS="\t" | awk '{$3=$2+1 ; print $0 }' OFS="\t" | awk '{if($2>0) print $0}' OFS="\t" > P_sites_len
+  rm tmp_align_len
+  mv P_sites_len tmp_P_sites_"$len"
+
+done
+
+cat tmp_P_sites_* > P_sites_all
+rm tmp_P_sites_*
+echo "------------Done!"
+
+echo "------------processing RNA-seq with offset of + 25"
+
+"$bedtools_dir/bamToBed"  -cigar -bed12 -i RNA_best.bam |  awk -F"\t" '{split($11,c,","); print $0 "\t" c[1]+c[2]+c[3]+c[4]+c[5] }' | awk -v env_var=25 "$shift_prog" OFS="\t" | awk '{$3=$2+1 ; print $0 }' OFS="\t" | awk '{if($2>0) print $0}' OFS="\t" > Centered_RNA
+
+echo "------------P_sites and RNA_sites calculated !!!"
+
diff --git a/scripts/Ribotaper.sh b/scripts/Ribotaper.sh
new file mode 100755
index 0000000..50b220d
--- /dev/null
+++ b/scripts/Ribotaper.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+### RiboTaper master File
+
+
+set -e
+
+if [ $# -ne 8 ]; then
+	echo "Usage: ./Ribotaper.sh <Ribo_bamfile> <RNA_bamfile> <annotation_dir> <comma-sep_read_lenghts_ribo> <comma-sep_cutoffs> <scripts_dir> <bedtools_dir> <n_cores> "
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   ribo_bam file not found!."
+     exit 1
+   fi
+# $2 is the RNA-seq BAM; report it under its own name so the failing argument can be identified.
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   rna_bam file not found!."
+     exit 1
+   fi
+
+if [ ! -d "$3" ]; then
+     echo "!!!!!   annotation_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$6" ]; then
+     echo "!!!!!   scripts_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$7" ]; then
+     echo "!!!!!   bedtools_directory not found!."
+    exit 1
+fi
+
+# n_cores must consist of digits only ...
+re='^[0-9]+$'
+if ! [[ "$8" =~ $re ]] ; then
+   echo "!!!!!   n of cores not valid"
+   exit 1
+fi
+
+# ... and be at least 2; the numeric test also rejects 0 and zero-padded forms such as 01.
+if [ "$8" -lt 2 ]; then
+     echo "!!!!!   n of cores required >1."
+    exit 1
+fi
+
+# Resolve file and directory arguments to absolute paths; the nested quotes keep
+# paths that contain whitespace intact through the command substitution.
+ribo_bam="$(readlink -f "$1")"
+rna_bam="$(readlink -f "$2")"
+annot_dir="$(readlink -f "$3")"
+read_len=$4
+cutoffs=$5
+scripts_dir="$(readlink -f "$6")"
+bedtools_dir="$(readlink -f "$7")"
+n_of_cores=$8
+
+
+echo "Parameters used:"
+echo ""
+
+echo "<Ribo_bamfile> $ribo_bam"
+echo "<RNA_bamfile> $rna_bam"
+echo "<annotation_dir> $annot_dir"
+echo "<comma-sep_read_lenghts_ribo> $read_len"
+echo "<comma-sep_cutoffs> $cutoffs"
+echo "<scripts_dir> $scripts_dir"
+echo "<bedtools_dir> $bedtools_dir"
+echo "<n_cores> $8"
+echo ""
+echo "---------------"
+echo ""
+
+
+
+# Split each BAM into a "unique" subset (MAPQ >= 50) and a "best" subset (flag 0x100, secondary alignments, removed). Paths are quoted so directories with whitespace work.
+
+echo "Taking unique - best alignments..."
+
+samtools view -b -q 50 "$ribo_bam" > RIBO_unique.bam
+samtools view -b -F 0X100 "$ribo_bam" > RIBO_best.bam
+
+samtools view -b -q 50 "$rna_bam" > RNA_unique.bam
+samtools view -b -F 0X100 "$rna_bam" > RNA_best.bam
+
+
+# Calculates P-sites (read lengths and offsets from the arguments) and RNA-sites (default 25nt offset)
+
+echo "Calculating P-sites..."
+
+"$scripts_dir/P_sites_RNA_sites_calc.bash" "$read_len" "$cutoffs" "$bedtools_dir"
+
+# Creates exonic tracks for ccds regions, exons in ccds genes and non_ccds genes (if a ccds annotation is not available, CCDS = CDS)
+
+echo "Creating tracks..."
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_ccds.bed" "$annot_dir/sequences_ccds" ccds "$bedtools_dir"
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_exons_ccds.bed" "$annot_dir/sequences_exonsccds" exonsccds "$bedtools_dir"
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_nonccds.bed" "$annot_dir/sequences_nonccds" nonccds "$bedtools_dir"
+
+
+# Runs the calculations on CCDS, ExonsCCDS and non-CCDS exons and makes quality-check plots for length-coverage statistics
+
+echo "Running calculations ccds..."
+
+"$scripts_dir/tracks_analysis.R" ccds "$scripts_dir" "$n_of_cores"
+
+echo "Running calculations exons_ccds..."
+
+"$scripts_dir/tracks_analysis.R" exonsccds "$scripts_dir" "$n_of_cores"
+
+echo "Running calculations nonccds..."
+
+"$scripts_dir/tracks_analysis.R" nonccds "$scripts_dir" "$n_of_cores"
+
+# Annotates the exons relative to ccds regions. TODO (from the original author): to be adapted, check which files are needed.
+
+echo "Annotate exons..."
+
+"$scripts_dir/annotate_exons.R" "$annot_dir" "$scripts_dir" "$n_of_cores"
+
+echo "Making quality plots..."
+
+"$scripts_dir/quality_check.R" "$annot_dir"
+
+#echo "Calculating coherence..."
+
+#$scripts_dir"/calculate_coherence_all_draft.R" $scripts_dir $n_of_cores
+
+#echo "Calculating alternative exon usage..."
+
+#$scripts_dir"/alt_exon_usage_draft.R" $annot_dir $scripts_dir $n_of_cores 
+
+# ORF finding
+
+echo "CCDS ORF finding..."
+
+"$scripts_dir/CCDS_orf_finder.R" "$annot_dir" "$scripts_dir" "$bedtools_dir" "$n_of_cores"
+
+echo "NONCCDS ORF finding..."
+
+"$scripts_dir/NONCCDS_orf_finder.R" "$annot_dir" "$scripts_dir" "$bedtools_dir" "$n_of_cores"
+
+# Groups ORFs and creates BED files + protein fasta database
+
+echo "Grouping ORFs and creating protein fasta database..."
+
+"$scripts_dir/create_protein_db.R"
+
+# Makes the summary plot for the ORFs found
+
+echo "Summarizing ORF finding results"
+
+"$scripts_dir/ORF_final_results.R"
+
+echo "RiboTaper analysis finished !!!"
+
+
+
+
+
+
diff --git a/scripts/Ribotaper.sh~ b/scripts/Ribotaper.sh~
new file mode 100644
index 0000000..ee467ba
--- /dev/null
+++ b/scripts/Ribotaper.sh~
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+### RiboTaper master File
+
+
+set -e
+
+if [ $# -ne 8 ]; then
+	echo "Usage: ./Ribotaper.sh <Ribo_bamfile> <RNA_bamfile> <annotation_dir> <comma-sep_read_lenghts_ribo> <comma-sep_cutoffs> <scripts_dir> <bedtools_dir> <n_cores> "
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   ribo_bam file not found!."
+     exit 1
+   fi
+# $2 is the RNA-seq BAM; report it under its own name so the failing argument can be identified.
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   rna_bam file not found!."
+     exit 1
+   fi
+
+if [ ! -d "$3" ]; then
+     echo "!!!!!   annotation_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$6" ]; then
+     echo "!!!!!   scripts_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$7" ]; then
+     echo "!!!!!   bedtools_directory not found!."
+    exit 1
+fi
+
+# n_cores must consist of digits only ...
+re='^[0-9]+$'
+if ! [[ "$8" =~ $re ]] ; then
+   echo "!!!!!   n of cores not valid"
+   exit 1
+fi
+
+# ... and be at least 2; the numeric test also rejects 0 and zero-padded forms such as 01.
+if [ "$8" -lt 2 ]; then
+     echo "!!!!!   n of cores required >1."
+    exit 1
+fi
+
+# Resolve file and directory arguments to absolute paths; the nested quotes keep
+# paths that contain whitespace intact through the command substitution.
+ribo_bam="$(readlink -f "$1")"
+rna_bam="$(readlink -f "$2")"
+annot_dir="$(readlink -f "$3")"
+read_len=$4
+cutoffs=$5
+scripts_dir="$(readlink -f "$6")"
+bedtools_dir="$(readlink -f "$7")"
+n_of_cores=$8
+
+
+echo "Parameters used:"
+echo ""
+
+echo "<Ribo_bamfile> $ribo_bam"
+echo "<RNA_bamfile> $rna_bam"
+echo "<annotation_dir> $annot_dir"
+echo "<comma-sep_read_lenghts_ribo> $read_len"
+echo "<comma-sep_cutoffs> $cutoffs"
+echo "<scripts_dir> $scripts_dir"
+echo "<bedtools_dir> $bedtools_dir"
+echo "<n_cores> $8"
+echo ""
+echo "---------------"
+echo ""
+
+
+
+# Split each BAM into a "unique" subset (MAPQ >= 255) and a "best" subset (flag 0x100, secondary alignments, removed). Paths are quoted so directories with whitespace work.
+
+echo "Taking unique - best alignments..."
+
+samtools view -b -q 255 "$ribo_bam" > RIBO_unique.bam
+samtools view -b -F 0X100 "$ribo_bam" > RIBO_best.bam
+
+samtools view -b -q 255 "$rna_bam" > RNA_unique.bam
+samtools view -b -F 0X100 "$rna_bam" > RNA_best.bam
+
+
+# Calculates P-sites (read lengths and offsets from the arguments) and RNA-sites (default 25nt offset)
+
+echo "Calculating P-sites..."
+
+"$scripts_dir/P_sites_RNA_sites_calc.bash" "$read_len" "$cutoffs" "$bedtools_dir"
+
+# Creates exonic tracks for ccds regions, exons in ccds genes and non_ccds genes (if a ccds annotation is not available, CCDS = CDS)
+
+echo "Creating tracks..."
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_ccds.bed" "$annot_dir/sequences_ccds" ccds "$bedtools_dir"
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_exons_ccds.bed" "$annot_dir/sequences_exonsccds" exonsccds "$bedtools_dir"
+
+"$scripts_dir/create_tracks.bash" "$annot_dir/unique_nonccds.bed" "$annot_dir/sequences_nonccds" nonccds "$bedtools_dir"
+
+
+# Runs the calculations on CCDS, ExonsCCDS and non-CCDS exons and makes quality-check plots for length-coverage statistics
+
+echo "Running calculations ccds..."
+
+"$scripts_dir/tracks_analysis.R" ccds "$scripts_dir" "$n_of_cores"
+
+echo "Running calculations exons_ccds..."
+
+"$scripts_dir/tracks_analysis.R" exonsccds "$scripts_dir" "$n_of_cores"
+
+echo "Running calculations nonccds..."
+
+"$scripts_dir/tracks_analysis.R" nonccds "$scripts_dir" "$n_of_cores"
+
+# Annotates the exons relative to ccds regions. TODO (from the original author): to be adapted, check which files are needed.
+
+echo "Annotate exons..."
+
+"$scripts_dir/annotate_exons.R" "$annot_dir" "$scripts_dir" "$n_of_cores"
+
+echo "Making quality plots..."
+
+"$scripts_dir/quality_check.R" "$annot_dir"
+
+#echo "Calculating coherence..."
+
+#$scripts_dir"/calculate_coherence_all_draft.R" $scripts_dir $n_of_cores
+
+#echo "Calculating alternative exon usage..."
+
+#$scripts_dir"/alt_exon_usage_draft.R" $annot_dir $scripts_dir $n_of_cores 
+
+# ORF finding
+
+echo "CCDS ORF finding..."
+
+"$scripts_dir/CCDS_orf_finder.R" "$annot_dir" "$scripts_dir" "$bedtools_dir" "$n_of_cores"
+
+echo "NONCCDS ORF finding..."
+
+"$scripts_dir/NONCCDS_orf_finder.R" "$annot_dir" "$scripts_dir" "$bedtools_dir" "$n_of_cores"
+
+# Groups ORFs and creates BED files + protein fasta database
+
+echo "Grouping ORFs and creating protein fasta database..."
+
+"$scripts_dir/create_protein_db.R"
+
+# Makes the summary plot for the ORFs found
+
+echo "Summarizing ORF finding results"
+
+"$scripts_dir/ORF_final_results.R"
+
+echo "RiboTaper analysis finished !!!"
+
+
+
+
+
+
diff --git a/scripts/Ribotaper_ORF_find.sh b/scripts/Ribotaper_ORF_find.sh
new file mode 100755
index 0000000..525b08c
--- /dev/null
+++ b/scripts/Ribotaper_ORF_find.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+### RiboTaper master File
+
+
+set -e
+
+if [ $# -ne 8 ]; then
+	echo "Usage: ./Ribotaper_ORF_find.sh <Ribo_bamfile> <RNA_bamfile> <annotation_dir> <comma-sep_read_lenghts_ribo> <comma-sep_cutoffs> <scripts_dir> <bedtools_dir> <n_cores> "
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   ribo_bam file not found!."
+     exit 1
+   fi
+# $2 is the RNA-seq BAM; report it under its own name so the failing argument can be identified.
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   rna_bam file not found!."
+     exit 1
+   fi
+
+if [ ! -d "$3" ]; then
+     echo "!!!!!   annotation_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$6" ]; then
+     echo "!!!!!   scripts_directory not found!."
+    exit 1
+fi
+
+if [ ! -d "$7" ]; then
+     echo "!!!!!   bedtools_directory not found!."
+    exit 1
+fi
+
+# n_cores must consist of digits only ...
+re='^[0-9]+$'
+if ! [[ "$8" =~ $re ]] ; then
+   echo "!!!!!   n of cores not valid"
+   exit 1
+fi
+
+# ... and be at least 2; the numeric test also rejects 0 and zero-padded forms such as 01.
+if [ "$8" -lt 2 ]; then
+     echo "!!!!!   n of cores required >1."
+    exit 1
+fi
+
+# Resolve file and directory arguments to absolute paths; the nested quotes keep
+# paths that contain whitespace intact through the command substitution.
+ribo_bam="$(readlink -f "$1")"
+rna_bam="$(readlink -f "$2")"
+annot_dir="$(readlink -f "$3")"
+read_len=$4
+cutoffs=$5
+scripts_dir="$(readlink -f "$6")"
+bedtools_dir="$(readlink -f "$7")"
+n_of_cores=$8
+
+
+echo "Parameters used:"
+echo ""
+
+echo "<Ribo_bamfile> $ribo_bam"
+echo "<RNA_bamfile> $rna_bam"
+echo "<annotation_dir> $annot_dir"
+echo "<comma-sep_read_lenghts_ribo> $read_len"
+echo "<comma-sep_cutoffs> $cutoffs"
+echo "<scripts_dir> $scripts_dir"
+echo "<bedtools_dir> $bedtools_dir"
+echo "<n_cores> $8"
+echo ""
+echo "---------------"
+echo ""
+
+
+
+echo "CCDS ORF finding..."
+
+$scripts_dir"/CCDS_orf_finder.R" $annot_dir $scripts_dir $bedtools_dir $n_of_cores
+
+echo "NONCCDS ORF finding..."
+
+$scripts_dir"/NONCCDS_orf_finder.R" $annot_dir $scripts_dir $bedtools_dir $n_of_cores
+
+# Groups ORFs and creates BED files + protein fasta database
+
+echo "Grouping ORFs and creating protein fasta database..."
+
+$scripts_dir"/create_protein_db.R"
+
+# makes summary plot for the found ORFs
+
+echo "Summarizing ORF finding results"
+
+$scripts_dir"/ORF_final_results.R"
+
+echo "RiboTaper analysis finished !!!"
+
+
+
+
+
+
diff --git a/scripts/analyze_multi_clust.bash b/scripts/analyze_multi_clust.bash
new file mode 100755
index 0000000..ec9182f
--- /dev/null
+++ b/scripts/analyze_multi_clust.bash
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+##This script counts the multi-mapping/unique reads ratio per region, including coverage information, it uses as arguments a bed file, a name as an appendix for further analysis, the bedtools exec directory
+
+if [ $# -ne 3 ]; then
+	echo "Usage: analyze_multi_clust.bash <bed_file> <name> <bedtools_dir>"
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   bed file not found!."
+     exit 1
+   fi
+
+bedtools_dir=$3
+
+echo "-----Intersecting with unique/best alignments-----"
+
+# For each alignment set (unique, best) and library (RIBO, RNA): stranded, split-aware coverageBed of the BED regions, "_" stripped from every field, then BED columns 1-5 joined with "_" in front of columns 6-10.
+# Reads <LIB>_<SET>.bam from the current directory and writes <LIB>_<SET>_counts_<name>; the order is RIBO_unique, RNA_unique, RIBO_best, RNA_best.
+for aln_set in unique best; do
+    for lib in RIBO RNA; do
+        "$bedtools_dir/coverageBed" -s -split -abam "${lib}_${aln_set}.bam" -b "$1" | sort -k1,1 -k2,2g | sed 's/_//g' | awk '{ print $1 "_" $2 "_" $3 "_" $4 "_" $5"\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10}' > "${lib}_${aln_set}_counts_$2"
+    done
+done
+
+echo "-----Done !!!-----"
+
+
+
diff --git a/scripts/annotate_exons.R b/scripts/annotate_exons.R
new file mode 100755
index 0000000..672ef7c
--- /dev/null
+++ b/scripts/annotate_exons.R
@@ -0,0 +1,118 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for annotating exons, it takes as arguments the annotation directory, the RiboTaper scripts directory, the n of cores
+
+args <- commandArgs(trailingOnly = TRUE)
+
+
+print(paste("--- annotating exons","---",date(),sep=" "))
+
+
+suppressMessages(source(paste(args[2],"functions.R",sep = "/")))
+
+
+genes<-paste(args[1],"gene_annot_names",sep = "/")
+
+genes_annot<-read.table(genes,stringsAsFactors=F,header=F)
+
+colnames(genes_annot)<-c("gene_id","annotation","gene_symbol")
+
+nonccds_res<-read.table("results_nonccds",header=T,stringsAsFactors=F)
+
+ids_nonccds<-sapply(nonccds_res$exon_id,FUN=function(x){strsplit(x,split="_")})
+nonccds_res$gene_id<-as.character(lapply(ids_nonccds,"[[",5))
+
+nonccds_res<-merge(nonccds_res,genes_annot,by="gene_id")
+nonccds_res$type<-"non_ccds_exon"
+
+
+
+ccds_res<-read.table("results_ccds",header=T,stringsAsFactors=F)
+ids_ccds<-sapply(ccds_res$exon_id,FUN=function(x){strsplit(x,split="_")})
+ccds_res$gene_id<-as.character(lapply(ids_ccds,"[[",5))
+
+ccds_res<-merge(ccds_res,genes_annot,by="gene_id")
+ccds_res$type<-"ccds"
+
+
+exons_ccds_res<-read.table("results_exonsccds",header=T,stringsAsFactors=F)
+ids_exons_ccds<-sapply(exons_ccds_res$exon_id,FUN=function(x){strsplit(x,split="_")})
+exons_ccds_res$gene_id<-as.character(lapply(ids_exons_ccds,"[[",5))
+
+exons_ccds_res<-merge(exons_ccds_res,genes_annot,by="gene_id")
+exons_ccds_res$type<-"exon"
+
+
+all<-rbind(ccds_res,exons_ccds_res)
+
+# Split every exon_id once on "_"; the first three fields are used below as chr, start and end.
+exon_fields<-strsplit(all$exon_id,split="_")
+
+# "chr_start_end" key, kept as a one-column character matrix as before; works for an empty table too
+coords<-matrix(vapply(exon_fields,function(p){paste(p[1:3],collapse="_")},character(1)),ncol=1)
+all$coords<-coords
+
+all <-all[order(all$coords,all$type,decreasing=FALSE),]
+
+# rows were reordered above, so split again to stay aligned with the sorted table
+exon_fields<-strsplit(all$exon_id,split="_")
+coords2<-matrix(nrow=dim(all)[1],ncol=3)
+for(k in 1:3){
+        coords2[,k]<-vapply(exon_fields,"[",character(1),k)
+}
+
+
+all$chr<-coords2[,1]
+all$start<-as.integer(coords2[,2])
+all$end<-as.integer(coords2[,3])
+
+all$nt_more<-NA
+all$nt_more_ribocovered<-NA
+all$nt_more_P_sites<-NA
+all$nt_more_rnacovered<-NA
+all$nt_more_cent_sites<-NA
+all$overlapping_ccds_start<-NA
+all$overlapping_ccds_end<-NA
+
+
+list_genes_exon_ccds<-split.data.frame(all,f=all$gene_id,drop=T)
+
+
+list_genes_exon_ccds_annot<-list()
+
+list_genes_exon_ccds_annot<-mclapply(X=list_genes_exon_ccds,FUN=annotate_exons,mc.cores=args[3],mc.preschedule = TRUE)
+
+
+all_annot<-do.call(rbind.data.frame,list_genes_exon_ccds_annot)
+
+
+
+write.table(file="results_nonccds_annot",sep="\t",nonccds_res,quote=F,row.names=F)
+
+
+write.table(file="all_calculations_ccdsgenes_annot_new",sep="\t",all_annot,quote=F,row.names=F)
+print(paste("--- annotating exons, Done!","---",date(),sep=" "))
diff --git a/scripts/bowrrna_star.q b/scripts/bowrrna_star.q
new file mode 100644
index 0000000..7945f68
--- /dev/null
+++ b/scripts/bowrrna_star.q
@@ -0,0 +1,34 @@
+#!/bin/bash
+#$ -pe smp 4
+#$ -l h_vmem=15G
+#$ -e "error_mapp_bowrrnastar"
+#$ -o "out_mapp_bowrrnastar"
+#$ -cwd
+
+
+##fastq, rRNA ref, star_in, start_stop bed file
+
+fastq=$1
+
+full_fastq="`readlink -f $fastq`"
+
+name_exp="`echo $fastq | sed 's/\.fastq//g'`"
+
+full_name_exp="`echo $full_fastq | sed 's/\.fastq//g'`"
+
+/data/ohler/Lorenzo/bins/bowtie1/bowtie --best -S -p 4 --al $name_exp"_rRNA.fastq" --un $name_exp"_notrRNA.fastq" $2 $1 > /dev/null
+
+
+mkdir -p "starmapp_star_$name_exp/"
+# stop if the run directory cannot be entered, otherwise STAR and samtools would write their output into the current directory
+cd "starmapp_star_$name_exp/" || exit 1
+
+/data/ohler/Lorenzo/STAR_2.3.1z1/STAR --genomeDir $3 --alignEndsType EndToEnd --readFilesIn $full_name_exp"_notrRNA.fastq" --runThreadN 4 --outFilterMismatchNmax 4 --outFilterMultimapNmax 8 --chimScoreSeparation 10 --chimScoreMin 20 --chimSegmentMin 15 --outSAMattributes All --outFilterIntronMotifs RemoveNoncanonicalUnannotated --alignSJoverhangMin 500 --outFileNamePrefix "star_"$name_exp"_" --outReadsUnmapped Fastx 
+samtools view -bS "star_"$name_exp"_"Aligned.out.sam | samtools sort - "star_"$name_exp"_"Aligned.out.sorted
+samtools index "star_"$name_exp"_"Aligned.out.sorted.bam
+
+
+/data/ohler/website/files/RiboTaper/Version_1.2/create_metaplots.bash "star_"$name_exp"_"Aligned.out.sorted.bam $4 $name_exp"_metaplots"
+
+echo "done"$name_exp"!!!"
+
diff --git a/scripts/create_annotations_files.bash b/scripts/create_annotations_files.bash
new file mode 100755
index 0000000..19609ec
--- /dev/null
+++ b/scripts/create_annotations_files.bash
@@ -0,0 +1,231 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+##This script creates annotation files to be used in the RiboTaper pipeline, it uses as arguments a gtf file, a genome fasta file, a logical value for using the CCDS annotation (true or false) , a logical value for using the APPRIS annotation (true or false), a destination folder, the bedtools executables directory,  the RiboTaper scripts directory
+
+
+
+
+if [ $# -ne 7 ]; then  
+	echo "Usage: ./create_annotation_files.bash <gtf_file> <genome_fasta_file(indexed)> <use_ccdsid?> <use_appris?> <dest_folder> <bedtools_path> <scripts_dir>"
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   gtf_file not found!."
+     exit 1
+   fi
+
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   genome fasta not found!."
+     exit 1
+   fi
+
+if ! [ "$3" = true ]  ; then
+     if ! [ "$3" = false ]; then
+          echo "use_ccdsid = "true" or "false""
+          exit 1
+          fi
+fi
+
+if ! [ "$4" = true ]  ; then
+     if ! [ "$4" = false ]; then
+          echo "use_appris = "true" or "false""
+          exit 1
+          fi
+fi
+
+
+
+
+gencode_ann=$1
+genc_full="`readlink -e $gencode_ann`"
+
+
+genome=$2
+genome_full=`readlink -e $genome`
+
+scripts_dir=$7
+scripts_dir_full=`readlink -e $scripts_dir`
+
+dest_folder=$5
+dest_folder_full=`readlink -f $dest_folder`
+
+
+bedtools_path=$6
+bedtools_path_full=`readlink -e $bedtools_path`
+
+echo "Parameters used:"
+echo ""
+
+
+echo "<gtf_file> $genc_full"
+echo "<genome_fasta_file(indexed)> $genome_full"
+echo "<use_ccdsid?> $3"
+echo "<use_appris?> $4"
+echo "<dest_folder> $dest_folder_full"
+echo "<bedtools_path> $bedtools_path_full"
+echo "<scripts_dir> $scripts_dir_full"
+echo ""
+echo "---------------"
+echo ""
+
+
+
+
+echo "creating directory..."
+mkdir -p $dest_folder_full
+
+cd $dest_folder_full
+
+echo "Extracting gene names + biotypes from gtf..."
+
+#grep out at each step!
+
+awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~/gene_type|gene_biotype/) for (z=1;z<=NF;z++) if ($z~"gene_name") print $(x+1) "\t" $(y+1) "\t" $(z+1)}' $genc_full  | sort | uniq | sed 's/;//g' | sed 's/"//g' > gene_name_type
+less gene_name_type | cut -f 1 | grep -Fvf - $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (z=1;z<=NF;z++) if ($z~"gene_name") print $(x+1) "\t" "no_biotype" "\t" $(z+1)}' | sort | uniq | sed 's/;//g' | sed 's/"//g' > gene_name_notype
+less gene_name_type | cut -f 1 | grep -Fvf - $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~/gene_type|gene_biotype/)  print $(x+1) "\t" $(y+1) "\t" "no_name"}'  | sort | uniq | sed 's/;//g' | sed 's/"//g' > gene_noname_type
+
+
+cat gene_name_type gene_name_notype gene_noname_type > gene_annot_name_pre
+less gene_annot_name_pre | cut -f 1 | grep -Fvf - $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") print $(x+1) "\t" "no_biotype" "\t" "no_name"}'  | sort | uniq | sed 's/;//g' | sed 's/"//g' > gene_noname_notype
+
+cat gene_annot_name_pre gene_noname_notype | sort | uniq > gene_annot_names
+
+
+rm gene_name_type gene_name_notype gene_noname_type gene_annot_name_pre gene_noname_notype 
+
+
+
+echo "creating bed_files..."
+
+#TAKE CDS OF CCDS REGIONS
+
+if [ "$3" = true ] ; then
+    awk '{if($3=="CDS") print $0}' $genc_full | grep ccdsid | awk '{ for (x=1;x<=NF;x++) if ($x~"^gene_id") print $1 "\t" $4-1 "\t" $5 "\t" "CCDS" "\t" $(x+1) "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > unique_ccds.bed
+    less $genc_full | grep ccdsid | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1)"\t" $(x+1) "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_ccds_ccdsid.bed
+
+fi
+
+if [ "$3" = false ] ; then
+    awk '{if($3=="CDS") print $0}' $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") print $1 "\t" $4-1 "\t" $5 "\t" "CCDS" "\t" $(x+1)  "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > unique_ccds.bed
+fi
+
+#STORE CCDS GENES
+less unique_ccds.bed | cut -f 5 | sort | uniq > genes_ccds
+
+#STORE COORDINATES CDS CCDS
+less unique_ccds.bed | cut -f 1-3 | tr '\t' '_'  > coords_ccds
+
+#TAKE ALL EXONS OF CCDS GENES
+grep -Ff genes_ccds $genc_full | awk '{if($3=="exon") print $0}' | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") print $1 "\t" $4-1 "\t" $5 "\t" "EXONCCDS" "\t" $(x+1) "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > unique_exons_ccds.bed
+
+#STORE COORDINATES EXONS CCDS
+less unique_exons_ccds.bed | awk '{print $1"_"$2"_"$3 "\t" $0}' > coords_unique_exons_ccds.bed
+#TAKE OUT CDS CCDS FROM EXONS CCDS
+grep -Fvf coords_ccds coords_unique_exons_ccds.bed | awk '{print $2 "\t" $3 "\t" $4 "\t" "EXONCCDS" "\t" $6 "\t" $7}' > unique_exons_ccds.bed
+#REMOVE COORDS
+rm coords_ccds coords_unique_exons_ccds.bed
+
+
+
+
+#TAKE EXONS OF NONCCDS GENES
+grep -Fvf genes_ccds $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" "EXONnonCCDS" "\t" $(x+1) "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > unique_nonccds.bed
+
+#TAKE SEQUENCES, STRANDED INFO
+
+echo "creating fasta sequences..."
+
+fastaFb="$bedtools_path_full/fastaFromBed"
+
+$fastaFb -s -fi $genome_full -bed unique_ccds.bed  -fo unique_ccds_seq.fa
+
+awk '{$4=$1"_"$2"_"$3"_"$4"_"$5"_"$6; print $0}' OFS="\t" unique_ccds.bed  | $fastaFb -fi $genome_full -name -bed - -tab -fo unique_ccds_seq_name_tab
+paste <(cut -f 1 unique_ccds_seq_name_tab| tr '_' '\t') <(cut -f 2 unique_ccds_seq_name_tab | sed 's/[A-Z]/& /g') > sequences_ccds
+
+awk '{$4=$1"_"$2"_"$3"_"$4"_"$5"_"$6; print $0}' OFS="\t" unique_exons_ccds.bed  | $fastaFb -fi $genome_full -name -bed - -tab -fo unique_exons_ccds_seq_name_tab
+paste <(cut -f 1 unique_exons_ccds_seq_name_tab | tr '_' '\t') <(cut -f 2 unique_exons_ccds_seq_name_tab | sed 's/[A-Z]/& /g') > sequences_exonsccds
+
+awk '{$4=$1"_"$2"_"$3"_"$4"_"$5"_"$6; print $0}' OFS="\t" unique_nonccds.bed  | $fastaFb -fi $genome_full -name -bed - -tab -fo unique_nonccds_seq_name_tab
+paste <(cut -f 1 unique_nonccds_seq_name_tab | tr '_' '\t') <(cut -f 2 unique_nonccds_seq_name_tab | sed 's/[A-Z]/& /g') > sequences_nonccds
+
+
+$fastaFb -s -fi $genome_full -bed unique_exons_ccds.bed  -fo unique_exons_ccds_seq.fa
+$fastaFb -s -fi $genome_full -bed unique_nonccds.bed -fo unique_exons_nonccds_seq.fa
+
+#CAT SEQUENCES TOGETHER FOR ORF FINDING
+cat unique_ccds_seq.fa unique_exons_ccds_seq.fa > unique_ccds_exonccds_seq.fa
+
+#make all CDS regions
+less $genc_full | awk '{if($3=="CDS") print $0}' | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") print $1 "\t" $4-1 "\t" $5 "\t" "cds" "\t" $(x+1) "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > all_cds.bed
+
+echo "assembling transcript information..."
+
+#TAKE TRANSCR CCDS
+grep -Ff genes_ccds $genc_full | awk '{ for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1) "\t" $(x+1)  "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_ccds.bed
+
+
+#TAKE TRANSCR APPRIS CCDS
+
+if [ "$4" = true ] ; then
+
+grep -Ff genes_ccds $genc_full | grep appris_prin | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1)"\t" $(x+1) "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_ccds_appris_prin.bed
+cut -f 5 transcr_exons_ccds_appris_prin.bed | sort | uniq > genes_appris_prin
+grep -Ff genes_ccds $genc_full | grep -Fvf genes_appris_prin - | grep appris | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1)"\t" $(x+1) "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_ccds_appris_noprin.bed
+cut -f 5 transcr_exons_ccds_appris_noprin.bed | sort | uniq > genes_appris_noprin
+grep -Ff genes_ccds $genc_full | grep -Fvf genes_appris_prin - | grep -Fvf genes_appris_noprin | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1)"\t" $(x+1) "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_ccds_noappris_noprin.bed
+cut -f 5 transcr_exons_ccds_noappris_noprin.bed | sort | uniq > genes_noappris_noprin
+cat transcr_exons_ccds_appris_prin.bed transcr_exons_ccds_appris_noprin.bed transcr_exons_ccds_noappris_noprin.bed > transcr_exons_ccds_appris.bed
+cat genes_appris_prin genes_appris_noprin genes_noappris_noprin > genes_ccds_appris
+
+fi
+
+#TAKE TRANSCR NONCCDS
+grep -Fvf genes_ccds $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") for (y=1;y<=NF;y++) if ($y~"^transcript_id") if($3=="exon") print $1 "\t" $4-1 "\t" $5 "\t" $(y+1)"\t" $(x+1) "\t" $7}'| sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k2,2n | uniq > transcr_exons_nonccds.bed
+
+#start_stop_cds
+
+less $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") if($3=="start_codon" || $3=="stop_codon") print $1 "\t"$4-1 "\t"$5 "\t" $3 "\t" $(x+1) "\t"$7}' | sed 's/;//g' | sed 's/"//g' | sort -k1,1 -k2,2g | uniq | awk 'p{print $0 "\t" $2-p}{p=$2}' | tac | awk 'p{print $0 "\t" $2-p}{p=$2}' | tac | awk '{if($NF<-100 || $(NF-1)>100) print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' > start_stops_FAR.bed
+
+#make cds transcript coords
+echo "Creating transcript cds coordinates from gtf..."
+
+awk '{ for (x=1;x<=NF;x++) if ($x~"^transcript_id") if ( $3=="exon" || $3=="CDS" ) print $1 "\t" $3 "\t" $4 "\t" $5 "\t" $7 "\t" $(x+1)}' $genc_full | sed 's/"//g' | sed 's/;//g' | sort -k1,1 -k3,3g > exons_cds_all
+$scripts_dir_full"/gtf_to_start_stop_tr.R" 
+
+#make cds frames
+
+less $genc_full | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") if($3=="CDS") print $1 "_" $4-1 "_" $5 "_" "CCDS" "_" $(x+1) "\t" $8 "\t"$7 "\t" $5-($4-1)}' | sed 's/;//g' | sed 's/"//g' | sort -k1,1 -k2,2 | uniq > frames_ccds
+
+
+#take all exonic regions
+less $genc_full | awk '{if($3=="exon") print $0}' | awk '{for (x=1;x<=NF;x++) if ($x~"^gene_id") print $1 "\t" $4-1 "\t" $5 "\t" "exon" "\t" $(x+1) "\t" $7 }' | sort -k1,1 -k2,2n | uniq | sed 's/;//g' | sed 's/"//g' > all_exons.bed
+
+$scripts_dir_full"/genes_coor.R"
+echo "Done!"
+
+
diff --git a/scripts/create_metaplots.bash b/scripts/create_metaplots.bash
new file mode 100755
index 0000000..bd1525b
--- /dev/null
+++ b/scripts/create_metaplots.bash
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+##This script creates aggregate plots around start-stop codons, it uses as arguments a bam file, a bed file for start-stop positions (e.g. the one produced by the create_annotations_files.bash script), a name as an appendix for further analysis, the bedtools executables directory, the RiboTaper scripts directory
+
+
+if [ $# -ne 5 ]; then  
+	echo "Usage: create_metaplots.bash <ribo.bam> <bedfile> <name> <bedtools_dir> <scripts_dir>"
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   ribo.bam file not found!."
+     exit 1
+   fi
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   start_stop bed file not found!."
+     exit 1
+   fi
+
+bedtools_dir=$4
+
+echo "Downsampling to 10%..."
+
+samtools view -s 1.03 $1 > sample_to_metapl.sam
+
+cat <( samtools view -H $1 ) <(cat sample_to_metapl.sam) | samtools view - -bS > sample_to_metapl.bam
+
+echo "Intersecting alignments with start/stop sites ..."
+
+$bedtools_dir"bamToBed" -i sample_to_metapl.bam -bed12 -split | /data/ohler/Lorenzo/bins/windowBed -w 100 -sm -b stdin -a $2 | awk '{print $7 "\t" $8 "\t" $9 "\t" $10 "\t" $11 "\t" $12 "\t" $13 "\t" $14 "\t" $15 "\t" $16 "\t" $17 "\t" $18}' | sort -k1,1 -k2,2g | /data/ohler/Lorenzo/bins/closestBed -s -t "last" -a stdin -b $2 > $3
+
+if !  [[ -s $3 ]]; then
+     echo "!!!!!   no intersections found, check input files"
+     exit 1
+   fi
+
+
+
+echo "Creating metaplots..."
+
+scripts_dir=$5
+# explicit "/" so that <scripts_dir> works with or without a trailing slash
+"$scripts_dir/metag.R" "$3"
+# -p: do not fail when metaplots/ is left over from a previous run
+mkdir -p metaplots
+
+mv *.png metaplots/
+
+
+
+echo "Done !!! "
+
diff --git a/scripts/create_protein_db.R b/scripts/create_protein_db.R
new file mode 100755
index 0000000..7cf4c3d
--- /dev/null
+++ b/scripts/create_protein_db.R
@@ -0,0 +1,310 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for grouping and filtering identified ORFs, writing BED files and creating a protein FASTA file.
+
+
+suppressMessages(library("seqinr"))
+
+print(paste("--- create protein db and output final ORFs ---",date(),sep=" "))
+
+
+ORFs_new_more<-read.table("ORFs_CCDS/more_tapers/ORFs_sign_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+ORFs_new_max<-read.table("ORFs_CCDS/max_P_sites/ORFs_sign_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+ORFs_new_best<-read.table("ORFs_CCDS/best_periodicity/ORFs_sign_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+nonccdsORFS_new_more<-read.table("ORFs_NONCCDS/more_tapers/ORFs_sign_nocds_nofilter",stringsAsFactors=F,header=T,quote = "")
+
+nonccdsORFS_new_max<-read.table("ORFs_NONCCDS/max_P_sites/ORFs_sign_nocds_nofilter",stringsAsFactors=F,header=T,quote = "")
+
+nonccdsORFS_new_best<-read.table("ORFs_NONCCDS/best_periodicity/ORFs_sign_nocds_nofilter",stringsAsFactors=F,header=T,quote = "")
+
+
+sORFS_new_more<-read.table("ORFs_CCDS/more_tapers/sORFs_sign_filtered_cds",stringsAsFactors=F,header=T,quote = "")
+
+sORFS_new_max<-read.table("ORFs_CCDS/max_P_sites/sORFs_sign_filtered_cds",stringsAsFactors=F,header=T,quote = "")
+
+sORFS_new_best<-read.table("ORFs_CCDS/best_periodicity/sORFs_sign_filtered_cds",stringsAsFactors=F,header=T,quote = "")
+
+
+ncORFS_new_more<-read.table("ORFs_NONCCDS/more_tapers/ORFs_sign_cds_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+ncORFS_new_max<-read.table("ORFs_NONCCDS/max_P_sites/ORFs_sign_cds_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+ncORFS_new_best<-read.table("ORFs_NONCCDS/best_periodicity/ORFs_sign_cds_notfiltered_multi",stringsAsFactors=F,header=T,quote = "")
+
+if(dim(ORFs_new_more)[1]>0 & dim(ORFs_new_max)[1]>0 & dim(ORFs_new_best)[1]>0){
+        
+        ORFs_new<-rbind(ORFs_new_more,ORFs_new_max[!ORFs_new_max[,"gene_id"]%in%ORFs_new_more[,"gene_id"],])
+        ORFs_new<-rbind(ORFs_new,ORFs_new_best[!ORFs_new_best[,"gene_id"]%in%ORFs_new[,"gene_id"],])
+        ORFs_new$category<-"ORFs_ccds"
+        ORFs_new$annotation<-"protein_coding"
+        ORFs_new$header_tofasta<-paste(ORFs_new$ORF_id_tr,ORFs_new$gene_id,ORFs_new$Method,ORFs_new$annotation,ORFs_new$category,ORFs_new$ORF_P_sites,ORFs_new$ORF_spec3_spec_ribo,ORFs_new$ORF_spec_multi_ribo,sep=":")
+}
+if(dim(ncORFS_new_more)[1]>0 & dim(ncORFS_new_max)[1]>0 & dim(ncORFS_new_best)[1]>0){
+        
+        ncORFS_new<-rbind(ncORFS_new_more,ncORFS_new_max[!ncORFS_new_max[,"gene_id"]%in%ncORFS_new_more[,"gene_id"],])
+        ncORFS_new<-rbind(ncORFS_new,ncORFS_new_best[!ncORFS_new_best[,"gene_id"]%in%ncORFS_new[,"gene_id"],])
+        ncORFS_new$category<-"ncORFS"
+        ncORFS_new$header_tofasta<-paste(ncORFS_new$ORF_id_tr,ncORFS_new$gene_id,ncORFS_new$Method,ncORFS_new$annotation,ncORFS_new$category,ncORFS_new$ORF_P_sites,ncORFS_new$ORF_spec3_spec_ribo,ncORFS_new$ORF_spec_multi_ribo,sep=":")
+        ncORFS_new[,c("annotated_start","annotated_stop","ORF_id_tr_annotated")]<-NA
+}
+if(dim(nonccdsORFS_new_more)[1]>0 & dim(nonccdsORFS_new_max)[1]>0 & dim(nonccdsORFS_new_best)[1]>0){
+        nonccdsORFS_new<-rbind(nonccdsORFS_new_more,nonccdsORFS_new_max[!nonccdsORFS_new_max[,"gene_id"]%in%nonccdsORFS_new_more[,"gene_id"],])
+        nonccdsORFS_new<-rbind(nonccdsORFS_new,nonccdsORFS_new_best[!nonccdsORFS_new_best[,"gene_id"]%in%nonccdsORFS_new[,"gene_id"],])
+        nonccdsORFS_new$category<-"nonccds_coding_ORFs"
+        nonccdsORFS_new$header_tofasta<-paste(nonccdsORFS_new$ORF_id_tr,nonccdsORFS_new$gene_id,nonccdsORFS_new$Method,nonccdsORFS_new$annotation,nonccdsORFS_new$category,nonccdsORFS_new$ORF_P_sites,nonccdsORFS_new$ORF_spec3_spec_ribo,nonccdsORFS_new$ORF_spec_multi_ribo,sep=":")
+        nonccdsORFS_new[,c("annotated_start","annotated_stop","ORF_id_tr_annotated")]<-NA
+}
+
+if(dim(sORFS_new_more)[1]>0 & dim(sORFS_new_max)[1]>0 & dim(sORFS_new_best)[1]>0){
+        
+        sORFS_new<-rbind(sORFS_new_more,sORFS_new_max[!sORFS_new_max[,"gene_id"]%in%sORFS_new_more[,"gene_id"],])
+        sORFS_new<-rbind(sORFS_new,sORFS_new_best[!sORFS_new_best[,"gene_id"]%in%sORFS_new[,"gene_id"],])
+        sORFS_new$category<-sORFS_new$type
+        sORFS_new$type<-NULL
+        sORFS_new$annotation<-"protein_coding"
+        sORFS_new$header_tofasta<-paste(sORFS_new$ORF_id_tr,sORFS_new$gene_id,sORFS_new$Method,sORFS_new$annotation,sORFS_new$category,sORFS_new$ORF_P_sites,sORFS_new$ORF_spec3_spec_ribo,sORFS_new$ORF_spec_multi_ribo,sep=":")
+}
+# Add missing annotated_* columns only to tables that were actually built above; an absent table is skipped later via exists(), so it must not abort the script here.
+if(exists("sORFS_new") && is.null(sORFS_new$annotated_start)){sORFS_new$annotated_start<-NA}
+if(exists("sORFS_new") && is.null(sORFS_new$annotated_stop)){sORFS_new$annotated_stop<-NA}
+if(exists("sORFS_new") && is.null(sORFS_new$ORF_id_tr_annotated)){sORFS_new$ORF_id_tr_annotated<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$annotated_start)){ORFs_new$annotated_start<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$annotated_stop)){ORFs_new$annotated_stop<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$ORF_id_tr_annotated)){ORFs_new$ORF_id_tr_annotated<-NA}
+
+
+cat_obj<-c("ORFs_new","ncORFS_new","nonccdsORFS_new","sORFS_new")
+present<-c()
+for(q in 1:length(cat_obj)){
+        present[q]<-exists(cat_obj[q])
+}
+
+ORFs_ALL<-do.call(rbind.data.frame,mget(cat_obj[present]))
+
+ORFs_ALL<-ORFs_ALL[(ORFs_ALL$ORF_pval_multi_ribo<0.05),]
+
+ORFs_ALL<-ORFs_ALL[!is.na(ORFs_ALL$ORF_pept),]
+names(ORFs_ALL)<-gsub(x=names(ORFs_ALL),pattern="st2vect",replacement="stop_pos")
+
+ORFs_ALL_filt<-ORFs_ALL[which((ORFs_ALL$pct_covered_onlymulti_ribo/ORFs_ALL$pct_region_covered_ribo)<0.3),]
+rem<-which(ORFs_ALL_filt[,"category"]=="nonccds_coding_ORFs" & ORFs_ALL_filt[,"annotation"]!="protein_coding")
+if(length(rem)>0){
+        ORFs_ALL_filt<-ORFs_ALL_filt[-rem,]
+}
+ORFs_ALL_filt<-ORFs_ALL_filt[!is.na(ORFs_ALL_filt$ORF_pept),]
+
+
+
+ORFs_ALL<-ORFs_ALL[,c("gene_id","gene_symbol","transcript_id","annotation",
+                      "length","strand", "n_exons", "P_sites_sum", "RNA_sites", "Ribo_cov_aver", 
+                      "RNA_cov_aver","category","ORF_id_tr", "start_pos","stop_pos", "annotated_start", "annotated_stop", "ORF_id_gen", 
+                      "ORF_length", "reads_ribo", "reads_rna", "ORF_P_sites","ORF_Psit_pct_in_frame", 
+                      "ORF_RNA_sites", "ORF_RNAsit_pct_in_frame", "ORF_pval_multi_ribo", 
+                      "ORF_pval_multi_rna","ORF_spec_multi_ribo","ORF_spec_multi_rna", "ORF_id_tr_annotated", "n_exons_ORF","pct_region_covered_ribo", "pct_covered_onlymulti_ribo", "pct_region_covered_rna",
+                      "pct_covered_onlymulti_rna", "Method", "header_tofasta", "ORF_pept")
+                   ]
+
+ORFs_ALL_filt<-ORFs_ALL_filt[,c("gene_id","gene_symbol","transcript_id","annotation",
+                                "length","strand", "n_exons", "P_sites_sum", "RNA_sites", "Ribo_cov_aver", 
+                                "RNA_cov_aver","category","ORF_id_tr", "start_pos","stop_pos", "annotated_start", "annotated_stop", "ORF_id_gen", 
+                                "ORF_length","reads_ribo","reads_rna", "ORF_P_sites", "ORF_Psit_pct_in_frame", 
+                                "ORF_RNA_sites", "ORF_RNAsit_pct_in_frame", "ORF_pval_multi_ribo", 
+                                "ORF_pval_multi_rna","ORF_spec_multi_ribo","ORF_spec_multi_rna", "ORF_id_tr_annotated", "n_exons_ORF","pct_region_covered_ribo", "pct_covered_onlymulti_ribo", "pct_region_covered_rna",
+                                "pct_covered_onlymulti_rna", "Method", "header_tofasta", "ORF_pept")
+                             ]
+
+names(ORFs_ALL)[which(names(ORFs_ALL)=="reads_ribo")]<-"ORF_reads_ribo"
+names(ORFs_ALL)[which(names(ORFs_ALL)=="reads_rna")]<-"ORF_reads_rna"
+names(ORFs_ALL_filt)[which(names(ORFs_ALL_filt)=="reads_ribo")]<-"ORF_reads_ribo"
+names(ORFs_ALL_filt)[which(names(ORFs_ALL_filt)=="reads_rna")]<-"ORF_reads_rna"
+
+#write.table(ORFs_ALL_filt,file="ORFs_more_filt",quote=F,col.names=T,row.names=F,sep="\t")
+#write.table(ORFs_ALL,file="ORFs_more",quote=F,col.names=T,row.names=F,sep="\t")
+#write.fasta(sequences=as.list(ORFs_ALL$ORF_pept),names=ORFs_ALL$header_tofasta,file.out="protein_db_more.fasta")
+
+if(nrow(ORFs_new_more)>0 && nrow(ORFs_new_max)>0 && nrow(ORFs_new_best)>0){ # merge priority: "max", then "more", then "best"; a later table only adds genes not yet present
+        ORFs_new<-rbind(ORFs_new_max,ORFs_new_more[!ORFs_new_more[,"gene_id"]%in%ORFs_new_max[,"gene_id"],])
+        ORFs_new<-rbind(ORFs_new,ORFs_new_best[!ORFs_new_best[,"gene_id"]%in%ORFs_new[,"gene_id"],])
+        ORFs_new$category<-"ORFs_ccds"
+        ORFs_new$annotation<-"protein_coding"
+        ORFs_new$header_tofasta<-paste(ORFs_new$ORF_id_tr,ORFs_new$gene_id,ORFs_new$Method,ORFs_new$annotation,ORFs_new$category,ORFs_new$ORF_P_sites,ORFs_new$ORF_spec3_spec_ribo,ORFs_new$ORF_spec_multi_ribo,sep=":")
+}
+if(nrow(ncORFS_new_more)>0 && nrow(ncORFS_new_max)>0 && nrow(ncORFS_new_best)>0){
+        # The "more" rows are filtered by their own gene_id (the mask used to be built from the "max" table only, so no "more" row was ever kept).
+        ncORFS_new<-rbind(ncORFS_new_max,ncORFS_new_more[!ncORFS_new_more[,"gene_id"]%in%ncORFS_new_max[,"gene_id"],])
+        ncORFS_new<-rbind(ncORFS_new,ncORFS_new_best[!ncORFS_new_best[,"gene_id"]%in%ncORFS_new[,"gene_id"],])
+        ncORFS_new$category<-"ncORFS"
+        ncORFS_new$header_tofasta<-paste(ncORFS_new$ORF_id_tr,ncORFS_new$gene_id,ncORFS_new$Method,ncORFS_new$annotation,ncORFS_new$category,ncORFS_new$ORF_P_sites,ncORFS_new$ORF_spec3_spec_ribo,ncORFS_new$ORF_spec_multi_ribo,sep=":")
+        ncORFS_new[,c("annotated_start","annotated_stop","ORF_id_tr_annotated")]<-NA
+}
+if(nrow(nonccdsORFS_new_more)>0 && nrow(nonccdsORFS_new_max)>0 && nrow(nonccdsORFS_new_best)>0){
+        # Same merge as above; the "more" mask is now computed on the "more" table itself.
+        nonccdsORFS_new<-rbind(nonccdsORFS_new_max,nonccdsORFS_new_more[!nonccdsORFS_new_more[,"gene_id"]%in%nonccdsORFS_new_max[,"gene_id"],])
+        nonccdsORFS_new<-rbind(nonccdsORFS_new,nonccdsORFS_new_best[!nonccdsORFS_new_best[,"gene_id"]%in%nonccdsORFS_new[,"gene_id"],])
+        nonccdsORFS_new$category<-"nonccds_coding_ORFs"
+        nonccdsORFS_new$header_tofasta<-paste(nonccdsORFS_new$ORF_id_tr,nonccdsORFS_new$gene_id,nonccdsORFS_new$Method,nonccdsORFS_new$annotation,nonccdsORFS_new$category,nonccdsORFS_new$ORF_P_sites,nonccdsORFS_new$ORF_spec3_spec_ribo,nonccdsORFS_new$ORF_spec_multi_ribo,sep=":")
+        nonccdsORFS_new[,c("annotated_start","annotated_stop","ORF_id_tr_annotated")]<-NA
+}
+if(nrow(sORFS_new_more)>0 && nrow(sORFS_new_max)>0 && nrow(sORFS_new_best)>0){
+        # Same merge as above; the category is taken from the `type` column, which is then dropped.
+        sORFS_new<-rbind(sORFS_new_max,sORFS_new_more[!sORFS_new_more[,"gene_id"]%in%sORFS_new_max[,"gene_id"],])
+        sORFS_new<-rbind(sORFS_new,sORFS_new_best[!sORFS_new_best[,"gene_id"]%in%sORFS_new[,"gene_id"],])
+        sORFS_new$category<-sORFS_new$type
+        sORFS_new$type<-NULL
+        sORFS_new$annotation<-"protein_coding"
+        sORFS_new$header_tofasta<-paste(sORFS_new$ORF_id_tr,sORFS_new$gene_id,sORFS_new$Method,sORFS_new$annotation,sORFS_new$category,sORFS_new$ORF_P_sites,sORFS_new$ORF_spec3_spec_ribo,sORFS_new$ORF_spec_multi_ribo,sep=":")
+}
+# Add NA placeholders for the annotated-ORF columns when a table lacks them. Tables that were never
+# created are skipped, matching the exists() check used below when the categories are combined.
+if(exists("sORFS_new") && is.null(sORFS_new$annotated_start)){sORFS_new$annotated_start<-NA}
+if(exists("sORFS_new") && is.null(sORFS_new$annotated_stop)){sORFS_new$annotated_stop<-NA}
+if(exists("sORFS_new") && is.null(sORFS_new$ORF_id_tr_annotated)){sORFS_new$ORF_id_tr_annotated<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$annotated_start)){ORFs_new$annotated_start<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$annotated_stop)){ORFs_new$annotated_stop<-NA}
+if(exists("ORFs_new") && is.null(ORFs_new$ORF_id_tr_annotated)){ORFs_new$ORF_id_tr_annotated<-NA}
+
+# Stack every category table that exists in the workspace.
+cat_obj<-c("ORFs_new","ncORFS_new","nonccdsORFS_new","sORFS_new")
+present<-logical(length(cat_obj))
+for(q in seq_along(cat_obj)){
+        present[q]<-exists(cat_obj[q])
+}
+ORFs_ALL<-do.call(rbind.data.frame,mget(cat_obj[present]))
+# Keep rows with ORF_pval_multi_ribo < 0.05; which() drops NA comparisons instead of yielding all-NA rows.
+ORFs_ALL<-ORFs_ALL[which(ORFs_ALL$ORF_pval_multi_ribo<0.05),]
+ORFs_ALL<-ORFs_ALL[!is.na(ORFs_ALL$ORF_pept),]
+names(ORFs_ALL)<-gsub(x=names(ORFs_ALL),pattern="st2vect",replacement="stop_pos")
+
+# Filtered set: pct_covered_onlymulti_ribo / pct_region_covered_ribo below 0.3, minus "nonccds_coding_ORFs" rows whose annotation is not "protein_coding".
+ORFs_ALL_filt<-ORFs_ALL[which((ORFs_ALL$pct_covered_onlymulti_ribo/ORFs_ALL$pct_region_covered_ribo)<0.3),]
+rem<-which(ORFs_ALL_filt[,"category"]=="nonccds_coding_ORFs" & ORFs_ALL_filt[,"annotation"]!="protein_coding")
+if(length(rem)>0){
+        ORFs_ALL_filt<-ORFs_ALL_filt[-rem,]
+}
+ORFs_ALL_filt<-ORFs_ALL_filt[!is.na(ORFs_ALL_filt$ORF_pept),]
+
+
+# Build BED records (one row per exon) for every ORF in `orfs`.
+# `to_check_ALL` is split on ";" into exon strings and each exon string on "_" into chr/start/end;
+# "NA" entries are skipped. NOTE(review): a chromosome name containing "_" would be mis-split - confirm upstream format.
+# The name column is "ORF_id_tr;category;annotation"; the P_sites column carries ORF_P_sites.
+# Returns a data.frame with columns chr, start, end, orf_name, P_sites, strand_bed (an empty data.frame when there is nothing to export).
+orfs_to_bed<-function(orfs){
+        # seq_len() instead of 1:n, so that an empty table yields no iterations rather than rows 1 and 0
+        per_orf<-lapply(seq_len(nrow(orfs)),function(i){
+                orf<-orfs[i,]
+                all_ex<-strsplit(as.character(orf$to_check_ALL),split=";")[[1]]
+                all_ex<-all_ex[!is.na(all_ex) & all_ex!="NA"]
+                if(length(all_ex)==0){
+                        return(NULL) # ORF without exon coordinates: contributes no BED rows
+                }
+                ex<-strsplit(all_ex,split="_")
+                data.frame(chr=vapply(ex,`[`,character(1),1),
+                           start=vapply(ex,`[`,character(1),2),
+                           end=vapply(ex,`[`,character(1),3),
+                           orf_name=paste(orf$ORF_id_tr,orf$category,orf$annotation,sep=";"),
+                           P_sites=orf$ORF_P_sites,
+                           strand_bed=orf$strand,
+                           stringsAsFactors=FALSE)
+        })
+        do.call(args=per_orf,what=rbind.data.frame)
+}
+
+
+# All translated ORFs: write the BED file, sort it by chromosome and start, then drop the unsorted copy.
+coords_bed<-orfs_to_bed(ORFs_ALL)
+
+
+write.table(file="translated_ORFs.bed",x=coords_bed,col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")
+
+system("sort -k1,1 -k2,2n translated_ORFs.bed > translated_ORFs_sorted.bed")
+system("rm translated_ORFs.bed")
+
+
+# Same export for the filtered ORF set.
+coords_bed<-orfs_to_bed(ORFs_ALL_filt)
+
+
+write.table(file="translated_ORFs_filtered.bed",x=coords_bed,col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")
+
+system("sort -k1,1 -k2,2n translated_ORFs_filtered.bed > translated_ORFs_filtered_sorted.bed")
+system("rm translated_ORFs_filtered.bed")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Columns reported in the final tables, in output order. Defined once so that the unfiltered
+# and the filtered table cannot drift apart.
+final_cols<-c("gene_id", "gene_symbol", "transcript_id", "annotation", "length", "strand", "n_exons",
+              "P_sites_sum", "RNA_sites", "Ribo_cov_aver", "RNA_cov_aver", "category", "ORF_id_tr", "start_pos",
+              "stop_pos", "annotated_start", "annotated_stop", "ORF_id_gen", "ORF_length", "reads_ribo", "reads_rna",
+              "ORF_P_sites", "ORF_Psit_pct_in_frame", "ORF_RNA_sites", "ORF_RNAsit_pct_in_frame", "ORF_pval_multi_ribo", "ORF_pval_multi_rna",
+              "ORF_spec_multi_ribo", "ORF_spec_multi_rna", "ORF_id_tr_annotated", "n_exons_ORF", "pct_region_covered_ribo",
+              "pct_covered_onlymulti_ribo", "pct_region_covered_rna", "pct_covered_onlymulti_rna", "Method", "header_tofasta", "ORF_pept")
+ORFs_ALL<-ORFs_ALL[,final_cols]
+ORFs_ALL_filt<-ORFs_ALL_filt[,final_cols]
+
+# Prefix the read-count columns with "ORF_".
+names(ORFs_ALL)[which(names(ORFs_ALL)=="reads_ribo")]<-"ORF_reads_ribo"
+names(ORFs_ALL)[which(names(ORFs_ALL)=="reads_rna")]<-"ORF_reads_rna"
+names(ORFs_ALL_filt)[which(names(ORFs_ALL_filt)=="reads_ribo")]<-"ORF_reads_ribo"
+names(ORFs_ALL_filt)[which(names(ORFs_ALL_filt)=="reads_rna")]<-"ORF_reads_rna"
+
+# Write both tables as tab-separated text with a header row, and the ORF_pept sequences as FASTA.
+write.table(ORFs_ALL_filt,file="ORFs_max_filt",quote=FALSE,col.names=TRUE,row.names=FALSE,sep="\t")
+write.table(ORFs_ALL,file="ORFs_max",quote=FALSE,col.names=TRUE,row.names=FALSE,sep="\t")
+write.fasta(sequences=as.list(ORFs_ALL$ORF_pept),names=ORFs_ALL$header_tofasta,file.out="protein_db_max.fasta")
+
+print(paste("--- protein db and output final ORFs, Done! ---",date(),sep=" "))
+
+
+
+
+
+
+
+
diff --git a/scripts/create_tracks.bash b/scripts/create_tracks.bash
new file mode 100755
index 0000000..40f1305
--- /dev/null
+++ b/scripts/create_tracks.bash
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+##This script creates the data_tracks files. Arguments: a bed file, a fasta file, a name used as a suffix for the output files, and the bedtools executable directory
+
+if [ $# -ne 4 ]; then
+	echo "Usage: create_tracks.bash <bed_file> <fasta_file> <name> <bedtools_dir>"
+	exit 1
+fi
+if ! [[ -f "$1" ]]; then
+     echo "!!!!!   bed file not found!."
+     exit 1
+fi
+
+if ! [[ -f "$2" ]]; then
+     echo "!!!!!   fasta file not found!."
+     exit 1
+fi
+bedtools_dir="$4"
+# Paths are quoted throughout so that arguments containing spaces are passed intact.
+mkdir -p data_tracks
+
+echo "-----Calculating coverage tracks for each exon-----"
+# Per-feature coverageBed output for the four BAM files; the first five fields are joined with "_" into one key column.
+"$bedtools_dir/coverageBed" -s -split -abam RIBO_unique.bam -b "$1" | sort -k1,1 -k2,2g | sed 's/_//g' | awk '{ print $1 "_" $2 "_" $3 "_" $4 "_" $5"\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10}' > "RIBO_unique_counts_$3"
+
+"$bedtools_dir/coverageBed" -s -split -abam RNA_unique.bam -b "$1" | sort -k1,1 -k2,2g | sed 's/_//g' | awk '{ print $1 "_" $2 "_" $3 "_" $4 "_" $5"\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10}' > "RNA_unique_counts_$3"
+
+"$bedtools_dir/coverageBed" -s -split -abam RIBO_best.bam -b "$1" | sort -k1,1 -k2,2g | sed 's/_//g' | awk '{ print $1 "_" $2 "_" $3 "_" $4 "_" $5"\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10}' > "RIBO_best_counts_$3"
+
+"$bedtools_dir/coverageBed" -s -split -abam RNA_best.bam -b "$1" | sort -k1,1 -k2,2g | sed 's/_//g' | awk '{ print $1 "_" $2 "_" $3 "_" $4 "_" $5"\t" $6 "\t" $7 "\t" $8 "\t" $9 "\t" $10}' > "RNA_best_counts_$3"
+
+# Per-position tracks (-d): field 8 of every output line is appended, space-separated, to the line of its feature (fields 1-6).
+"$bedtools_dir/coverageBed" -s -d -a P_sites_all -b "$1" | awk '{ print $1 ";" $2 ";" $3 ";" $4 ";" $5";" $6 "\t" "_" $8}' | awk -F"\t" '{if(a[$1])a[$1]=a[$1]" "$NF; else a[$1]=$NF}END{for (i in a)print i "\t" a[i]}'| sed 's/_//g' | sed 's/;/\t/g' | sort -k1,1 -k4,4 -k2,2g > "data_tracks/P_sites_all_tracks_$3"
+
+
+"$bedtools_dir/coverageBed" -s -d -split -abam RIBO_best.bam -b "$1" | awk '{ print $1 ";" $2 ";" $3 ";" $4 ";" $5";" $6 "\t" "_" $8}' | awk -F"\t" '{if(a[$1])a[$1]=a[$1]" "$NF; else a[$1]=$NF}END{for (i in a)print i "\t" a[i]}'| sed 's/_//g' | sed 's/;/\t/g' | sort -k1,1 -k4,4 -k2,2g > "data_tracks/RIBO_tracks_$3"
+
+"$bedtools_dir/coverageBed" -s -d -split -abam RNA_best.bam -b "$1" | awk '{ print $1 ";" $2 ";" $3 ";" $4 ";" $5";" $6 "\t" "_" $8}' | awk -F"\t" '{if(a[$1])a[$1]=a[$1]" "$NF; else a[$1]=$NF}END{for (i in a)print i "\t" a[i]}'| sed 's/_//g' | sed 's/;/\t/g' | sort -k1,1 -k4,4 -k2,2g > "data_tracks/RNA_tracks_$3"
+
+"$bedtools_dir/coverageBed" -s -d -a Centered_RNA -b "$1" | awk '{ print $1 ";" $2 ";" $3 ";" $4 ";" $5";" $6 "\t" "_" $8}' | awk -F"\t" '{if(a[$1])a[$1]=a[$1]" "$NF; else a[$1]=$NF}END{for (i in a)print i "\t" a[i]}'| sed 's/_//g' | sed 's/;/\t/g' | sort -k1,1 -k4,4 -k2,2g > "data_tracks/Centered_RNA_tracks_$3"
+
+echo "-----Merging tracks together-----"
+
+# Concatenate the four track files with the fasta file ($2) and group the lines that share the same key (first six fields).
+cat "data_tracks/P_sites_all_tracks_$3" "data_tracks/RIBO_tracks_$3" "data_tracks/RNA_tracks_$3" "data_tracks/Centered_RNA_tracks_$3" "$2" | tr '\t' '_' | sed 's/_/\t/6' | awk -F"\t" '{a[$1]=a[$1]"\n" $1 "\t" $2}END{for (i in a)print i "\t" a[i];}' | awk -F"\t" '{if($2>=0)print $0}' | sed 's/_/ /5' | sed 's/\t/ /1' > "data_tracks/Psit_Ribo_Rna_Cent_tracks_$3"
+# The first space-separated field of every merged line is kept as an index file.
+cut -f 1 "data_tracks/Psit_Ribo_Rna_Cent_tracks_$3" -d" " > "data_tracks/index_tracks_$3"
+
+
+
diff --git a/scripts/functions.R b/scripts/functions.R
new file mode 100755
index 0000000..a6b1cb0
--- /dev/null
+++ b/scripts/functions.R
@@ -0,0 +1,1734 @@
+library("XNomial")
+library("foreach")
+library("doMC")
+
+library("multitaper")
+library("seqinr")
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+### Reads a whole text file in one call and returns its lines as a character vector (split on "\n"). x: path of the file. From http://www.r-bloggers.com/faster-files-in-r/
+readBigText<-function(x){
+        f<-file(x,"rb")
+        on.exit(close(f)) # release the connection even when readChar() fails
+        a<-readChar(f,file.info(x)$size,useBytes=TRUE)
+        strsplit(a,"\n",fixed=TRUE,useBytes=TRUE)[[1]]
+}
+
+
+
+### Builds the frequency axis matching fft(data): length(data) values, a rising ramp from 0 to Nyq.Freq
+### followed by a rising ramp from -Nyq.Freq to 0. With an odd sample count the first ramp gets the extra point.
+### From http://stackoverflow.com/questions/3485456/useful-little-functions-in-r
+getFFTFreqs<-function(Nyq.Freq, data)
+{
+        n_samples <- length(data)
+        is_odd <- (n_samples %% 2) == 1
+        # sizes of the non-negative and of the negative half of the axis
+        n_upper <- if (is_odd) (n_samples+1)/2 else n_samples/2
+        n_lower <- if (is_odd) (n_samples-1)/2 else n_samples/2
+        upper_half <- seq(0, Nyq.Freq, length.out=n_upper)
+        lower_half <- seq(-Nyq.Freq, 0, length.out=n_lower)
+        c(upper_half, lower_half)
+}
+
+
+
+
+
+### Returns c(frequency, power) of the FFT bin closest to frequency 1/3, followed by the same pair taken
+### from spectrum(x). Inputs shorter than 10 values are zero-padded with 3 zeros on each side.
+### If several bins are equally close to 1/3, all of them are returned.
+take_maxfreq_and_power_FFT_Spec<-function(x){
+        if(length(x)<10){x<-c(rep(0,3),x,rep(0,3))}
+
+        # raw FFT: distance of every bin's absolute frequency from 1/3
+        fft_freqs<-getFFTFreqs(Nyq.Freq=0.5,data=x)
+        fft_power<-Mod(fft(x))
+        dist_fft<-abs((abs(fft_freqs)-(1/3)))
+        closest_fft<-which(dist_fft==min(dist_fft))
+        freq3_fft<-abs(fft_freqs[closest_fft])
+        power3_fft<-fft_power[closest_fft]
+
+        # same lookup on the spectrum() estimate
+        spect_x<-spectrum(x,plot=FALSE)
+        dist_sp<-abs(spect_x$freq-(1/3))
+        closest_sp<-which(dist_sp==min(dist_sp))
+        freq3_sp<-abs(spect_x$freq[closest_sp])
+        power3_sp<-abs(spect_x$spec[closest_sp])
+        return(c(freq3_fft,power3_fft,freq3_sp,power3_sp))
+}
+
+
+### Plots the raw FFT periodogram of y (first half of the spectrum) and invisibly returns
+### list(freq, FFT, modFFT). x is only used through max(x), to size the shaded band; samplingFreq/2 is
+### used as the Nyquist frequency. From http://stackoverflow.com/questions/3485456/useful-little-functions-in-r
+plotFFT<-function(x, y, samplingFreq, shadeNyq=TRUE, showPeriod = TRUE)
+{
+        Nyq.Freq <- samplingFreq/2
+        FFTFreqs <- getFFTFreqs(Nyq.Freq, y)
+        FFT <- fft(y)
+        modFFT <- Mod(FFT)
+        FFTdata <- cbind(FFTFreqs, modFFT)
+        # `1:nrow(FFTdata)/2` parses as (1:n)/2 (fractional indices, each row picked twice); build the 1..n/2 range explicitly.
+        first_half <- seq_len(nrow(FFTdata) %/% 2)
+        plot(FFTdata[first_half, , drop=FALSE], type="l", pch=20, lwd=2, cex=0.8, main="",
+             xlab="Frequency (Hz)", ylab="Power")
+        if (showPeriod == TRUE)
+        {
+                # Period axis on top        
+                a <- axis(3, lty=0, labels=FALSE)
+                axis(3, cex.axis=0.6, labels=format(1/a, digits=2), at=a)
+        }
+        if (shadeNyq == TRUE)
+        {
+                # Gray out lower frequencies
+                rect(0, 0, 2/max(x), max(FFTdata[,2])*2, col="gray", density=30)
+        }
+        invisible(list("freq"=FFTFreqs, "FFT"=FFT, "modFFT"=modFFT))
+}
+
+### This function calculates the CSCPD as in Michel et al 2012 Gen Res, for every unique exon_id in `exon_ids`.
+### tracks_to_analyze: character vector of space-separated track lines; index_tracks: its first column is compared with each exon id to pick the lines.
+### Returns a list of three matrices (rows = exon ids, 100 columns), one per sub-codon position, holding the smoothed curve sampled at 0.01..1.
+dual_take_CSCPDs<-function(tracks_to_analyze=all_tracks,index_tracks=all_tracks_index,exon_ids=all_tracks_index){
+        unique_index<-unique(as.data.frame(exon_ids)[,"exon_id"])
+        interpolation_mat1 = matrix(NA, nrow = length(unique_index), ncol=100)
+        interpolation_mat2 = matrix(NA, nrow = length(unique_index), ncol=100)
+        interpolation_mat3 = matrix(NA, nrow = length(unique_index), ncol=100)
+        rownames(interpolation_mat1)<-unique_index
+        rownames(interpolation_mat2)<-unique_index
+        rownames(interpolation_mat3)<-unique_index
+        for(i in 1:length(unique_index)){
+                id<-unique_index[i]
+                exon_track<-tracks_to_analyze[index_tracks[,1]==id]
+                withsep<-strsplit(exon_track,split=" ")
+                x<-t(data.frame(withsep)) # one row per track line, one column per space-separated field
+                #rnames[i]<-x[1,1]
+                strand<-x[1,2]
+                if(length(grep("CCDS",id))>0){tracks_pre<-t(x[,-c(1:3)])} else { # ids containing "CCDS": three leading fields are dropped instead of two
+                        tracks_pre<-t(x[,-c(1:2)])}
+                if(strand=="-"){
+                        tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+                } else if (strand=="+"){
+                        tracks<-tracks_pre} # NOTE(review): any other strand value leaves `tracks` unassigned in this iteration
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                FRAME_MAX_phase<-max.col(t(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))-1
+                nts_toadd<-(3-FRAME_MAX_phase)%%3
+                # prepend zeros so that the sub-codon position with the most P-sites falls on positions 1,4,7,...
+                read_mat<-t(as.matrix(c(rep(x=0,nts_toadd),tracks[,1])))
+                length<-(length(read_mat))
+                # NOTE(review): the padded length need not be a multiple of 3, so seq_length1 can be fractional - confirm this is intended
+                upprop = matrix(NA,nrow = 1,ncol = length)
+                downprop = matrix(NA,nrow = 1,ncol = length)
+                seq_length1 = (length/3)-1
+                seq_length2 = seq_length1 - 1
+                
+                #calculate the cumulative upstream proportions
+                denom = cumsum(read_mat[1,])
+                upprop[1,(1+3*(0:seq_length1))] = cumsum(read_mat[1,(1+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                upprop[1,(2+3*(0:seq_length1))] = cumsum(read_mat[1,(2+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                upprop[1,(3+3*(0:seq_length1))] = cumsum(read_mat[1,(3+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                #calculate the cumulative downstream proportions
+                totalsDown = rep(NA, seq_length1)
+                for (j in 1:seq_length1){
+                        totalsDown[j] = sum(read_mat[1+3*(j:seq_length1)]+ read_mat[2+3*(j:seq_length1)]+
+                                                    read_mat[3+3*(j:seq_length1)])}
+                downprop[1,(1+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(1+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                downprop[1,(2+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(2+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                downprop[1,(3+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(3+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                #Calculate the CSCPD (absolute difference between cumulative upstream and downstream
+                #proportions for sub-codon positions 1,2,3)
+                third_full_seq_minus1 = (length/3)-1
+                y1 = abs(upprop-downprop)[1+3*(0:third_full_seq_minus1)]
+                y1_withoutNA = y1[which(y1 != "NA")] 
+                y1_withoutNaN = y1_withoutNA[which(y1_withoutNA != "NaN")]
+                # (the comparisons with the strings "NA" / "NaN" inside which() drop the NA and then the NaN entries)
+                y2 = abs(upprop-downprop)[2+3*(0:third_full_seq_minus1)]
+                y2_withoutNA = y2[which(y2 != "NA")]
+                y2_withoutNaN = y2_withoutNA[which(y2_withoutNA != "NaN")]
+                
+                y3 = abs(upprop-downprop)[3+3*(0:third_full_seq_minus1)]
+                y3_withoutNA = y3[which(y3 != "NA")]
+                y3_withoutNaN = y3_withoutNA[which(y3_withoutNA != "NaN")]
+                
+                
+                # no usable value at all: substitute a flat placeholder of five 0.1 values
+                if(length(y1_withoutNaN)==0 ){
+                        y1_withoutNaN<-c(0.1,0.1,0.1,0.1,0.1)
+                }
+                if(length(y2_withoutNaN)==0 ){
+                        y2_withoutNaN<-c(0.1,0.1,0.1,0.1,0.1)
+                }
+                if(length(y3_withoutNaN)==0 ){
+                        y3_withoutNaN<-c(0.1,0.1,0.1,0.1,0.1)
+                }
+                
+                
+                
+                # exactly three values: repeat the first and the last one once (five values)
+                if(length(y1_withoutNaN)<4 & length(y1_withoutNaN)>2 ){
+                        y1_withoutNaN<-c(y1_withoutNaN[1],y1_withoutNaN,y1_withoutNaN[length(y1_withoutNaN)])
+                }
+                if(length(y2_withoutNaN)<4 & length(y2_withoutNaN)>2 ){
+                        y2_withoutNaN<-c(y2_withoutNaN[1],y2_withoutNaN,y2_withoutNaN[length(y2_withoutNaN)])
+                }
+                if(length(y3_withoutNaN)<4 & length(y3_withoutNaN)>2 ){
+                        y3_withoutNaN<-c(y3_withoutNaN[1],y3_withoutNaN,y3_withoutNaN[length(y3_withoutNaN)])
+                }
+                
+                
+                # one or two values: repeat the first and the last one twice (five or six values)
+                if(length(y1_withoutNaN)<3 & length(y1_withoutNaN)>0 ){
+                        y1_withoutNaN<-c(y1_withoutNaN[1],y1_withoutNaN[1],y1_withoutNaN,y1_withoutNaN[length(y1_withoutNaN)],y1_withoutNaN[length(y1_withoutNaN)])
+                }
+                if(length(y2_withoutNaN)<3 & length(y2_withoutNaN)>0 ){
+                        y2_withoutNaN<-c(y2_withoutNaN[1],y2_withoutNaN[1],y2_withoutNaN,y2_withoutNaN[length(y2_withoutNaN)],y2_withoutNaN[length(y2_withoutNaN)])
+                }
+                if(length(y3_withoutNaN)<3 & length(y3_withoutNaN)>0 ){
+                        y3_withoutNaN<-c(y3_withoutNaN[1],y3_withoutNaN[1],y3_withoutNaN,y3_withoutNaN[length(y3_withoutNaN)],y3_withoutNaN[length(y3_withoutNaN)])
+                }
+                
+                
+                # NOTE(review): x1, x2 and x3 below are all sized from y1 - assumes y2 and y3 end up with the same length
+                length_third_seq_withoutNaN = length(y1_withoutNaN)
+                length_third_seq_withoutNaN_minus1= length(y1_withoutNaN)-1
+                
+                
+                
+                x1 = 1+3*(0:length_third_seq_withoutNaN_minus1)
+                x2 = 2+3*(0:length_third_seq_withoutNaN_minus1)
+                x3 = 3+3*(0:length_third_seq_withoutNaN_minus1)
+                #Converting all coordinates in coding region to relative values between 0 and 1 and using a smoothing function
+                ys1 = smooth.spline(x1/(length_third_seq_withoutNaN*3),y1_withoutNaN)
+                ys2 = smooth.spline(x2/(length_third_seq_withoutNaN*3),y2_withoutNaN)
+                ys3 = smooth.spline(x3/(length_third_seq_withoutNaN*3),y3_withoutNaN)
+                #Sampling 100 equidistant CSCPD values between 0 and 1
+                xout = 0.01*(1:100)
+                yout1 = predict(ys1,xout)$y
+                yout2 = predict(ys2,xout)$y
+                yout3 = predict(ys3,xout)$y
+                interpolation_mat1[i,] = yout1
+                interpolation_mat2[i,] = yout2
+                interpolation_mat3[i,] = yout3
+        }
+        interpolation_list<-list(interpolation_mat1,interpolation_mat2,interpolation_mat3)
+        return(interpolation_list)
+}
+
+
+### This function calculates the PTS as in Michel et al 2012 Gen Res: per exon and sub-codon position, the sum of the positive
+### differences between the exon's 100-point CSCPD curve and prev_percentiles$Percentile_P1/P2/P3 (PTS = PTS1+PTS2+PTS3).
+dual_calculate_PTSs<-function(all_tracks,index,prev_percentiles){
+        data_frame<-as.data.frame(prev_percentiles)
+        stopifnot(all(c("Percentile_P1","Percentile_P2","Percentile_P3") %in% names(data_frame)))
+        unique_index<-unique(index)
+        difference_mat1 = matrix(NA, nrow = dim(unique_index)[1], ncol=100)
+        difference_mat2 = matrix(NA, nrow = dim(unique_index)[1], ncol=100)
+        difference_mat3 = matrix(NA, nrow = dim(unique_index)[1], ncol=100)
+        # all_tracks: character vector of space-separated track lines; index: table with one id per track line.
+        # Returns a data.frame with columns exon_id, PTS1, PTS2, PTS3, PTS (one row per unique id).
+        rnames = rep("",dim(unique_index)[1])
+        cnames = c("PTS1", "PTS2", "PTS3", "PTS")
+        PTS = matrix(0, nrow = dim(unique_index)[1], ncol=4, dimnames=list(rnames,cnames))
+        
+        # seq_len() so that an empty index yields no iterations instead of i = 1, 0
+        for(i in seq_len(dim(unique_index)[1])){
+                id<-unique_index[i,]
+                exon_track<-all_tracks[index==id]
+                withsep<-strsplit(exon_track,split=" ")
+                x<-t(data.frame(withsep))
+                rnames[i]<-x[1,1]
+                strand<-x[1,2]
+                if(length(grep("CCDS",id))>0){tracks_pre<-t(x[,-c(1:3)])} else {
+                        tracks_pre<-t(x[,-c(1:2)])}
+                if(strand=="-"){
+                        tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+                } else if (strand=="+"){
+                        tracks<-tracks_pre}
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                FRAME_MAX_phase<-max.col(t(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))-1
+                nts_toadd<-(3-FRAME_MAX_phase)%%3
+                # prepend zeros so that the sub-codon position with the most P-sites falls on positions 1,4,7,...
+                read_mat<-t(as.matrix(c(rep(x=0,nts_toadd),tracks[,1])))
+                length<-(length(read_mat))
+                
+                upprop = matrix(NA,nrow = 1,ncol = length)
+                downprop = matrix(NA,nrow = 1,ncol = length)
+                seq_length1 = (length/3)-1
+                seq_length2 = seq_length1 - 1
+                
+                #calculate the cumulative upstream proportions
+                denom = cumsum(read_mat[1,])
+                upprop[1,(1+3*(0:seq_length1))] = cumsum(read_mat[1,(1+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                upprop[1,(2+3*(0:seq_length1))] = cumsum(read_mat[1,(2+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                upprop[1,(3+3*(0:seq_length1))] = cumsum(read_mat[1,(3+3*(0:seq_length1))])/
+                        denom[(3+3*(0:seq_length1))]
+                #calculate the cumulative downstream proportions
+                totalsDown = rep(NA, seq_length1)
+                for (j in 1:seq_length1){
+                        totalsDown[j] = sum(read_mat[1+3*(j:seq_length1)]+ read_mat[2+3*(j:seq_length1)]+
+                                                    read_mat[3+3*(j:seq_length1)])}
+                downprop[1,(1+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(1+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                downprop[1,(2+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(2+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                downprop[1,(3+3*(0:seq_length2))] = rev(cumsum(rev(read_mat[1,(3+3*(1:seq_length1))])))/
+                        totalsDown[0:seq_length1]
+                #Calculate the CSCPD (absolute difference between cumulative upstream and downstream
+                #proportions for sub-codon positions 1,2,3)
+                third_full_seq_minus1 = (length/3)-1
+                y1 = abs(upprop-downprop)[1+3*(0:third_full_seq_minus1)]
+                y1_withoutNA = y1[which(y1 != "NA")] 
+                y1_withoutNaN = y1_withoutNA[which(y1_withoutNA != "NaN")]
+                
+                y2 = abs(upprop-downprop)[2+3*(0:third_full_seq_minus1)]
+                y2_withoutNA = y2[which(y2 != "NA")]
+                y2_withoutNaN = y2_withoutNA[which(y2_withoutNA != "NaN")]
+                
+                y3 = abs(upprop-downprop)[3+3*(0:third_full_seq_minus1)]
+                y3_withoutNA = y3[which(y3 != "NA")]
+                y3_withoutNaN = y3_withoutNA[which(y3_withoutNA != "NaN")]
+                # fewer than four values: repeat the first and the last one twice before the spline fit
+                if(length(y1_withoutNaN)<4){
+                        y1_withoutNaN<-c(y1_withoutNaN[1],y1_withoutNaN[1],y1_withoutNaN,y1_withoutNaN[length(y1_withoutNaN)],y1_withoutNaN[length(y1_withoutNaN)])
+                }
+                if(length(y2_withoutNaN)<4){
+                        y2_withoutNaN<-c(y2_withoutNaN[1],y2_withoutNaN[1],y2_withoutNaN,y2_withoutNaN[length(y2_withoutNaN)],y2_withoutNaN[length(y2_withoutNaN)])
+                }
+                if(length(y3_withoutNaN)<4){
+                        y3_withoutNaN<-c(y3_withoutNaN[1],y3_withoutNaN[1],y3_withoutNaN,y3_withoutNaN[length(y3_withoutNaN)],y3_withoutNaN[length(y3_withoutNaN)])
+                }
+                
+                
+                length_third_seq_withoutNaN = length(y1_withoutNaN)
+                length_third_seq_withoutNaN_minus1= length(y1_withoutNaN)-1
+                
+                
+                
+                x1 = 1+3*(0:length_third_seq_withoutNaN_minus1)
+                x2 = 2+3*(0:length_third_seq_withoutNaN_minus1)
+                x3 = 3+3*(0:length_third_seq_withoutNaN_minus1)
+                #Converting all coordinates in coding region to relative values between 0 and 1 and using a smoothing function
+                ys1 = smooth.spline(x1/(length_third_seq_withoutNaN*3),y1_withoutNaN)
+                ys2 = smooth.spline(x2/(length_third_seq_withoutNaN*3),y2_withoutNaN)
+                ys3 = smooth.spline(x3/(length_third_seq_withoutNaN*3),y3_withoutNaN)
+                #Sampling 100 equidistant CSCPD values between 0 and 1
+                
+                xout = 0.01*(1:100)
+                yout1 = predict(ys1,xout)$y
+                yout2 = predict(ys2,xout)$y
+                yout3 = predict(ys3,xout)$y
+                
+                # percentiles are read from the argument itself, so a same-named global can no longer shadow them
+                difference_mat1[i,] = yout1 - data_frame$Percentile_P1
+                difference_mat2[i,] = yout2 - data_frame$Percentile_P2
+                difference_mat3[i,] = yout3 - data_frame$Percentile_P3
+                gene_counter<-i
+                
+                for (k in 1:100)
+                {
+                        if (as.numeric(difference_mat1[gene_counter,k]) >= 0){
+                                PTS[gene_counter,1] = PTS[gene_counter,1] + difference_mat1[gene_counter,k]
+                        }
+                        if (difference_mat2[gene_counter,k] >= 0){
+                                PTS[gene_counter,2] = PTS[gene_counter,2] + difference_mat2[gene_counter,k]
+                        }
+                        if (difference_mat3[gene_counter,k] >= 0){
+                                PTS[gene_counter,3] = PTS[gene_counter,3] + difference_mat3[gene_counter,k]                
+                        }
+                }
+                
+                PTS[gene_counter,4] = PTS[gene_counter,1] + PTS[gene_counter,2] + PTS[gene_counter,3]
+                
+        }       
+        # nothing was attached to the search path, so there is nothing to detach (also true when an error occurs above)
+        PTS<-as.data.frame(PTS,row.names=F,stringsAsFactors=F)
+        PTS$exon_id<-unique_index[,1]
+        PTS<-PTS[,c("exon_id","PTS1","PTS2","PTS3","PTS")]
+        return(PTS)
+}
+
+
+### plot_CSCPDs (outdated): plots the three CSCPD curves of one exon, in the style of
+### Michel et al 2012 Gen Res.
+###   x: list whose first three elements are row-named 2-d objects; y: the row to draw.
+###   Row y of x[[1]], x[[2]] and x[[3]] is drawn in red, green and blue; the legend shows its row name.
+plot_CSCPDs<-function(x,y){
+        frame_curves<-list(x[[1]][y,],x[[2]][y,],x[[3]][y,])
+        region_label<-rownames(x[[1]])[y]
+        plot(frame_curves[[1]],type="l",col="red",ylim=c(0,1),ylab="CSCPDs")
+        overlay_cols<-c("green","blue")
+        for(k in 1:2){
+                lines(frame_curves[[k+1]],type="l",col=overlay_cols[k])
+        }
+        legend("top",region_label)
+}
+
+### This function plots data_tracks information (P-sites distribution, FFT etc...) (outdated)
+### Args: exon_id = id of the region to plot; complete_tracks = vector of space-separated track lines;
+###       index = vector compared with exon_id to pick the lines of that region out of complete_tracks.
+plot_tracks_fig<-function(exon_id,complete_tracks=all_tracks,index=all_tracks_index){
+        # The defaults all_tracks / all_tracks_index are not defined in this function and must already exist.
+        exon_track<-complete_tracks[index==exon_id]
+        withsep<-strsplit(exon_track,split=" ")
+        x<-t(data.frame(withsep)) # one row per track line, one column per space-separated field
+        strand<-x[1,2]
+        if(length(grep("CCDS",exon_id,))>0){tracks_pre<-t(x[,-c(1:3)])} else {
+                tracks_pre<-t(x[,-c(1:2)])}
+        if(strand=="-"){
+                tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+        } else if (strand=="+"){
+                tracks<-tracks_pre}
+        colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+        mode(tracks)<-"numeric"
+        length<-dim(tracks)[1]
+        # NOTE: `length` is not used again in this function (dim(tracks)[1] is used directly below).
+        # Fraction of the P-sites (column 1) on positions 1,4,7,... / 2,5,8,... / 3,6,9,...
+        Phase0<-round(x=sum(tracks[seq(1,dim(tracks)[1],by=3),1])/sum(tracks[,1]),digits=4)
+        Phase1<-round(x=sum(tracks[seq(2,dim(tracks)[1],by=3),1])/sum(tracks[,1]),digits=4)
+        Phase2<-round(x=sum(tracks[seq(3,dim(tracks)[1],by=3),1])/sum(tracks[,1]),digits=4)
+        # Same fractions for the RNAcent track (column 4).
+        Phase0_RNA<-round(x=sum(tracks[seq(1,dim(tracks)[1],by=3),4])/sum(tracks[,4]),digits=4)
+        Phase1_RNA<-round(x=sum(tracks[seq(2,dim(tracks)[1],by=3),4])/sum(tracks[,4]),digits=4)
+        Phase2_RNA<-round(x=sum(tracks[seq(3,dim(tracks)[1],by=3),4])/sum(tracks[,4]),digits=4)
+        # Coverage values and positions, padded at both ends with the minimum, for the polygon() calls below.
+        valuesribo<-c(min(tracks[,2]),tracks[,2],min(tracks[,2]))
+        valuesrna<-c(min(tracks[,3]),tracks[,3],min(tracks[,3]))
+        nucleot<-c(min(seq(1,dim(tracks)[1])),seq(1,dim(tracks)[1]),max(seq(1,dim(tracks)[1])))
+        name_region<-exon_id
+        x11(width=16,height=10)
+        par(mar=c(4, 4, 1, 1))
+        split.screen( figs = c( 2, 2 ) )
+        split.screen( figs = c( 2, 1 ) ,screen=1)
+        # Screens 1-4 form the 2 x 2 layout; screen 1 has just been split into screens 5 and 6.
+        screen(5)
+        # P-sites per position; the three colours recycle along the positions, i.e. one colour per frame.
+
+        plot(tracks[,1],type="h",col=c("red","dark green","blue"),ylab="P_sites",xlab="nt")
+        # Screen 6 is split into screens 7 and 8: frame fractions and plotFFT() (defined elsewhere).
+        split.screen( figs = c( 1, 2 ) ,screen=6)
+        screen(7)
+        barplot(c(Phase0,Phase1,Phase2),xlab="Phases",ylim=c(0,1),ylab="%_Alignments",main="%Frames_RIBO",col=c("red","dark green","blue"))
+        screen(8)
+        plotFFT(x=seq(1,dim(tracks)[1]),y=tracks[,1],samplingFreq=1)
+        # Screen 2: Ribo coverage. Screen 3 is split like screen 1 for the RNAcent track. Screen 4: RNA coverage.
+        screen(2)
+        plot(tracks[,2],type="l",col="red",ylab="Ribo_cov",xlab="nt",main=x[1,1])
+        polygon(x=nucleot,y=valuesribo,col="red")
+        split.screen( figs = c( 2, 1 ) ,screen=3)
+        screen(9)
+        plot(tracks[,4],type="h",col=c("red","dark green","blue"),ylab="RNA_center",xlab="nt")
+        split.screen( figs = c( 1, 2 ) ,screen=10)
+        screen(11)
+        barplot(c(Phase0_RNA,Phase1_RNA,Phase2_RNA),ylim=c(0,1),xlab="Phases",ylab="%_Alignments",main="%Frames_RNA",col=c("red","dark green","blue"))
+        screen(12)
+        plotFFT(x=seq(1,dim(tracks)[1]),y=tracks[,4],samplingFreq=1)
+        screen(4)
+        plot(tracks[,3],type="l",col="dark grey",ylab="RNA_cov",xlab="nt")
+        polygon(x=nucleot,y=valuesrna,col="dark grey")
+        close.screen(all.screens=T)
+}
+
+
+### This function calculates the PTS from the CSCPDs, as in Michel et al 2012 Gen Res
+###
+### Args:
+###   list_cscpds:    list whose first three elements are numeric matrices (or data frames) of the
+###                   same shape, one per frame: one row per exon (row names are used as exon ids),
+###                   one column per sampled position (any number of columns is accepted).
+###   quantile_value: probability of the per-column quantile used as threshold (default 0.95).
+### Returns:
+###   data.frame with columns exon_id, PTS1, PTS2, PTS3, PTS and one row per exon. PTSk sums, over
+###   the columns of frame k, the amount by which the exon exceeds the column threshold (columns
+###   below the threshold add 0); PTS = PTS1 + PTS2 + PTS3. An NA in a row gives an NA score.
+###   Row names must be present on the first element, otherwise the final column selection fails.
+
+calculate_PTS_from_CSPDs<-function(list_cscpds,quantile_value=0.95){
+
+        # One score column per frame plus their total.
+        n_exons<-nrow(list_cscpds[[1]])
+        PTS<-matrix(0,nrow=n_exons,ncol=4,dimnames=list(NULL,c("PTS1","PTS2","PTS3","PTS")))
+
+        for(frame in 1:3){
+                cscpds<-as.matrix(list_cscpds[[frame]])
+
+                # Per-column threshold: the requested quantile over all exons, ignoring NAs.
+                thresholds<-apply(cscpds,2,quantile,probs=quantile_value,na.rm=TRUE,names=FALSE)
+
+                # Distance of every exon from the threshold; negative distances do not count.
+                excess<-sweep(cscpds,2,thresholds,"-")
+                excess[!is.na(excess) & excess<0]<-0
+
+                # rowSums() works on a one-row matrix too, so a single exon is handled like
+                # any other input instead of failing on a dropped dimension.
+                PTS[,frame]<-rowSums(excess)
+        }
+
+        # drop=FALSE keeps the one-exon case a matrix.
+        PTS[,4]<-rowSums(PTS[,1:3,drop=FALSE])
+
+        # Exon ids come from the row names of the first element.
+        PTS<-as.data.frame(PTS,row.names=NULL,stringsAsFactors=FALSE)
+        PTS$exon_id<-rownames(list_cscpds[[1]])
+        PTS<-PTS[,c("exon_id","PTS1","PTS2","PTS3","PTS")]
+        return(PTS)
+}
+
+
+### This function takes frequencies F-values and spectral coefficient for a data-track object.
+### (you have to calculate slepian functions beforehand)
+### x: numeric signal; n_tapers, time_bw and slepians_values are passed to spec.mtm as k, nw and dpssIN
+### (spec.mtm and dropFreqs are not defined in the visible part of this file, presumably multitaper).
+### Returns 7 numbers: frequency with the largest F statistic after dropFreqs(.,0.1,0.45) and its
+### p-value; the same after a further dropFreqs(.,0.29,0.39); then the frequency closest to 1/3,
+### the p-value of its F statistic and its spectrum value.
+take_freqs_Fvalues_all_around_3nt_spec<-function(x,n_tapers,time_bw,slepians_values){
+        # Signals shorter than 25 are zero-padded on both sides to length 50.
+        if(length(x)<25){
+                remain<-50-length(x)
+                x<-c(rep(0,as.integer(remain/2)),x,rep(0,remain%%2+as.integer(remain/2)))
+        }
+        padding<-if(length(x)<1024/2){1024}else{"default"}
+        resSpec1 <- spec.mtm(as.ts(x), k=n_tapers, nw=time_bw, nFFT = padding, centreWithSlepians = TRUE, Ftest = TRUE, maxAdaptiveIterations = 100,returnZeroFreq=FALSE,plot=FALSE,dpssIN=slepians_values)
+        # which.max()/which.min() return a single index (first on ties), so the result always has
+        # 7 elements; an empty selection becomes NA instead of shortening the vector.
+        one_index<-function(i){if(length(i)==0){NA_integer_}else{i}}
+        df_den<-(2*n_tapers)-2
+        resSpec2<-dropFreqs(resSpec1,0.1,0.45)
+        best_all<-one_index(which.max(resSpec2$mtm$Ftest))
+        freq_max<-resSpec2$freq[best_all]
+        P_all<-pf(resSpec2$mtm$Ftest[best_all],df1=2,df2=df_den,lower.tail=FALSE)
+        resSpec2<-dropFreqs(resSpec2,0.29,0.39)
+        best_around<-one_index(which.max(resSpec2$mtm$Ftest))
+        freq_max_around_3nt<-resSpec2$freq[best_around]
+        P_around_3nt<-pf(q=resSpec2$mtm$Ftest[best_around],df1=2,df2=df_den,lower.tail=FALSE)
+        closest_3nt<-one_index(which.min(abs(resSpec1$freq-(1/3))))
+        freq_max_3nt<-resSpec1$freq[closest_3nt]
+        P_3nt<-pf(q=resSpec1$mtm$Ftest[closest_3nt],df1=2,df2=df_den,lower.tail=FALSE)
+        Spec_3nt<-resSpec1$spec[closest_3nt]
+        return(c(freq_max,P_all,freq_max_around_3nt,P_around_3nt,freq_max_3nt,P_3nt,Spec_3nt))
+}
+
+
+### This function calculates periodicity and other statistics on single exon tracks
+### x: matrix (presumably character), one row per track line (Psites, RiboCov, RNACov, RNAcent, Seq); column 1 =
+###    exon id, column 2 = strand, remaining columns = per-position values. Returns a one-row data.frame.
+make_analysis_exons<-function(x){
+        strand<-x[1,2]
+        tracks_pre<-t(x[,-c(1:2)])
+        # tracks_pre: one row per position, one column per track. On the minus strand every track is
+        # reversed; any strand other than "-" or "+" leaves `tracks` undefined and fails below.
+        if(strand=="-"){
+                tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]),rev(tracks_pre[,5]))
+        } else if (strand=="+"){
+                tracks<-tracks_pre}
+        colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent","Seq")
+        tracks<-tracks[,1:4]
+        if(is.null(dim(tracks))){
+                tracks<-t(as.matrix(tracks))
+        }
+        # (a single-position exon drops to a vector above and is restored to a 1 x 4 matrix)
+        mode(tracks)<-"numeric"
+
+        exon<-data.frame(exon_id=x[1,1],stringsAsFactors=FALSE,row.names=NULL)
+        exon$strand<-strand
+        exon$frame_start_pred<-NA
+        exon$frame_end_pred<-NA
+
+        exon$length<-dim(tracks)[1]
+        length<-dim(tracks)[1]
+
+        # Totals of P-sites and of centred RNA-seq reads, and mean coverages.
+        P_sites_sum<-round(sum(tracks[,1]),digits=6)
+        exon$P_sites_sum<-P_sites_sum
+
+
+        Centered_sites_sum<-round(sum(tracks[,4]),digits=6)
+        exon$RNA_sites_sum<-Centered_sites_sum
+        exon$Ribocov_aver<-round(mean(tracks[,2]),digits=6)
+        exon$RNAseqcov_aver<-round(mean(tracks[,3]),digits=6)
+        exon$pctPhase_frame<-NA
+        exon$pctPhase_frame_1<-NA
+        exon$pctPhase_frame_2<-NA
+        exon$pctPhaseCentered_frame<-NA
+        exon$pctPhaseCentered_frame_1<-NA
+        exon$pctPhaseCentered_frame_2<-NA
+        if(length>2){
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                # Fraction of P-sites falling on positions 1,4,7,... / 2,5,8,... / 3,6,9,... of the exon.
+
+                exon$pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+                exon$pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+                exon$pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+
+                # Same split for the centred RNA-seq reads.
+                Phase_Centered_sites_frame<-sum(tracks[seq(1,length,by=3),4])
+                Phase_Centered_sites_frame_1<-sum(tracks[seq(2,length,by=3),4])
+                Phase_Centered_sites_frame_2<-sum(tracks[seq(3,length,by=3),4])
+
+
+                exon$pctPhaseCentered_frame<-Phase_Centered_sites_frame/Centered_sites_sum
+                exon$pctPhaseCentered_frame_1<-Phase_Centered_sites_frame_1/Centered_sites_sum
+                exon$pctPhaseCentered_frame_2<-Phase_Centered_sites_frame_2/Centered_sites_sum
+                # Frame (0, 1 or 2) holding most of the signal. ties.method="first" keeps the choice
+                # reproducible; the max.col() default breaks ties at random.
+                MAXPhase_frame<-max(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2))
+                FRAME_MAX_phase<-max.col(t(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2)),ties.method="first")-1
+
+                MAXPhaseCentered_frame<-max(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2))
+                FRAME_MAX_phaseCentered<-max.col(t(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2)),ties.method="first")-1
+        }
+
+        # Periodicity statistics, filled in only when there is enough signal (see conditions below).
+        exon$multit_freq_best_ribo<-NA
+        exon$pval_multit_3nt_ribo<-NA
+        exon$spec_multit_3nt_ribo<-NA
+        exon$fft_max_freq_ribo<-NA
+        exon$fft_power_3_ribo<-NA
+        exon$fft_aver_ribo<-NA
+        exon$spec_max_freq_ribo<-NA
+        exon$spec_power_3_ribo<-NA
+        exon$spec_aver_power_ribo<-NA
+
+        exon$multit_freq_best_rna<-NA
+        exon$pval_multit_3nt_rna<-NA
+        exon$spec_multit_3nt_rna<-NA
+        exon$fft_max_freq_rna<-NA
+        exon$fft_power_3_rna<-NA
+        exon$fft_aver_rna<-NA
+        exon$spec_max_freq_rna<-NA
+        exon$spec_power_3_rna<-NA
+        exon$spec_aver_power_rna<-NA
+
+        exon$ORF_score_ribo<-NA
+        exon$ORF_score_rna<-NA
+
+        if(P_sites_sum>2 & length>5){
+                if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                bestfreq_3ntpval_ribo<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                exon$multit_freq_best_ribo<-bestfreq_3ntpval_ribo[1]
+                exon$pval_multit_3nt_ribo<-bestfreq_3ntpval_ribo[2]
+                exon$spec_multit_3nt_ribo<-bestfreq_3ntpval_ribo[3]
+                score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                exon$ORF_score_ribo<-log2(score1+score2+score3+1)
+                # If one position holds more than 70% of the P-sites, rescore without that position.
+                if(max(tracks[,1])>(P_sites_sum*.7)){
+                        new_track<-tracks
+                        new_track[which(new_track[,1]==max(new_track[,1])),1]<-0 # explicit column index
+                        exon$ORF_score_ribo<-NA
+                        if(sum(new_track[,1])>2){
+                                Phase_P_sites_frame_corr<-sum(new_track[seq(1,length,by=3),1])
+                                Phase_P_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),1])
+                                Phase_P_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),1])
+                                score1<-((Phase_P_sites_frame_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                score2<-((Phase_P_sites_frame_1_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                score3<-((Phase_P_sites_frame_2_corr-sum(new_track[,1])/3)^2)/(sum(new_track[,1])/3)
+                                exon$ORF_score_ribo<-log2(score1+score2+score3+1)
+                        }
+                }
+                # Plain FFT of the P-sites track (getFFTFreqs is defined outside this function).
+                gino<-getFFTFreqs(Nyq.Freq=0.5,data=tracks[,1])
+                modFFT <- Mod(fft(tracks[,1]))
+                FFTdata <- cbind(gino, modFFT)
+                exon$fft_aver_ribo<-mean(FFTdata[,2])
+                exon$fft_power_3_ribo<-FFTdata[which(abs((gino-(1/3)))==min(abs((gino-(1/3))))),2]
+                exon$fft_max_freq_ribo<-abs(gino[which(FFTdata==max((FFTdata[10:dim(FFTdata)[1]/2,2])),arr.ind=TRUE)[1]])[1]
+                # NOTE(review): 10:dim(FFTdata)[1]/2 parses as (10:n)/2, and fft_max_freq_ribo stores a
+                # frequency while fft_max_freq_rna below stores 1/frequency; left unchanged, please confirm.
+                spect_P_sites<-spectrum(tracks[,1],plot=FALSE)
+                exon$spec_max_freq_ribo<-spect_P_sites$freq[which(spect_P_sites$spec==max(spect_P_sites$spec),arr.ind=TRUE)][1]
+                exon$spec_power_3_ribo<-spect_P_sites$spec[which(abs((spect_P_sites$freq-(1/3)))==min(abs((spect_P_sites$freq-(1/3)))))]
+                exon$spec_aver_power_ribo<-mean(spect_P_sites$spec)
+                if(Centered_sites_sum>2){
+                        # Same statistics for the centred RNA-seq reads (column 4).
+                        gino<-getFFTFreqs(Nyq.Freq=0.5,data=tracks[,4])
+                        modFFT <- Mod(fft(tracks[,4]))
+                        FFTdata <- cbind(gino, modFFT)
+                        exon$fft_aver_rna<-mean(FFTdata[,2])
+                        exon$fft_power_3_rna<-FFTdata[which(abs((gino-(1/3)))==min(abs((gino-(1/3))))),2]
+                        exon$fft_max_freq_rna<-1/abs(gino[which(FFTdata==max((FFTdata[10:dim(FFTdata)[1]/2,2])),arr.ind=TRUE)[1]])[1]
+
+
+
+                        spect_rna<-spectrum(tracks[,4],plot=FALSE)
+                        exon$spec_max_freq_rna<-spect_rna$freq[which(spect_rna$spec==max(spect_rna$spec),arr.ind=TRUE)][1]
+                        exon$spec_power_3_rna<-spect_rna$spec[which(abs((spect_rna$freq-(1/3)))==min(abs((spect_rna$freq-(1/3)))))]
+                        exon$spec_aver_power_rna<-mean(spect_rna$spec)
+                        bestfreq_3ntpval_rna<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,4],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+
+                        exon$multit_freq_best_rna<-bestfreq_3ntpval_rna[1]
+                        exon$pval_multit_3nt_rna<-bestfreq_3ntpval_rna[2]
+                        exon$spec_multit_3nt_rna<-bestfreq_3ntpval_rna[3]
+                        score_rna_1<-((Phase_Centered_sites_frame-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        score_rna_2<-((Phase_Centered_sites_frame_1-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        score_rna_3<-((Phase_Centered_sites_frame_2-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        exon$ORF_score_rna<-log2(score_rna_1+score_rna_2+score_rna_3+1)
+                        if(max(tracks[,4])>(Centered_sites_sum*.7)){
+                                new_track<-tracks
+                                new_track[which(new_track[,4]==max(new_track[,4])),4]<-0 # row AND column: a bare linear index would zero column 1 instead
+                                exon$ORF_score_rna<-NA
+                                if(sum(new_track[,4])>2){
+                                        Phase_Centered_sites_frame_corr<-sum(new_track[seq(1,length,by=3),4])
+                                        Phase_Centered_sites_frame_1_corr<-sum(new_track[seq(2,length,by=3),4])
+                                        Phase_Centered_sites_frame_2_corr<-sum(new_track[seq(3,length,by=3),4])
+                                        score1<-((Phase_Centered_sites_frame_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                        score2<-((Phase_Centered_sites_frame_1_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                        score3<-((Phase_Centered_sites_frame_2_corr-sum(new_track[,4])/3)^2)/(sum(new_track[,4])/3)
+                                        exon$ORF_score_rna<-log2(score1+score2+score3+1)
+                                }
+                        }
+                }
+
+
+
+        }
+
+        # Test of uniform frame usage: chisq.test for more than 15 sites, xmulti (defined elsewhere) otherwise.
+        exon$chisq_ribo<-NA
+        exon$chisq_rna<-NA
+
+
+        if(P_sites_sum>15 & length>5){
+                exon$chisq_ribo<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+        if(P_sites_sum<16 & P_sites_sum>0 & length>5){
+                exon$chisq_ribo<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }
+
+
+        if(Centered_sites_sum>15 & length>5){
+                exon$chisq_rna<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+        if(Centered_sites_sum<16 & Centered_sites_sum>0 & length>5){
+                exon$chisq_rna<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }
+
+        # Longest stretch of uncovered positions (max_*) and the position just before it (coords_*).
+        exon$max_notcov_ribo<-max((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len)))
+        exon$coords_notcov_ribo<-max.col(t((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len))),ties.method="first")-max((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len)))
+
+        exon$max_notcov_rna<-max((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len)))
+        exon$coords_notcov_rna<-max.col(t((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len))),ties.method="first")-max((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len)))
+
+        # On the minus strand the tracks were reversed, so positions are mirrored back.
+
+        if(strand=="-"){
+                exon$coords_notcov_ribo<-length-exon$coords_notcov_ribo
+                exon$coords_notcov_rna<-length-exon$coords_notcov_rna
+        }
+        # NOTE(review): the string "NA" (not NA) is stored when every position is covered; kept as is.
+        if(exon$max_notcov_ribo==0){
+                exon$max_notcov_ribo<-"NA"
+        }
+
+        if(exon$max_notcov_rna==0){
+                exon$max_notcov_rna<-"NA"
+        }
+
+        exon$notcovered_ribo<-sum(tracks[,2] == 0)
+        exon$notcovered_rna<-sum(tracks[,3] == 0)
+
+
+
+        # Predicted frame at the exon start / end; swapped on the minus strand.
+        if(length>2){
+                exon$frame_start_pred<-FRAME_MAX_phase
+                exon$frame_end_pred<-(length-(FRAME_MAX_phase+1))%%3
+        }
+        if(x[1,2]=="-" & length>2){
+
+                exon$frame_end_pred<-FRAME_MAX_phase
+                exon$frame_start_pred<-(length-(FRAME_MAX_phase+1))%%3
+        }
+
+
+        exon
+
+
+}
+
+
+### This function annotates exons based on their position relative to CCDS exons
+###
+### Args:
+###   x: data.frame with one row per exon and at least the columns type, start, end, length.x,
+###      P_sites_sum, RNA_sites_sum, notcovered_ribo, notcovered_rna, nt_more, nt_more_ribocovered,
+###      nt_more_P_sites, nt_more_rnacovered, nt_more_cent_sites, overlapping_ccds_start,
+###      overlapping_ccds_end and strand.x. Rows of type "ccds" are the reference exons, rows of
+###      type "exon" are classified against them. start <= end is assumed; rows are presumably
+###      sorted by genomic position, since rows before the first / after the last "ccds" row are
+###      labelled as UTR exons.
+### Returns:
+###   x with the columns above updated (values are unchanged when x has no "ccds" row).
+###
+### Labels written to type: inside_ccds, overlapping_ccds, containing_ccds,
+### overlapping_multiple_ccdss, exon_alt_donor, exon_alt_acceptor, int_exon_alt_donor,
+### int_exon_alt_acceptor, overlapping_multiple_ccdss_alt_donor, overlapping_multiple_ccdss_alt_acceptor,
+### 5_utrs_ex, 5_utrs_st, 3_utrs_ex, 3_utrs_st. UTR labels are applied last and win.
+
+annotate_exons<-function(x){
+        annot_cols<-c("type","start","end","length.x","P_sites_sum","RNA_sites_sum","notcovered_ribo","notcovered_rna","nt_more","nt_more_ribocovered","nt_more_P_sites","nt_more_rnacovered","nt_more_cent_sites","overlapping_ccds_start","overlapping_ccds_end")
+        annot_pos<-x[,annot_cols]
+
+        # For row i of df, records how far the exon extends beyond the one-row CCDS exon `ccds`:
+        # extra length, covered fraction of that extra length (Ribo-seq and RNA-seq), extra
+        # P-sites / centred RNA sites, and the coordinates of the CCDS exon.
+        fill_extra<-function(df,i,ccds){
+                df[i,"nt_more"]<-as.numeric(df[i,"length.x"]-ccds[1,"length.x"])
+                df[i,"nt_more_ribocovered"]<-1-((df[i,"notcovered_ribo"]-ccds[1,"notcovered_ribo"])/df[i,"nt_more"])
+                df[i,"nt_more_P_sites"]<-df[i,"P_sites_sum"]-ccds[1,"P_sites_sum"]
+                df[i,"nt_more_rnacovered"]<-1-((df[i,"notcovered_rna"]-ccds[1,"notcovered_rna"])/df[i,"nt_more"])
+                df[i,"nt_more_cent_sites"]<-df[i,"RNA_sites_sum"]-ccds[1,"RNA_sites_sum"]
+                df[i,"overlapping_ccds_start"]<-ccds[1,"start"]
+                df[i,"overlapping_ccds_end"]<-ccds[1,"end"]
+                df
+        }
+
+        ccdss<-which(annot_pos$type=="ccds")
+        if(length(ccdss)>0){
+                ccdss_all<-annot_pos[ccdss,]
+                ccds_first<-ccdss_all[,"start"]
+                ccds_last<-ccdss_all[,"end"]
+
+                middle_ex<-which(annot_pos[,1]=="exon")
+                middle_ex_coords<-annot_pos[middle_ex,]
+
+                # Classify each "exon" row against all CCDS exons at once using interval
+                # comparisons (no per-nucleotide coordinate vectors are built).
+                for(y in seq_len(nrow(middle_ex_coords))){
+                        ex_first<-middle_ex_coords[y,"start"]
+                        ex_last<-middle_ex_coords[y,"end"]
+                        overlaps<-ex_first<=ccds_last & ex_last>=ccds_first
+                        starts_inside<-ex_first>=ccds_first & ex_first<=ccds_last
+                        ends_inside<-ex_last>=ccds_first & ex_last<=ccds_last
+                        if(any(overlaps)){
+                                if(any(starts_inside) && any(ends_inside)){
+                                        middle_ex_coords[y,1]<-"inside_ccds"
+                                } else if(any(starts_inside) || any(ends_inside)){
+                                        middle_ex_coords[y,1]<-"overlapping_ccds"
+                                } else {
+                                        middle_ex_coords[y,1]<-"containing_ccds"
+                                }
+                        }
+                        ccds_inters<-ccdss_all[overlaps,]
+                        if(nrow(ccds_inters)>1){
+                                middle_ex_coords[y,1]<-"overlapping_multiple_ccdss"
+                        } else if(nrow(ccds_inters)==1){
+                                middle_ex_coords<-fill_extra(middle_ex_coords,y,ccds_inters)
+                        }
+                }
+
+                # Remember which exons lie inside CCDS exons before the labels below replace that.
+                inside_ex<-middle_ex_coords[,1]=="inside_ccds"
+
+                if(length(middle_ex)>0){
+                        start_shared<-middle_ex_coords[,"start"]%in%ccds_first
+                        end_shared<-middle_ex_coords[,"end"]%in%ccds_last
+                        # The order matters: each statement tests the labels left by the previous one
+                        # (a non-multiple exon sharing both ends ends up as exon_alt_acceptor).
+                        middle_ex_coords[start_shared & middle_ex_coords[,1]!="overlapping_multiple_ccdss",1]<-"exon_alt_donor"
+                        middle_ex_coords[end_shared & middle_ex_coords[,1]!="overlapping_multiple_ccdss",1]<-"exon_alt_acceptor"
+                        middle_ex_coords[start_shared & middle_ex_coords[,1]=="overlapping_multiple_ccdss",1]<-"overlapping_multiple_ccdss_alt_donor"
+                        middle_ex_coords[end_shared & middle_ex_coords[,1]=="overlapping_multiple_ccdss",1]<-"overlapping_multiple_ccdss_alt_acceptor"
+                }
+                if(sum(inside_ex)>0){
+                        middle_ex_coords[inside_ex & middle_ex_coords[,1]=="exon_alt_donor",1]<-"int_exon_alt_donor"
+                        middle_ex_coords[inside_ex & middle_ex_coords[,1]=="exon_alt_acceptor",1]<-"int_exon_alt_acceptor"
+                }
+                annot_pos[middle_ex,]<-middle_ex_coords
+
+                # Rows before the first CCDS exon become 5_utrs_ex; a non-CCDS row spanning the whole
+                # first CCDS exon becomes 5_utrs_st and gets the extra-length columns. Nothing is done
+                # when the first CCDS exon is the first row or no row spans it.
+                first_ccds<-ccdss[1]
+                if(first_ccds>1){
+                        annot_pos[seq_len(first_ccds-1),1]<-"5_utrs_ex"
+                }
+                ccdss_start<-annot_pos[first_ccds,]
+                five_with_cds<-which(annot_pos[,"start"]<=ccdss_start[1,"start"] & annot_pos[,"end"]>=ccdss_start[1,"end"])
+                five_with_cds<-five_with_cds[!five_with_cds%in%ccdss]
+                annot_pos[five_with_cds,1]<-"5_utrs_st"
+                for(f in five_with_cds){
+                        annot_pos<-fill_extra(annot_pos,f,ccdss_start)
+                }
+
+                # Same for the rows after the last CCDS exon (3_utrs_ex / 3_utrs_st).
+                n_rows<-nrow(annot_pos)
+                last_ccds<-ccdss[length(ccdss)]
+                if(last_ccds<n_rows){
+                        annot_pos[(last_ccds+1):n_rows,1]<-"3_utrs_ex"
+                }
+                ccdss_stop<-annot_pos[last_ccds,]
+                three_with_cds<-which(annot_pos[,"start"]<=ccdss_stop[1,"start"] & annot_pos[,"end"]>=ccdss_stop[1,"end"])
+                three_with_cds<-three_with_cds[!three_with_cds%in%ccdss]
+                annot_pos[three_with_cds,1]<-"3_utrs_st"
+                for(f in three_with_cds){
+                        annot_pos<-fill_extra(annot_pos,f,ccdss_stop)
+                }
+
+                # On the minus strand donor/acceptor and 5'/3' labels are exchanged in one step.
+                if(x$strand.x[1]=="-"){
+                        swapped<-c("exon_alt_donor"="exon_alt_acceptor",
+                                   "exon_alt_acceptor"="exon_alt_donor",
+                                   "int_exon_alt_donor"="int_exon_alt_acceptor",
+                                   "int_exon_alt_acceptor"="int_exon_alt_donor",
+                                   "overlapping_multiple_ccdss_alt_donor"="overlapping_multiple_ccdss_alt_acceptor",
+                                   "overlapping_multiple_ccdss_alt_acceptor"="overlapping_multiple_ccdss_alt_donor",
+                                   "5_utrs_ex"="3_utrs_ex",
+                                   "5_utrs_st"="3_utrs_st",
+                                   "3_utrs_ex"="5_utrs_ex",
+                                   "3_utrs_st"="5_utrs_st")
+                        to_swap<-which(annot_pos[,1]%in%names(swapped))
+                        if(length(to_swap)>0){
+                                annot_pos[to_swap,1]<-unname(swapped[as.character(annot_pos[to_swap,1])])
+                        }
+                }
+        }
+
+        # Write the annotated columns back; annot_pos always has the same rows as x.
+        x[,annot_cols]<-annot_pos
+        x
+}
+
+
+### This function calculates periodicity on the NON-CCDS region of an exon
+
+
+alt_exon_analysis<-function(x,sequences=seq_exons,tracks_exons=all_tracks,index_tracks=tracks_index){
+        
+        
+        exon<-x
+        names_exons<-names(sequences)
+        
+        seq_exon<-sequences[which(names_exons%in%exon["coords2"])][[1]]
+        myexon_id<-exon[,"exon_id"]
+        exon_track<-tracks_exons[index_tracks==myexon_id]
+        withsep<-strsplit(exon_track,split=" ")
+        x<-t(data.frame(withsep))
+        
+        strand<-x[1,2]
+        tracks_pre<-t(x[,-c(1:2)])
+        
+        if(strand=="-"){
+                tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]),rev(tracks_pre[,5]))
+        } else if (strand=="+"){
+                tracks<-tracks_pre}
+        colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent","Seq")
+        
+        tracks<-tracks[,1:4]
+        
+        mode(tracks)<-"numeric"
+        length<-dim(tracks)[1]
+        exon$exon_id_noccds<-exon$exon_id
+        if(exon$type=="exon_alt_acceptor"){
+                
+                tracks<-tracks[1:exon$nt_more,]
+                seq_exon<-seq_exon[1:(exon$nt_more)]
+                length<-exon$nt_more
+                if(strand=="+"){exon$end=exon$overlapping_ccds_start-1}
+                if(strand=="-"){exon$start=exon$overlapping_ccds_end+1}
+                exon$exon_id_noccds<-paste(exon$chr,exon$start,exon$end,exon$type,exon$gene_id,sep="_")
+        }
+        
+        
+        if(exon$type=="exon_alt_donor"){
+                tracks<-tracks[(length+1-exon$nt_more):length,]
+                seq_exon<-seq_exon[(length+1-exon$nt_more):length]
+                length<-exon$nt_more
+                if(strand=="+"){exon$start=exon$overlapping_ccds_end+1}
+                if(strand=="-"){exon$end=exon$overlapping_ccds_start-1}
+                exon$exon_id_noccds<-paste(exon$chr,exon$start,exon$end,exon$type,exon$gene_id,sep="_")
+        }
+        
+        exon<-data.frame(exon_id=exon$exon_id_noccds,exon_id_orig=exon$exon_id,type=exon$type,gene_id=exon$gene_id,annotation=exon$annotation)
+        exon$strand<-strand
+        exon$length<-dim(tracks)[1]
+        exon$frame_start_pred<-NA
+        exon$frame_end_pred<-NA
+        
+        
+        length<-dim(tracks)[1]
+        
+        
+        P_sites_sum<-round(sum(tracks[,1]),digits=6)
+        exon$P_sites_sum<-P_sites_sum
+        
+        
+        Centered_sites_sum<-round(sum(tracks[,4]),digits=6)
+        exon$RNA_sites_sum<-Centered_sites_sum
+        exon$Ribocov_aver<-round(mean(tracks[,2]),digits=6)
+        exon$RNAseqcov_aver<-round(mean(tracks[,3]),digits=6)
+        exon$pctPhase_frame<-NA
+        exon$pctPhase_frame_1<-NA
+        exon$pctPhase_frame_2<-NA
+        exon$pctPhaseCentered_frame<-NA
+        exon$pctPhaseCentered_frame_1<-NA
+        exon$pctPhaseCentered_frame_2<-NA
+        if(length>2){
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                
+                
+                exon$pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+                exon$pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+                exon$pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+                
+                
+                Phase_Centered_sites_frame<-sum(tracks[seq(1,length,by=3),4])
+                Phase_Centered_sites_frame_1<-sum(tracks[seq(2,length,by=3),4])
+                Phase_Centered_sites_frame_2<-sum(tracks[seq(3,length,by=3),4])
+                
+                
+                exon$pctPhaseCentered_frame<-Phase_Centered_sites_frame/Centered_sites_sum
+                exon$pctPhaseCentered_frame_1<-Phase_Centered_sites_frame_1/Centered_sites_sum
+                exon$pctPhaseCentered_frame_2<-Phase_Centered_sites_frame_2/Centered_sites_sum
+                
+                
+                MAXPhase_frame<-max(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2))
+                FRAME_MAX_phase<-max.col(t(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2)))-1
+                
+                MAXPhaseCentered_frame<-max(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2))
+                FRAME_MAX_phaseCentered<-max.col(t(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2)))-1
+        }
+        
+        
+        exon$multit_freq_best_ribo<-NA
+        exon$pval_multit_3nt_ribo<-NA
+        exon$spec_multit_3nt_ribo<-NA
+        exon$fft_max_freq_ribo<-NA
+        exon$fft_power_3_ribo<-NA
+        exon$fft_aver_ribo<-NA
+        exon$spec_max_freq_ribo<-NA
+        exon$spec_power_3_ribo<-NA
+        exon$spec_aver_power_ribo<-NA
+        
+        exon$multit_freq_best_rna<-NA
+        exon$pval_multit_3nt_rna<-NA
+        exon$spec_multit_3nt_rna<-NA
+        exon$fft_max_freq_rna<-NA
+        exon$fft_power_3_rna<-NA
+        exon$fft_aver_rna<-NA
+        exon$spec_max_freq_rna<-NA
+        exon$spec_power_3_rna<-NA
+        exon$spec_aver_power_rna<-NA
+        
+        exon$ORF_score_ribo<-NA
+        exon$ORF_score_rna<-NA
+        
+        if(P_sites_sum>2 & length>5){
+                if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                bestfreq_3ntpval_ribo<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                exon$multit_freq_best_ribo<-bestfreq_3ntpval_ribo[1]
+                exon$pval_multit_3nt_ribo<-bestfreq_3ntpval_ribo[2]
+                exon$spec_multit_3nt_ribo<-bestfreq_3ntpval_ribo[3]
+                score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                exon$ORF_score_ribo<-log2(score1+score2+score3+1)
+                
+                
+                gino<-getFFTFreqs(Nyq.Freq=0.5,data=tracks[,1])
+                modFFT <- Mod(fft(tracks[,1]))
+                FFTdata <- cbind(gino, modFFT)
+                exon$fft_aver_ribo<-mean(FFTdata[,2])
+                exon$fft_power_3_ribo<-FFTdata[which(abs((gino-(1/3)))==min(abs((gino-(1/3))))),2]
+                exon$fft_max_freq_ribo<-abs(gino[which(FFTdata==max((FFTdata[10:dim(FFTdata)[1]/2,2])),arr.ind=TRUE)[1]])[1]
+                
+                
+                spect_P_sites<-spectrum(tracks[,1],plot=FALSE)
+                exon$spec_max_freq_ribo<-spect_P_sites$freq[which(spect_P_sites$spec==max(spect_P_sites$spec),arr.ind=TRUE)][1]
+                exon$spec_power_3_ribo<-spect_P_sites$spec[which(abs((spect_P_sites$freq-(1/3)))==min(abs((spect_P_sites$freq-(1/3)))))]
+                exon$spec_aver_power_ribo<-mean(spect_P_sites$spec)        
+                if(Centered_sites_sum>2){
+                        
+                        gino<-getFFTFreqs(Nyq.Freq=0.5,data=tracks[,4])
+                        modFFT <- Mod(fft(tracks[,4]))
+                        FFTdata <- cbind(gino, modFFT)
+                        exon$fft_aver_rna<-mean(FFTdata[,2])
+                        exon$fft_power_3_rna<-FFTdata[which(abs((gino-(1/3)))==min(abs((gino-(1/3))))),2]
+                        exon$fft_max_freq_rna<-1/abs(gino[which(FFTdata==max((FFTdata[10:dim(FFTdata)[1]/2,2])),arr.ind=TRUE)[1]])[1]
+                        
+                        
+                        
+                        spect_rna<-spectrum(tracks[,4],plot=FALSE)
+                        exon$spec_max_freq_rna<-spect_rna$freq[which(spect_rna$spec==max(spect_rna$spec),arr.ind=TRUE)][1]
+                        exon$spec_power_3_rna<-spect_rna$spec[which(abs((spect_rna$freq-(1/3)))==min(abs((spect_rna$freq-(1/3)))))]
+                        exon$spec_aver_power_rna<-mean(spect_rna$spec)
+                        bestfreq_3ntpval_rna<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,4],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,7)]
+                        
+                        exon$multit_freq_best_rna<-bestfreq_3ntpval_rna[1]
+                        exon$pval_multit_3nt_rna<-bestfreq_3ntpval_rna[2]
+                        exon$spec_multit_3nt_rna<-bestfreq_3ntpval_rna[3]
+                
+                        score_rna_1<-((Phase_Centered_sites_frame-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        score_rna_2<-((Phase_Centered_sites_frame_1-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        score_rna_3<-((Phase_Centered_sites_frame_2-Centered_sites_sum/3)^2)/(Centered_sites_sum/3)
+                        exon$ORF_score_rna<-log2(score_rna_1+score_rna_2+score_rna_3+1)
+                }
+                
+                
+                
+        }
+        
+        
+        exon$chisq_ribo<-NA
+        exon$chisq_rna<-NA
+        
+        
+        if(P_sites_sum>15 & length>5){
+                exon$chisq_ribo<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+        if(P_sites_sum<16 & P_sites_sum>0 & length>5){
+                exon$chisq_ribo<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }        
+        
+        
+        if(Centered_sites_sum>15 & length>5){
+                exon$chisq_rna<-chisq.test(as.table(c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2)))$p.value}
+        if(Centered_sites_sum<16 & Centered_sites_sum>0 & length>5){
+                exon$chisq_rna<-xmulti(obs=c(Phase_Centered_sites_frame,Phase_Centered_sites_frame_1,Phase_Centered_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+        }
+        
+        
+        exon$max_notcov_ribo<-max((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len)))
+        exon$coords_notcov_ribo<-max.col(t((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len))))-max((!tracks[,2]) * unlist(lapply(rle(tracks[,2])$lengths, seq_len)))
+        
+        exon$max_notcov_rna<-max((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len)))
+        exon$coords_notcov_rna<-max.col(t((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len))))-max((!tracks[,3]) * unlist(lapply(rle(tracks[,3])$lengths, seq_len)))
+        
+        
+        
+        if(strand=="-"){
+                exon$coords_notcov_ribo<-length-exon$coords_notcov_ribo
+                exon$coords_notcov_rna<-length-exon$coords_notcov_rna
+        }
+        
+        if(exon$max_notcov_ribo==0){
+                exon$max_notcov_ribo<-"NA"
+        }
+        
+        if(exon$max_notcov_rna==0){
+                exon$max_notcov_rna<-"NA"
+        }
+        
+        exon$notcovered_ribo<-sum(tracks[,2] == 0)
+        exon$notcovered_rna<-sum(tracks[,3] == 0)
+        
+        
+        
+        
+        if(length>2){
+                exon$frame_start_pred<-FRAME_MAX_phase
+                exon$frame_end_pred<-(length-(FRAME_MAX_phase+1))%%3
+        }
+        if(x[1,2]=="-" & length>2){
+                
+                exon$frame_end_pred<-FRAME_MAX_phase
+                exon$frame_start_pred<-(length-(FRAME_MAX_phase+1))%%3
+        }
+        
+        pept<-NA
+        exon$transl_pept_notccds<-NA
+        if(P_sites_sum>0){
+                if(exon$strand=="-"){
+                        pept<-unlist(getTrans(seq_exon,sens="F",frame=exon$frame_end_pred))
+                } else {pept<-unlist(getTrans(seq_exon,sens="F",frame=exon$frame_start_pred))}
+                exon$transl_pept_notccds<-paste(pept,sep="",collapse="")
+        }
+        
+        return(exon)
+}
+
+
+
+### This function calculates coherence values for candidate regions with multi-frame translation.
+### x: table of split track lines (presumably a character matrix — confirm); x[1,1] is the exon id, x[1,2] the strand, the remaining columns the per-position values of four tracks.
+### Returns a one-row data.frame with frame shares, the multitaper 3-nt result and pairwise between-frame coherence for the Psites and RNAcent tracks.
+calculate_coherence<-function(x){
+        strand<-x[1,2]
+        tracks_pre<-t(x[,-c(1:2)])
+        # Rows of 'tracks' are positions, columns the four tracks named below; minus-strand tracks are reversed.
+        
+        if(strand=="-"){
+                tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+        } else if (strand=="+"){
+                tracks<-tracks_pre}
+        colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+        mode(tracks)<-"numeric"
+        
+        exon<-data.frame(exon_id=x[1,1],stringsAsFactors=F,row.names=NULL)
+        exon$strand<-strand
+        exon$frame_start_pred<-NA
+        exon$frame_end_pred<-NA
+        # 'length' is used as a plain variable holding the number of positions.
+        exon$length<-dim(tracks)[1]
+        length<-dim(tracks)[1]
+        
+        # Totals of the Psites (column 1) and RNAcent (column 4) tracks, and mean RiboCov / RNACov coverage.
+        P_sites_sum<-round(sum(tracks[,1]),digits=6)
+        exon$P_sites_sum<-P_sites_sum
+        
+        
+        Centered_sites_sum<-round(sum(tracks[,4]),digits=6)
+        exon$RNA_sites_sum<-Centered_sites_sum
+        exon$Ribocov_aver<-round(mean(tracks[,2]),digits=6)
+        exon$RNAseqcov_aver<-round(mean(tracks[,3]),digits=6)
+        exon$pctPhase_frame<-NA
+        exon$pctPhase_frame_1<-NA
+        exon$pctPhase_frame_2<-NA
+        exon$pctPhaseCentered_frame<-NA
+        exon$pctPhaseCentered_frame_1<-NA
+        exon$pctPhaseCentered_frame_2<-NA
+        if(length>2){
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                # Share of the Psites total on positions 1,4,7.. (frame), 2,5,8.. (frame_1) and 3,6,9.. (frame_2).
+                
+                exon$pctPhase_frame<-Phase_P_sites_frame/P_sites_sum
+                exon$pctPhase_frame_1<-Phase_P_sites_frame_1/P_sites_sum
+                exon$pctPhase_frame_2<-Phase_P_sites_frame_2/P_sites_sum
+                
+                # Same split for the RNAcent track.
+                Phase_Centered_sites_frame<-sum(tracks[seq(1,length,by=3),4])
+                Phase_Centered_sites_frame_1<-sum(tracks[seq(2,length,by=3),4])
+                Phase_Centered_sites_frame_2<-sum(tracks[seq(3,length,by=3),4])
+                
+                
+                exon$pctPhaseCentered_frame<-Phase_Centered_sites_frame/Centered_sites_sum
+                exon$pctPhaseCentered_frame_1<-Phase_Centered_sites_frame_1/Centered_sites_sum
+                exon$pctPhaseCentered_frame_2<-Phase_Centered_sites_frame_2/Centered_sites_sum
+                
+                # Largest share and its frame (0/1/2); these four values are not used further in this function.
+                MAXPhase_frame<-max(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2))
+                FRAME_MAX_phase<-max.col(t(c(exon$pctPhase_frame,exon$pctPhase_frame_1,exon$pctPhase_frame_2)))-1
+                
+                MAXPhaseCentered_frame<-max(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2))
+                FRAME_MAX_phaseCentered<-max.col(t(c(exon$pctPhaseCentered_frame,exon$pctPhaseCentered_frame_1,exon$pctPhaseCentered_frame_2)))-1
+        }
+        
+        # Multitaper and coherence columns default to NA; they are filled only when the thresholds below are met.
+        exon$multit_freq_best_ribo<-NA
+        exon$pval_multit_3nt_ribo<-NA
+        exon$spec_multit_3nt_ribo<-NA
+
+        exon$coherence_1_2_ribo<-NA
+        exon$coherence_1_3_ribo<-NA
+        exon$coherence_2_3_ribo<-NA
+        exon$min_coherence_ribo<-NA
+        exon$multit_freq_best_rna<-NA
+        exon$pval_multit_3nt_rna<-NA
+        exon$spec_multit_3nt_rna<-NA
+        exon$coherence_1_2_rna<-NA
+        exon$coherence_1_3_rna<-NA
+        exon$coherence_2_3_rna<-NA
+        exon$min_coherence_rna<-NA
+        if(P_sites_sum>10 & length>5){
+                # Tapers are built for n=50 when the region is shorter than 25 positions, otherwise for its own length.
+                if(length<25){slepians<-dpss(n=length+(50-length),k=24,nw=12)}
+                if(length>=25){slepians<-dpss(n=length,k=24,nw=12)}
+                bestfreq_3ntpval_ribo<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,1],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,5,7)]
+                exon$multit_freq_best_ribo<-bestfreq_3ntpval_ribo[1]
+                exon$pval_multit_3nt_ribo<-bestfreq_3ntpval_ribo[2]
+                exon$spec_multit_3nt_ribo<-bestfreq_3ntpval_ribo[4]
+                # Elements 1, 6, 5 and 7 of the helper result are kept; element 5 (position 3 here) is the frequency used for the coherence lookup below.
+                y<-tracks[,1]
+                # Series shorter than 25 values are zero-padded on both sides to 50 values, matching the taper length.
+                if(length(y)<25){
+                        remain<-50-length(y)
+                        y<-c(rep(0,as.integer(remain/2)),y,rep(0,remain%%2+as.integer(remain/2)))
+                }
+                if(length(y)<1024/2){padding<-1024}
+                if(length(y)>=1024/2){padding<-"default"}
+                length<-length(y)
+                # 'length' now holds the (possibly padded) series length. y1/y2/y3 keep only positions 1,4,.. / 2,5,.. / 3,6,.. and are zero elsewhere.
+                y1<-rep(0,length)
+                y2<-rep(0,length)
+                y3<-rep(0,length)
+                
+                y1[seq(1,length,by=3)]<-y[seq(1,length,by=3)]
+                y2[seq(2,length,by=3)]<-y[seq(2,length,by=3)]
+                y3[seq(3,length,by=3)]<-y[seq(3,length,by=3)]
+                
+                
+                # Multitaper spectrum of each frame series, then mtm.coh between every pair of frames.
+                spec_y1<-spec.mtm(timeSeries=as.ts(y1),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                spec_y2<-spec.mtm(timeSeries=as.ts(y2),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                spec_y3<-spec.mtm(timeSeries=as.ts(y3),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                
+                coh1_2<-mtm.coh(spec_y1,spec_y2,plot=F)
+                coh1_3<-mtm.coh(spec_y1,spec_y3,plot=F)
+                coh2_3<-mtm.coh(spec_y2,spec_y3,plot=F)
+                # NOTE(review): the lookup uses exact equality on frequencies — confirm the helper's frequency comes from the same grid as mtm.coh's $freq.
+                exon$coherence_1_2_ribo<-coh1_2$msc[which(coh1_2$freq==bestfreq_3ntpval_ribo[3])]
+                exon$coherence_1_3_ribo<-coh1_3$msc[which(coh1_3$freq==bestfreq_3ntpval_ribo[3])]
+                exon$coherence_2_3_ribo<-coh2_3$msc[which(coh2_3$freq==bestfreq_3ntpval_ribo[3])]
+                exon$min_coherence_ribo<-min(c(exon$coherence_1_2_ribo,exon$coherence_1_3_ribo,exon$coherence_2_3_ribo),na.rm=T)
+                if((Phase_Centered_sites_frame > 5 & Phase_Centered_sites_frame_1 > 5) | (Phase_Centered_sites_frame > 5 & Phase_Centered_sites_frame_2 > 5) | (Phase_Centered_sites_frame_1 > 5 & Phase_Centered_sites_frame_2 > 5)){
+                        # RNAcent branch: runs when at least two of the three frame sums exceed 5; same steps as above on track 4, reusing the same tapers.
+                        bestfreq_3ntpval_rna<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,4],n_tapers=24,time_bw=12,slepians_values=slepians)[c(1,6,5,7)]
+                        exon$multit_freq_best_rna<-bestfreq_3ntpval_rna[1]
+                        exon$pval_multit_3nt_rna<-bestfreq_3ntpval_rna[2]
+                        exon$spec_multit_3nt_rna<-bestfreq_3ntpval_rna[4]
+                        
+                        y<-tracks[,4]
+                        
+                        if(length(y)<25){
+                                remain<-50-length(y)
+                                y<-c(rep(0,as.integer(remain/2)),y,rep(0,remain%%2+as.integer(remain/2)))
+                        }
+                        if(length(y)<1024/2){padding<-1024}
+                        if(length(y)>=1024/2){padding<-"default"}
+                        length<-length(y)
+                        
+                        y1<-rep(0,length)
+                        y2<-rep(0,length)
+                        y3<-rep(0,length)
+                        
+                        y1[seq(1,length,by=3)]<-y[seq(1,length,by=3)]
+                        y2[seq(2,length,by=3)]<-y[seq(2,length,by=3)]
+                        y3[seq(3,length,by=3)]<-y[seq(3,length,by=3)]
+                        
+                        
+                        
+                        spec_y1<-spec.mtm(timeSeries=as.ts(y1),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                        spec_y2<-spec.mtm(timeSeries=as.ts(y2),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                        spec_y3<-spec.mtm(timeSeries=as.ts(y3),nw=12,k=24,dpssIN=slepians,returnInternals=T,plot=F,nFFT=padding)
+                        
+                        coh1_2<-mtm.coh(spec_y1,spec_y2,plot=F)
+                        coh1_3<-mtm.coh(spec_y1,spec_y3,plot=F)
+                        coh2_3<-mtm.coh(spec_y2,spec_y3,plot=F)
+                        
+                        exon$coherence_1_2_rna<-coh1_2$msc[which(coh1_2$freq==bestfreq_3ntpval_rna[3])]
+                        exon$coherence_1_3_rna<-coh1_3$msc[which(coh1_3$freq==bestfreq_3ntpval_rna[3])]
+                        exon$coherence_2_3_rna<-coh2_3$msc[which(coh2_3$freq==bestfreq_3ntpval_rna[3])]
+                        exon$min_coherence_rna<-min(c(exon$coherence_1_2_rna,exon$coherence_1_3_rna,exon$coherence_2_3_rna),na.rm=T)
+                        
+                }
+        }
+        exon
+}
+
+
+### This function calculates exonic information on nonCCDS ORFs, to calculate multimapping information and CDS overlaps.
+### x: one-row ORF record; the body reads transcript_id, length, strand, start_pos, st2vect, ORF_length and gene_id. 'counter' and 'signif_exons' are not used in the body.
+### Returns x with the added columns to_check, to_check_rem, ORF_id_tr and ORF_id_gen. Exon data are read from the global nonccds_res.
+pre_multi_nonCCDS_ORFs<-function(x,counter,all_exons_in_the_sign_transcr=exons_transcr_nonccds_sign,signif_exons=nonccds_res_sign){
+        transcr<-x[,"transcript_id"]
+        trascr_length<-x$length
+        orf_strand<-x$strand
+        transcr_data<-data.frame(transcript_id=transcr)
+        # Exon ids of this transcript (column 4 is matched against the transcript id), reversed for minus-strand ORFs.
+        exons_in_transcr<-all_exons_in_the_sign_transcr[all_exons_in_the_sign_transcr[,4]%in%transcr,"exon_id"]
+        if(orf_strand=="-"){exons_in_transcr<-rev(exons_in_transcr)}
+        # Exon rows put in the same order as the ids. NOTE(review): this reads the global nonccds_res, not the signif_exons argument — confirm.
+        exons_in_transcr_data<-nonccds_res[nonccds_res[,"exon_id"]%in%exons_in_transcr,]
+        exons_in_transcr_data<-exons_in_transcr_data[match(exons_in_transcr,exons_in_transcr_data$exon_id),]
+        # Cumulative exon lengths give each exon's end in transcript coordinates.
+        orf_start<-x$start_pos
+        orf_end<-x$st2vect
+        cumsumexons<-cumsum(exons_in_transcr_data$length.x)
+        # st_ex: exon whose cumulative end is the closest one above orf_start; end_ex: the closest one at or above orf_end.
+        st_ex<-which((cumsumexons-orf_start)==min(cumsumexons[cumsumexons>orf_start]-orf_start))
+        end_ex<-which((cumsumexons-orf_end)==min(cumsumexons[cumsumexons>=orf_end]-orf_end))
+        in_betw_ex<-st_ex:end_ex
+        in_betw_ex<-in_betw_ex[!in_betw_ex%in%c(st_ex,end_ex)>0]
+        exon_inbetween_data<-exons_in_transcr_data[in_betw_ex,]
+        # in_betw_ex above holds the exons strictly between st_ex and end_ex.
+        # coord_start: offset of orf_start within the first exon, applied to that exon's start (+) or end (-).
+        coord_start<-NA
+        coord_end<-NA
+        nt_to_rem<-NA
+        rem_len<-0
+        if(st_ex>1){rem_len<-cumsumexons[st_ex-1]}
+        if(x$strand=="+"){coord_start<-exons_in_transcr_data[st_ex,"start"] + (orf_start-rem_len)}
+        if(x$strand=="-"){coord_start<-exons_in_transcr_data[st_ex,"end"] - (orf_start-rem_len)}
+        # nt_to_rem: distance from coord_start to the far edge of the first exon, plus the length of all in-between exons; 0 for single-exon ORFs.
+        if(length(in_betw_ex)==0){
+                if(st_ex==end_ex){nt_to_rem<-0}
+                if(st_ex!=end_ex){if(x$strand=="+"){
+                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                }
+                                  if(x$strand=="-"){
+                                          nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                                  }
+                }
+        }
+        
+        if(length(in_betw_ex)>0){
+                nt_in_betw<-sum(exons_in_transcr_data[in_betw_ex,"length.x"])
+                if(x$strand=="+"){
+                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                }
+                if(x$strand=="-"){
+                        nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                }
+                nt_to_rem<-nt_to_rem+nt_in_betw
+        }
+        # coord_end is derived from ORF_length. NOTE(review): '+1' is added on both strands although the length is subtracted on '-' — confirm the intended end coordinate.
+        if(st_ex==end_ex & x$strand=="+"){coord_end<-coord_start+x$ORF_length+1}
+        if(st_ex==end_ex & x$strand=="-"){coord_end<-coord_start-x$ORF_length+1}
+        
+        if(st_ex!=end_ex & x$strand=="+"){coord_end<-exons_in_transcr_data[end_ex,"start"] + (x$ORF_length-nt_to_rem)+1}
+        if(st_ex!=end_ex & x$strand=="-"){coord_end<-exons_in_transcr_data[end_ex,"end"] - (x$ORF_length-nt_to_rem)+1}
+        # On the minus strand the two coordinates are exchanged.
+        if(x$strand=="-"){
+                coord_start2<-coord_start
+                coord_start<-coord_end
+                coord_end<-coord_start2
+        }
+        
+        # to_check: "chr_start_end_EXONnonCCDS_gene_strand" ids for the ORF part of the first and last exon (joined by ";"), or a single id for a one-exon ORF.
+        if(st_ex!=end_ex & x$strand=="+"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,exons_in_transcr_data[st_ex,"end"],"EXONnonCCDS",x$gene_id,x$strand,sep="_")
+                                          to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],exons_in_transcr_data[end_ex,"start"],coord_end,"EXONnonCCDS",x$gene_id,x$strand,sep="_")
+                                          to_check<-paste(to_check_st,to_check_end,sep=";")
+                                          
+        }
+        if(st_ex!=end_ex & x$strand=="-"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],exons_in_transcr_data[st_ex,"start"],coord_end,"EXONnonCCDS",x$gene_id,x$strand,sep="_")
+                                          to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],coord_start,exons_in_transcr_data[end_ex,"end"],"EXONnonCCDS",x$gene_id,x$strand,sep="_")
+                                          to_check<-paste(to_check_st,to_check_end,sep=";")
+        }
+        # to_check_rem lists the ids of the in-between exons (NA when there are none).
+        if(st_ex==end_ex){to_check<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,"EXONnonCCDS",x$gene_id,x$strand,sep="_")}
+        x$to_check<-to_check
+        x$to_check_rem<-NA
+        if(length(in_betw_ex)>0){
+                x$to_check_rem<-paste(exon_inbetween_data$exon_id,collapse=";")
+                
+        }
+        x$ORF_id_tr<-paste(transcr_data$transcript_id,orf_start,orf_end,sep="_")
+        x$ORF_id_gen<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,sep="_")
+        x
+        
+        
+}
+
+### This function calculates exonic information on CCDS ORFs, to calculate multimapping information and CDS overlaps.
+### x: one-row ORF record; the body reads transcript_id, length, strand, start_pos, st2vect, ORF_length and gene_id. 'counter' is not used in the body.
+### Returns x with the added columns to_check, to_check_rem, ORF_id_tr and ORF_id_gen.
+### NOTE(review): both defaults name "nonccds" objects although this is the CCDS variant — confirm callers always pass these arguments explicitly.
+pre_multi_CCDS_ORFs<-function(x,counter,all_exons_in_the_sign_transcr=exons_transcr_nonccds_sign,signif_exons=nonccds_res){
+        transcr<-x[,"transcript_id"]
+        trascr_length<-x$length
+        orf_strand<-x$strand
+        transcr_data<-data.frame(transcript_id=transcr)
+        # Exon coordinate ids of this transcript (column 4 is matched against the transcript id), reversed for minus-strand ORFs.
+        exons_in_transcr<-all_exons_in_the_sign_transcr[all_exons_in_the_sign_transcr[,4]%in%transcr,"coords_id"]
+        if(orf_strand=="-"){exons_in_transcr<-rev(exons_in_transcr)}
+        # Exon rows from signif_exons, matched on its 'coords' column and put in the same order as the ids.
+        exons_in_transcr_data<-signif_exons[signif_exons[,"coords"]%in%exons_in_transcr,]
+        exons_in_transcr_data<-exons_in_transcr_data[match(exons_in_transcr,exons_in_transcr_data$coords),]
+        # Cumulative exon lengths give each exon's end in transcript coordinates.
+        orf_start<-x$start_pos
+        orf_end<-x$st2vect
+        cumsumexons<-cumsum(exons_in_transcr_data$length.x)
+        # st_ex: exon whose cumulative end is the closest one above orf_start; end_ex: the closest one at or above orf_end.
+        st_ex<-which((cumsumexons-orf_start)==min(cumsumexons[cumsumexons>orf_start]-orf_start))
+        end_ex<-which((cumsumexons-orf_end)==min(cumsumexons[cumsumexons>=orf_end]-orf_end))
+        in_betw_ex<-st_ex:end_ex
+        in_betw_ex<-in_betw_ex[!in_betw_ex%in%c(st_ex,end_ex)>0]
+        exon_inbetween_data<-exons_in_transcr_data[in_betw_ex,]
+        # in_betw_ex above holds the exons strictly between st_ex and end_ex.
+        # coord_start: offset of orf_start within the first exon, applied to that exon's start (+) or end (-).
+        coord_start<-NA
+        coord_end<-NA
+        nt_to_rem<-NA
+        rem_len<-0
+        if(st_ex>1){rem_len<-cumsumexons[st_ex-1]}
+        if(x$strand=="+"){coord_start<-exons_in_transcr_data[st_ex,"start"] + (orf_start-rem_len)}
+        if(x$strand=="-"){coord_start<-exons_in_transcr_data[st_ex,"end"] - (orf_start-rem_len)}
+        # nt_to_rem: distance from coord_start to the far edge of the first exon, plus the length of all in-between exons; 0 for single-exon ORFs.
+        if(length(in_betw_ex)==0){
+                if(st_ex==end_ex){nt_to_rem<-0}
+                if(st_ex!=end_ex){if(x$strand=="+"){
+                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                }
+                                  if(x$strand=="-"){
+                                          nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                                  }
+                }
+        }
+        
+        if(length(in_betw_ex)>0){
+                nt_in_betw<-sum(exons_in_transcr_data[in_betw_ex,"length.x"])
+                if(x$strand=="+"){
+                        nt_to_rem<-exons_in_transcr_data[st_ex,"end"]-coord_start
+                }
+                if(x$strand=="-"){
+                        nt_to_rem<-coord_start-exons_in_transcr_data[st_ex,"start"]
+                }
+                nt_to_rem<-nt_to_rem+nt_in_betw
+        }
+        # coord_end is derived from ORF_length. NOTE(review): '+1' is added on both strands although the length is subtracted on '-' — confirm the intended end coordinate.
+        if(st_ex==end_ex & x$strand=="+"){coord_end<-coord_start+x$ORF_length+1}
+        if(st_ex==end_ex & x$strand=="-"){coord_end<-coord_start-x$ORF_length+1}
+        
+        if(st_ex!=end_ex & x$strand=="+"){coord_end<-exons_in_transcr_data[end_ex,"start"] + (x$ORF_length-nt_to_rem)+1}
+        if(st_ex!=end_ex & x$strand=="-"){coord_end<-exons_in_transcr_data[end_ex,"end"] - (x$ORF_length-nt_to_rem)+1}
+        # On the minus strand the two coordinates are exchanged.
+        if(x$strand=="-"){
+                coord_start2<-coord_start
+                coord_start<-coord_end
+                coord_end<-coord_start2
+        }
+        
+        # to_check: "chr_start_end_CCDS_gene_strand" ids for the ORF part of the first and last exon (joined by ";"), or a single id for a one-exon ORF.
+        if(st_ex!=end_ex & x$strand=="+"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,exons_in_transcr_data[st_ex,"end"],"CCDS",x$gene_id,x$strand,sep="_")
+                                          to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],exons_in_transcr_data[end_ex,"start"],coord_end,"CCDS",x$gene_id,x$strand,sep="_")
+                                          to_check<-paste(to_check_st,to_check_end,sep=";")
+                                          
+        }
+        if(st_ex!=end_ex & x$strand=="-"){to_check_st<-paste(exons_in_transcr_data[st_ex,"chr"],exons_in_transcr_data[st_ex,"start"],coord_end,"CCDS",x$gene_id,x$strand,sep="_")
+                                          to_check_end<-paste(exons_in_transcr_data[end_ex,"chr"],coord_start,exons_in_transcr_data[end_ex,"end"],"CCDS",x$gene_id,x$strand,sep="_")
+                                          to_check<-paste(to_check_st,to_check_end,sep=";")
+        }
+        # to_check_rem lists the exon_id values of the in-between exons (NA when there are none).
+        if(st_ex==end_ex){to_check<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,"CCDS",x$gene_id,x$strand,sep="_")}
+        x$to_check<-to_check
+        x$to_check_rem<-NA
+        if(length(in_betw_ex)>0){
+                x$to_check_rem<-paste(exon_inbetween_data$exon_id,collapse=";")
+                
+        }
+        x$ORF_id_tr<-paste(transcr_data$transcript_id,orf_start,orf_end,sep="_")
+        x$ORF_id_gen<-paste(exons_in_transcr_data[st_ex,"chr"],coord_start,coord_end,sep="_")
+        x
+        
+        
+}
+
+
+
+
+### This function calculates results for real and simulated exons for the multitaper analysis.
+### x: table with an exon_id column; tapers, bw: taper count and time-bandwidth given to dpss() and to the 3-nt helper; nsimul: simulations per exon. Reads the globals all_tracks and index.
+### Returns one row per exon: exon_id, length, pval_multi_ribo, P_sites_sum, RNA_sites_sum, n_simul_sign_multi, pct_simul_sign_multi.
+take_simuls_multi<-function(x,tapers,bw,nsimul){
+        unique_ex_id<-x[,"exon_id"]
+        list_exons_tracks<-list()
+        for(i in seq_along(unique_ex_id)){ # seq_along: an empty id vector yields no iterations (1:0 would give 1,0)
+                list_exons_tracks[[i]]<-all_tracks[index==unique_ex_id[i]]
+        }
+        simuls_eachexons<-list()
+        for(s in seq_along(unique_ex_id)){
+                withsep<-strsplit(list_exons_tracks[[s]],split=" ")
+                x<-t(data.frame(withsep)) # one row per track line: field 2 is the strand, fields 3.. the per-position values
+                id<-unique_ex_id[s]
+                exon<-data.frame(exon_id=id,stringsAsFactors=F,row.names=NULL)
+                strand<-x[1,2]
+                tracks_pre<-t(x[,-c(1:2)])
+                if(strand=="-"){
+                        tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+                } else if (strand=="+"){
+                        tracks<-tracks_pre}
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                exon$length<-length
+                # Tapers of length 50 for regions shorter than 25 positions, otherwise of the region's own length.
+                if(length<25){
+                        slepians<-dpss(n=length+(50-length),k=tapers,nw=bw)
+                }
+                if(length>=25){
+                        slepians<-dpss(n=length,k=tapers,nw=bw)
+                }
+                # Observed value: element 6 of the helper result for the real Psites track.
+                exon$pval_multi_ribo<-take_freqs_Fvalues_all_around_3nt_spec(x=tracks[,1],n_tapers=tapers,time_bw=bw,slepians_values=slepians)[6]
+                ribo_covered_pos<-which(tracks[,2]>0)
+                P_sites_sum<-sum(tracks[,1])
+                exon$P_sites_sum<-P_sites_sum
+                exon$RNA_sites_sum<-sum(tracks[,4])
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                # Each simulation (seeded with its index j) redistributes P_sites_sum counts at random over the ribo-covered positions and repeats the test.
+                simuls_results<-foreach(j=1:nsimul,.combine=c,.multicombine=TRUE) %dopar%{
+                        set.seed(j)
+                        simtrack<-rep(0,length)
+                        rand_pos<-ribo_covered_pos[sample.int(length(ribo_covered_pos),P_sites_sum,replace=TRUE)] # explicit indexing: sample(x,..) would draw from 1:x when only one position is covered; draws are unchanged otherwise
+                        for(i in rand_pos){
+                                simtrack[i]<-simtrack[i]+1
+                        }
+                        
+                        simul_Pval_multi_3nt<-take_freqs_Fvalues_all_around_3nt_spec(x=simtrack,n_tapers=tapers,time_bw=bw,slepians_values=slepians)[6]
+                        
+                        return(simul_Pval_multi_3nt)
+                }
+                exon$n_simul_sign_multi<-sum(simuls_results<0.05)
+                exon$pct_simul_sign_multi<-sum(simuls_results<0.05)/length(simuls_results)
+                simuls_eachexons[[s]]<-exon
+                # n_simul_sign_multi / pct_simul_sign_multi: number and fraction of simulations whose value is below 0.05.
+        }
+        results_simuls<-do.call(args=simuls_eachexons,what=rbind.data.frame)
+        results_simuls
+}
+
+
+### Calculates Chi-square and ORFscore results for the real P-site track and for nsimul simulated tracks of every exon in x[,"exon_id"].
+### Reads the globals all_tracks and index; returns one row per exon (ORF_score, chisq, counts and fractions of significant simulations).
+### Every exon must have P_sites_sum > 0: neither test branch assigns a p-value when it is 0.
+take_simuls_chisq_ORFscore<-function(x,nsimul,cutoff_ORFscore=quantile85_ORFscore){
+        unique_ex_id<-x[,"exon_id"]
+        list_exons_tracks<-list()
+        for(i in seq_along(unique_ex_id)){
+                list_exons_tracks[[i]]<-all_tracks[index==unique_ex_id[i]]
+        }
+        simuls_eachexons<-list()
+        for(s in seq_along(unique_ex_id)){
+                withsep<-strsplit(list_exons_tracks[[s]],split=" ")
+                x<-t(data.frame(withsep))
+                id<-unique_ex_id[s]
+                exon<-data.frame(exon_id=id,stringsAsFactors=FALSE,row.names=NULL)
+                strand<-x[1,2]
+                tracks_pre<-t(x[,-c(1:2)])
+                if(strand=="-"){
+                        tracks<-cbind(rev(tracks_pre[,1]),rev(tracks_pre[,2]),rev(tracks_pre[,3]),rev(tracks_pre[,4]))
+                } else if (strand=="+"){
+                        tracks<-tracks_pre}
+                colnames(tracks)<-c("Psites","RiboCov","RNACov","RNAcent")
+                mode(tracks)<-"numeric"
+                length<-dim(tracks)[1]
+                # simulated P-sites may only land on positions with RiboCov > 0
+                # real P-sites are summed per frame (every third position, starting at 1, 2 and 3)
+                ribo_covered_pos<-which(tracks[,2]>0)
+                P_sites_sum<-sum(tracks[,1])
+                Phase_P_sites_frame<-sum(tracks[seq(1,length,by=3),1])
+                Phase_P_sites_frame_1<-sum(tracks[seq(2,length,by=3),1])
+                Phase_P_sites_frame_2<-sum(tracks[seq(3,length,by=3),1])
+                score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                exon$ORF_score<-log2(score1+score2+score3+1)
+                if(P_sites_sum>15){
+                        exon$chisq<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+                if(P_sites_sum<16 & P_sites_sum>0){
+                        exon$chisq<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                }        
+                simuls_results<-foreach(j=1:nsimul,.combine=rbind,.multicombine=TRUE) %dopar%{       
+                        set.seed(j)
+                        simtrack<-rep(0,length)
+                        rand_pos<-ribo_covered_pos[sample.int(length(ribo_covered_pos),P_sites_sum,replace=TRUE)] # sample(x,...) would draw from 1:x when only one position x is covered
+                        for(i in rand_pos){
+                                simtrack[i]<-simtrack[i]+1
+                        }
+                        
+                        Phase_P_sites_frame<-sum(simtrack[seq(1,length,by=3)])
+                        Phase_P_sites_frame_1<-sum(simtrack[seq(2,length,by=3)])
+                        Phase_P_sites_frame_2<-sum(simtrack[seq(3,length,by=3)])
+                        
+                        score1<-((Phase_P_sites_frame-P_sites_sum/3)^2)/(P_sites_sum/3)
+                        score2<-((Phase_P_sites_frame_1-P_sites_sum/3)^2)/(P_sites_sum/3)
+                        score3<-((Phase_P_sites_frame_2-P_sites_sum/3)^2)/(P_sites_sum/3)
+                        simul_ORF_score<-log2(score1+score2+score3+1)
+                        if(P_sites_sum>15){
+                                simul_Chisq<-chisq.test(as.table(c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2)))$p.value}
+                        if(P_sites_sum<16 & P_sites_sum>0){
+                                simul_Chisq<-xmulti(obs=c(Phase_P_sites_frame,Phase_P_sites_frame_1,Phase_P_sites_frame_2),expr=c(1,1,1),statName="Prob",detail=0)$pProb
+                        }        
+                        return(c(simul_Chisq,simul_ORF_score))
+                }
+                colnames(simuls_results)<-c("simul_Chisq","simul_ORF_score")
+                exon$n_simul_sign_Chiq<-sum(simuls_results[,"simul_Chisq"]<0.05)
+                exon$n_simul_sign_ORFscore<-sum(simuls_results[,"simul_ORF_score"]>cutoff_ORFscore)
+                exon$pct_simul_sign_Chiq<-sum(simuls_results[,"simul_Chisq"]<0.05)/dim(simuls_results)[1]
+                exon$pct_simul_sign_ORFscore<-sum(simuls_results[,"simul_ORF_score"]>cutoff_ORFscore)/dim(simuls_results)[1] # same cutoff as the count above (was a fixed 6)
+                simuls_eachexons[[s]]<-exon
+                
+        }
+        results_simuls<-do.call(args=simuls_eachexons,what=rbind.data.frame)
+        results_simuls
+}
+
+
+# Multiple plot function, from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_%28ggplot2%29/
+#
+# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
+# - cols:   Number of columns in layout
+# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
+#
+# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
+# then plot 1 will go in the upper left, 2 will go in the upper right, and
+# 3 will go all the way across the bottom.
+#
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+        # Draws the plots passed in ... and plotlist on one page; 'file' is accepted but never read.
+        library(grid)
+        # Make a list from the ... arguments and plotlist
+        plots <- c(list(...), plotlist)
+        numPlots <- length(plots)
+        # Nothing to draw: return before plots[[1]] would be indexed on an empty list
+        if (numPlots == 0) {return(invisible(NULL))}
+        # If layout is NULL, then use 'cols' to determine layout
+        if (is.null(layout)) {
+                # Make the panel
+                # ncol: Number of columns of plots
+                # nrow: Number of rows needed, calculated from # of cols
+                layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+                                 ncol = cols, nrow = ceiling(numPlots/cols))
+        }
+        
+        if (numPlots==1) {
+                print(plots[[1]])
+                
+        } else {
+                # Set up the page
+                grid.newpage()
+                pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+                
+                # Make each plot, in the correct location
+                for (i in seq_len(numPlots)) {
+                        # Get the i,j matrix positions of the regions that contain this subplot
+                        matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+                        
+                        print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+                                                        layout.pos.col = matchidx$col))
+                }
+        }
+}
diff --git a/scripts/genes_coor.R b/scripts/genes_coor.R
new file mode 100755
index 0000000..ad2f02f
--- /dev/null
+++ b/scripts/genes_coor.R
@@ -0,0 +1,14 @@
+#!/usr/bin/Rscript
+# Collapses all_exons.bed into one start/end span per gene_id (column 5) and writes the sorted result to genes_start_end.bed.
+all_ex<-read.table("all_exons.bed",stringsAsFactors=F,header=F)
+spli<-split.data.frame(all_ex,f=all_ex$V5)
+# Per group: smallest column-2 value, largest column-3 value, their difference, plus chr/gene_id/strand taken from the first row.
+spli2<-lapply(spli,FUN=function(x){
+        minc<-min(x[,2])
+        maxc<-max(x[,3])
+        data.frame(chr=x[1,1],start=minc,end=maxc,le=maxc-minc,gene_id=x[1,5],strand=x[1,6],stringsAsFactors=F)
+})
+spli3<-do.call(what=rbind.data.frame,args=spli2)
+write.table(file="genes_start_end",x=spli3,col.names=F,row.names=F,quote=F,sep="\t") # unsorted intermediate file
+system("sort -k1,1 -k2,2n genes_start_end > genes_start_end.bed ") # order by column 1, then numerically by column 2
+system("rm genes_start_end") # remove the intermediate file
\ No newline at end of file
diff --git a/scripts/gtf_to_start_stop_tr.R b/scripts/gtf_to_start_stop_tr.R
new file mode 100755
index 0000000..5fa320f
--- /dev/null
+++ b/scripts/gtf_to_start_stop_tr.R
@@ -0,0 +1,93 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for creating transcript-level coordinates of CDS positions (start and stop) from a .gtf file
+
+print(paste("--- extracting transcript-level CDS cordinates","---",date(),sep=" "))
+# Input "exons_cds_all": one row per exon or CDS segment (columns named below); only transcripts with at least one CDS row are kept.
+exons_cds_all<-read.table("exons_cds_all",stringsAsFactors=FALSE,header=FALSE)
+colnames(exons_cds_all)<-c("chr","type","start","end","strand","transcript_id")
+tr_cds<-unique(exons_cds_all[exons_cds_all[,"type"]=="CDS","transcript_id"])
+
+exons_cds_all2<-exons_cds_all[exons_cds_all[,"transcript_id"]%in%tr_cds,]
+exons_cds_all2$length<-1+(exons_cds_all2$end-exons_cds_all2$start)
+list_exons_cds_tr<-split.data.frame(x=exons_cds_all2,f=exons_cds_all2$transcript_id,drop=TRUE)
+# One result per transcript; the list is allocated up front instead of being grown inside the loop.
+list_coords<-vector("list",length(list_exons_cds_tr))
+for(i in seq_along(list_exons_cds_tr)){
+        transcr<-tr_cds[i]
+        trascr_data<-list_exons_cds_tr[[transcr]]
+        
+        strand<-trascr_data$strand[1]
+        # Minus-strand rows are reversed so that row 1 is the 5'-most feature (assumes input rows are sorted by ascending coordinate -- TODO confirm).
+        exons_in_transcr<-trascr_data[trascr_data[,"type"]=="exon",]
+        if(strand=="-"){exons_in_transcr<-exons_in_transcr[dim(exons_in_transcr)[1]:1,]}
+        
+        
+        
+        cds_in_transcr<-trascr_data[trascr_data[,"type"]=="CDS",]
+        if(strand=="-"){cds_in_transcr<-cds_in_transcr[dim(cds_in_transcr)[1]:1,]}
+        
+        
+        cumsumexons<-cumsum(exons_in_transcr$length)
+        revcumsumexons<-cumsum(rev(exons_in_transcr$length))
+        
+        
+        # Genomic coordinate of the first (st_cod) and of the last (end_cod) CDS base, in transcript orientation
+        st_cod<-cds_in_transcr[1,"start"]
+        if(strand=="-"){st_cod<-cds_in_transcr[1,"end"]}
+        
+        end_cod<-(cds_in_transcr[dim(cds_in_transcr)[1],"end"])
+        if(strand=="-"){end_cod<-(cds_in_transcr[dim(cds_in_transcr)[1],"start"])}
+        # Row index (in the reordered exon table) of the exon containing each of the two CDS ends
+        st_ex<-which((st_cod>=exons_in_transcr$start & st_cod<=exons_in_transcr$end))
+        
+        end_ex<-which((end_cod>=exons_in_transcr$start & end_cod<=exons_in_transcr$end))
+        # nt_dist_start: transcript nucleotides upstream of the first CDS base
+        nt_dist_start<-st_cod-exons_in_transcr[st_ex,"start"]
+        if(strand=="-"){nt_dist_start<-exons_in_transcr[st_ex,"end"]-st_cod}
+        
+        if(st_ex>1){nt_dist_start<-nt_dist_start+cumsumexons[st_ex-1]}
+        # nt_dist_stop: transcript nucleotides downstream of the last CDS base
+        nt_dist_stop<-exons_in_transcr[end_ex,"end"]-end_cod
+        if(strand=="-"){nt_dist_stop<-end_cod-exons_in_transcr[end_ex,"start"]}
+        
+        if(end_ex<dim(exons_in_transcr)[1]){nt_dist_stop<-nt_dist_stop+revcumsumexons[dim(exons_in_transcr)[1]-end_ex]}
+        # stop_tx is placed 3 nt past the last CDS base (presumably to include the stop codon -- confirm), or at tr_len when the CDS ends on the last transcript base.
+        tr_len<-sum(exons_in_transcr$length)
+        start_coord<-nt_dist_start+1
+        stop_coord<-(tr_len-nt_dist_stop)+3
+        if(nt_dist_stop==0){stop_coord<-tr_len}
+        x<-data.frame(transcript_id=transcr,start_tx=start_coord,stop_tx=stop_coord,stringsAsFactors=FALSE)
+        list_coords[[i]]<-x
+}
+
+coords<-do.call(what=rbind.data.frame,args=list_coords)
+colnames(coords)<-c("transcript_id","start_tx","stop_tx")
+write.table(coords,file="cds_coords_transcripts",row.names=FALSE,col.names=FALSE,quote=FALSE,sep="\t")
+
+print(paste("--- extracting transcript-level CDS cordinates, Done!","---",date(),sep=" "))
+
diff --git a/scripts/include_multi_nomerge.R b/scripts/include_multi_nomerge.R
new file mode 100755
index 0000000..8329a64
--- /dev/null
+++ b/scripts/include_multi_nomerge.R
@@ -0,0 +1,79 @@
+#!/usr/bin/Rscript
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script to group the information about the multimapping read coverage vs uniquely mapping reads
+
+
+args <- commandArgs(trailingOnly = TRUE)
+# args[1] is the suffix shared by the four input count files and by the output file.
+
+names_covbeds<-c("exon_id","strand","reads","bases_covered","total_bases","pct_region_covered")
+
+# Each input is read without a header and given the six column names above.
+RIBO_best<-read.table(paste("RIBO_best_counts",as.character(args[1]),sep="_"),stringsAsFactors=F,header=F)
+
+colnames(RIBO_best)<-names_covbeds
+
+
+RIBO_unique<-read.table(paste("RIBO_unique_counts",as.character(args[1]),sep="_"),stringsAsFactors=F,header=F)
+
+colnames(RIBO_unique)<-names_covbeds
+
+
+RNA_best<-read.table(paste("RNA_best_counts",as.character(args[1]),sep="_"),stringsAsFactors=F,header=F)
+
+colnames(RNA_best)<-names_covbeds
+
+
+RNA_unique<-read.table(paste("RNA_unique_counts",as.character(args[1]),sep="_"),stringsAsFactors=F,header=F)
+
+colnames(RNA_unique)<-names_covbeds
+
+# After merging on exon_id, ".x" columns come from the "best" table and ".y" columns from the "unique" table.
+# The "multi" values are best minus unique, for both coverage percentage and read count.
+multi_table_RIBO<-merge(RIBO_best,RIBO_unique,by="exon_id")
+multi_table_RNA<-merge(RNA_best,RNA_unique,by="exon_id")
+multi_table_RIBO$pct_covered_onlymulti<-multi_table_RIBO$pct_region_covered.x-multi_table_RIBO$pct_region_covered.y
+multi_table_RIBO$reads_multi<-multi_table_RIBO$reads.x-multi_table_RIBO$reads.y
+
+multi_table_RNA$pct_covered_onlymulti<-multi_table_RNA$pct_region_covered.x-multi_table_RNA$pct_region_covered.y
+multi_table_RNA$reads_multi<-multi_table_RNA$reads.x-multi_table_RNA$reads.y
+
+# Positional picks on the 13-column merged table: 1 exon_id, 2 strand.x, 3 reads.x, 5 total_bases.x, 6 pct_region_covered.x,
+# 12 pct_covered_onlymulti, 13 reads_multi. Note that total_bases.x is the column renamed to "length.y" below.
+multi_table_RIBO<-multi_table_RIBO[,c(1,2,5,3,13,6,12)]
+names(multi_table_RIBO)<-c("exon_id", "strand", "length.y", "reads_ribo", "reads_multi_ribo","pct_region_covered_ribo", 
+                           "pct_covered_onlymulti_ribo")
+
+
+# Same selection for RNA, without the strand and total_bases columns.
+multi_table_RNA<-multi_table_RNA[,c(1,3,13,6,12)]
+names(multi_table_RNA)<-c("exon_id", "reads_rna", "reads_multi_rna", "pct_region_covered_rna", 
+                          "pct_covered_onlymulti_rna")
+# merge() keeps only exon_ids present in both tables.
+multi_table<-merge(multi_table_RIBO,multi_table_RNA,by="exon_id")
+
+
+write.table(multi_table,file=paste("multi_table",as.character(args[1]),sep="_"),quote=F,row.names=F,sep="\t",col.names=T)
+
diff --git a/scripts/metag.R b/scripts/metag.R
new file mode 100755
index 0000000..9f1d689
--- /dev/null
+++ b/scripts/metag.R
@@ -0,0 +1,133 @@
+#!/usr/bin/Rscript
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for making aggregate plots around start-stop codons, takes as argument the bed file from create_metaplots.bash
+
+print(paste("--- plotting aggreate start-stop profiles","---",date(),sep=" "))
+# args[1]: tab-separated file with 18 columns per row (read fields followed by the start/stop codon feature it was paired with).
+args <- commandArgs(trailingOnly = TRUE)
+
+reads<-read.table(args[1],stringsAsFactors=F,header=F,sep="\t",comment.char="")
+colnames(reads)<-c("chr","start","end","read_id","map_quality","strand",".1",".2",".3","spanning_exons","length_per_exon","length_introns","chr_stst","start_stst","end_stst","type_stst","gene_id_stst","strand_stst")
+
+# Keep only rows whose length_introns field equals "0".
+reads_simpl<-reads[reads[,"length_introns"]=="0",]
+
+reads_simpl$count<-1
+# Signed distance between read and codon feature: start - start_stst on "+", end_stst - end on "-".
+list_str<-split.data.frame(reads_simpl,f=reads_simpl[,"strand"])
+list_str[["+"]]$distance<-list_str[["+"]][,"start"]-list_str[["+"]][,"start_stst"]
+list_str[["-"]]$distance<-list_str[["-"]][,"end_stst"]-list_str[["-"]][,"end"]
+
+reads_simpl<-do.call(rbind.data.frame,list_str)
+# Number of rows for every (codon type, length_per_exon, distance) combination.
+dists_all<-with(reads_simpl,aggregate(count,by=list(type_stst,length_per_exon,distance),FUN=sum))
+colnames(dists_all)<-c("type","length","distance","counts")
+lw<-3
+lengths<-as.numeric(sort(unique(dists_all$length),decreasing=F))
+for(i in lengths){
+        names<-paste(args[1],"_",as.character(i),".png",sep="")
+        # One 2x2 PNG per length value i; the name is built from args[1] only, so extra arguments cannot turn it into a vector.
+        png(filename=names,width=3024,height=1968)
+        par(mfrow=c(2,2),cex=2.6)
+        
+        starts_ok<-dists_all[dists_all[,"length"]==i & dists_all[,"type"]=="start_codon",]
+        stops_ok<-dists_all[dists_all[,"length"]==i & dists_all[,"type"]=="stop_codon",]
+        # Panel 1: start codons, distances -20..20 (an empty placeholder plot is drawn when there is no data)
+        starts<-starts_ok[starts_ok[,"distance"]%in%c(-20:20),]
+        if(dim(starts)[1]==0){plot(1,1,type="n")}
+        if(dim(starts)[1]>0){
+                plotto<-as.data.frame(t(t(-20:20)),stringsAsFactors=FALSE)
+                colnames(plotto)<-"distance"
+                plotto$counts<-0
+                for(g in seq_len(dim(plotto)[1])){
+                        dis<-plotto$distance[g]
+                        if(sum(starts$distance==dis)>0){
+                                plotto[g,"counts"]<-starts$counts[starts$distance==dis]
+                        }
+                }
+                plot(plotto$counts,col=c("red","blue","green"),type="h",xlab="Distance",ylab="Alignments",xaxt="n",main=paste("distance 5' - starts",as.character(starts$length[1]),"nt\n",i,sep=" "),lwd=lw)
+                axis(1, at=seq(1,length(plotto$counts),by=1), labels=as.character(seq(min(plotto$distance),max(plotto$distance),by=1)),xaxp = c(-40,40,80),las=2)
+        }
+        starts<-starts_ok[starts_ok[,"distance"]%in%c(16:56),]
+        # Panel 2: start codons, distances 16..56
+        if(dim(starts)[1]==0){plot(1,1,type="n")}
+        if(dim(starts)[1]>0){
+                plotto<-as.data.frame(t(t(16:56)),stringsAsFactors=FALSE)
+                colnames(plotto)<-"distance"
+                plotto$counts<-0
+                for(g in seq_len(dim(plotto)[1])){
+                        dis<-plotto$distance[g]
+                        if(sum(starts$distance==dis)>0){
+                                plotto[g,"counts"]<-starts$counts[starts$distance==dis]
+                        }
+                }
+                plot(plotto$counts,col=c("red","blue","green"),type="h",xlab="Distance",ylab="Alignments",xaxt="n",main=paste("distance 5' - starts",as.character(starts$length[1]),"nt\n",i,sep=" "),lwd=lw)
+                axis(1, at=seq(1,length(plotto$counts),by=1), labels=as.character(seq(min(plotto$distance),max(plotto$distance),by=1)),xaxp = c(-40,40,80),las=2)
+        }
+        # Panel 3: stop codons, distances -68..-28
+        stops<-stops_ok[stops_ok[,"distance"]%in%c(-68:-28),]
+        
+        if(dim(stops)[1]==0){plot(1,1,type="n")}
+        if(dim(stops)[1]>0){
+                
+                plotto<-as.data.frame(t(t(-68:-28)),stringsAsFactors=FALSE)
+                colnames(plotto)<-"distance"
+                plotto$counts<-0
+                for(g in seq_len(dim(plotto)[1])){
+                        dis<-plotto$distance[g]
+                        if(sum(stops$distance==dis)>0){
+                                plotto[g,"counts"]<-stops$counts[stops$distance==dis]
+                        }
+                }
+                plot(plotto$counts,col=c("red","blue","green"),type="h",xlab="Distance",ylab="Alignments",xaxt="n",main=paste("distance 5' - stops",as.character(stops$length[1]),"nt\n",i,sep=" "),lwd=lw)
+                axis(1, at=seq(1,length(plotto$counts),by=1), labels=as.character(seq(min(plotto$distance),max(plotto$distance),by=1)),xaxp = c(-40,40,80),las=2)
+        }
+        # Panel 4: stop codons, distances -32..10
+        
+        stops<-stops_ok[stops_ok[,"distance"]%in%c(-32:10),]
+        
+        if(dim(stops)[1]==0){plot(1,1,type="n")}
+        if(dim(stops)[1]>0){
+                plotto<-as.data.frame(t(t(-32:10)),stringsAsFactors=FALSE)
+                colnames(plotto)<-"distance"
+                plotto$counts<-0
+                for(g in seq_len(dim(plotto)[1])){
+                        dis<-plotto$distance[g]
+                        if(sum(stops$distance==dis)>0){
+                                plotto[g,"counts"]<-stops$counts[stops$distance==dis]
+                        }
+                }
+                plot(plotto$counts,col=c("red","blue","green"),type="h",xlab="Distance",ylab="Alignments",xaxt="n",main=paste("distance 5' - stops",as.character(stops$length[1]),"nt\n",i,sep=" "),lwd=lw)
+                axis(1, at=seq(1,length(plotto$counts),by=1), labels=as.character(seq(min(plotto$distance),max(plotto$distance),by=1)),xaxp = c(-40,40,80),las=2)
+        }
+        
+        dev.off()
+}
+
+print(paste("--- aggregate start-stop plots, Done!","---",date(),sep=" "))
+
diff --git a/scripts/quality_check.R b/scripts/quality_check.R
new file mode 100755
index 0000000..80d914b
--- /dev/null
+++ b/scripts/quality_check.R
@@ -0,0 +1,265 @@
+#!/usr/bin/Rscript
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+###script for plotting general results about the exon-level analysis as a QC step, takes as arguments the annotation directory
+
+
+print(paste("--- create QC plots ---",date(),sep=" "))
+# Inputs read from the working directory: RIBO_/RNA_ best and unique BAM files, P_sites_all, Centered_RNA,
+# all_calculations_ccdsgenes_annot_new and results_nonccds_annot; args[1] is the directory holding frames_ccds.
+
+
+args <- commandArgs(trailingOnly = TRUE)
+# Read counts come from samtools, site counts from the first field of `wc -l`.
+ribo_best<-system("samtools view -c RIBO_best.bam",intern=TRUE)
+ribo_unique<-system("samtools view -c RIBO_unique.bam",intern=TRUE)
+ribo_psit<-system("wc -l P_sites_all ",intern=TRUE)
+ribo_psit<-strsplit(ribo_psit,split=" ")[[1]][1]
+
+rna_best<-system("samtools view -c RNA_best.bam",intern=TRUE)
+rna_unique<-system("samtools view -c RNA_unique.bam",intern=TRUE)
+rna_psit<-system("wc -l Centered_RNA ",intern=TRUE)
+rna_psit<-strsplit(rna_psit,split=" ")[[1]][1]
+
+all_annot<-read.table("all_calculations_ccdsgenes_annot_new",header=TRUE,stringsAsFactors=FALSE,quote = "")
+# CCDS exons with more than 5 P-sites and a non-missing Ribo multitaper p-value
+ccds<-all_annot[all_annot[,"type"]=="ccds" & all_annot[,"P_sites_sum"]>5 & !is.na(all_annot$pval_multit_3nt_ribo),]
+
+nonccds<-read.table("results_nonccds_annot",header=TRUE,stringsAsFactors=FALSE,quote = "")
+noncoding<-nonccds[nonccds[,"annotation"]!="protein_coding",]
+noncoding<-noncoding[noncoding[,"P_sites_sum"]>5,]
+noncoding<-noncoding[noncoding[,"length.x"]>5,]
+utrs<-all_annot[all_annot[,"type"]%in%c("3_utrs_ex","5_utrs_ex"),]
+utrs<-utrs[utrs[,"P_sites_sum"]>5,]
+
+fra<-paste(args[1],"frames_ccds",sep = "/")
+
+frames<-read.table(fra,stringsAsFactors=FALSE,header=FALSE)
+# ok_annot: annotated frame equals frame_start_pred on "+" exons, frame_end_pred on "-" exons
+colnames(frames)<-c("exon_id","frame_start_annot","strand","length")
+ccds_frames<-merge(ccds,frames,by="exon_id")
+ccds_frames$ok_annot<-FALSE
+
+ccds_frames[ccds_frames[,"strand"]=="+" & ccds_frames[,"frame_start_annot"]==ccds_frames[,"frame_start_pred"],"ok_annot"]<-TRUE
+ccds_frames[ccds_frames[,"strand"]=="-" & ccds_frames[,"frame_start_annot"]==ccds_frames[,"frame_end_pred"],"ok_annot"]<-TRUE
+
+
+lib_size<-as.numeric(ribo_psit)
+
+all_ccds<-ccds
+# RPKM computed on whole columns; no row-wise apply(), which would first coerce the data frame to a character matrix
+all_ccds$RPKM_ribo<-(10^9 * all_ccds$P_sites_sum)/(lib_size * all_ccds$length.x)
+# Seven quantile bins per variable; include.lowest=TRUE keeps the minimum in bin 1 instead of giving it an NA bin (NA bins turn into all-NA rows in later row filters)
+quantiles_RPKM_ribo<-quantile(all_ccds$RPKM_ribo,probs=seq(0,1,length.out=8))
+all_ccds$quant_RPKM_ribo<-cut(x=all_ccds$RPKM_ribo,breaks=quantiles_RPKM_ribo,labels=as.character(1:7),include.lowest=TRUE)
+quantiles_length<-quantile(all_ccds$length.x,probs=seq(0,1,length.out=8))
+all_ccds$quant_length<-cut(x=all_ccds$length.x,breaks=quantiles_length,labels=as.character(1:7),include.lowest=TRUE)
+# The nine categories combine length bins 2/4/6 with RPKM bins 2/4/6
+length_rpkm<-rbind(c(2,2),c(2,4),c(2,6),c(4,2),c(4,4),c(4,6),c(6,2),c(6,4),c(6,6))
+rownames(length_rpkm)<-c("short_low","short_med","short_high","medium_low","medium_med","medium_high","long_low","long_med","long_high")
+colnames(length_rpkm)<-c("length","rpkm")
+results<-list()
+for(i in seq_len(dim(length_rpkm)[1])){
+        combin<-length_rpkm[i,]
+        name<-rownames(length_rpkm)[i]
+        exons_all<-all_ccds[all_ccds[,"quant_length"]==combin["length"] & all_ccds[,"quant_RPKM_ribo"]==combin["rpkm"],]
+        if(dim(exons_all)[1]>10){
+                res<-as.data.frame(t(as.matrix(table(exons_all[,"pval_multit_3nt_ribo"]<0.05)/dim(exons_all)[1])),stringsAsFactors=FALSE)
+                if(dim(res)[2]==2){
+                        colnames(res)<-c("non-periodic","periodic")
+                }
+                if(dim(res)[2]==1){
+                        if(!any(exons_all[,"pval_multit_3nt_ribo"]<0.05,na.rm=TRUE)){
+                                res[,2]<-0
+                                colnames(res)<-c("non-periodic","periodic")
+                        }
+                        if(any(exons_all[,"pval_multit_3nt_ribo"]<0.05,na.rm=TRUE)){
+                                res[,2]<-res[,1]
+                                res[,1]<-0
+                                colnames(res)<-c("non-periodic","periodic")
+                                # single-level case: which level is present is read from the p-values, not from the proportion stored in res
+                        }
+                }
+                res_rna<-as.data.frame(t(as.matrix(table(exons_all[,"pval_multit_3nt_rna"]<0.05)/sum(!is.na(exons_all$pval_multit_3nt_rna)))),stringsAsFactors=FALSE)
+                # same two-column layout for RNA-seq: column 1 non-periodic, column 2 periodic
+                if(dim(res_rna)[2]==2){
+                        colnames(res_rna)<-c("non-periodic","periodic")
+                }
+                if(dim(res_rna)[2]==1){
+                        if(!any(exons_all[,"pval_multit_3nt_rna"]<0.05,na.rm=TRUE)){
+                                res_rna[,2]<-0
+                                colnames(res_rna)<-c("non-periodic","periodic")
+                        }
+                        if(any(exons_all[,"pval_multit_3nt_rna"]<0.05,na.rm=TRUE)){
+                                res_rna[,2]<-res_rna[,1]
+                                res_rna[,1]<-0
+                                colnames(res_rna)<-c("non-periodic","periodic")
+                                
+                        }
+                }
+                res<-cbind(res,res_rna)
+                res[,"n_exons"]<-dim(exons_all)[1]
+                res[,"RPKM"]<-paste(paste(round(quantiles_RPKM_ribo[combin[2]],digits=1),round(quantiles_RPKM_ribo[combin[2]+1],digits=1),sep="-"),"RPKM")
+                res[,"length"]<-paste(paste(round(quantiles_length[combin[1]],digits=1),round(quantiles_length[combin[1]+1],digits=1),sep="-"),"nt")
+                res[,"category"]<-name
+        }
+        if(dim(exons_all)[1]<=10){
+                res<-NULL
+        }
+        results[[i]]<-res
+        # categories with 10 or fewer exons contribute no row
+}
+results<-do.call(rbind.data.frame,args=results)
+
+results$length<-factor(results$length, levels=unique(results$length))
+results$RPKM<-factor(results$RPKM, levels=unique(results$RPKM))
+
+###
+
+pdf(file="quality_check_plots.pdf",width=35,height=25,onefile=TRUE,title="")
+# Page layout: 14 screens given as (left, right, bottom, top) fractions -- two rows of four panels, then three rows of two.
+
+lefts<-c(0,.25,.5,.75, 0,.25,.5,.75, 0,.5, 0,.5, 0,.5)
+
+rights<-c(.25,.5,.75,1, .25,.5,.75,1, .5,1, .5,1, .5,1)
+
+bottoms<-c(.75,.75,.75,.75, .5,.5,.5,.5, .33,.33,.166,.166,0,0)
+
+tops<-c(1,1,1,1, .75,.75,.75,.75, .5,.5,.33,.33,.166,.166)
+
+
+matfig<-(cbind(lefts,rights,bottoms,tops))
+
+close.screen(all.screens=TRUE)
+#par(mgp=c(13, 1, 0))
+n_ccds<-length(which(all_annot$type=="ccds"))
+n_ccds_5nt<-dim(all_ccds)[1]
+n_ccds_5nt_rna<-length(which(!is.na(ccds$chisq_rna)))
+
+
+n_ccds_period<-length(which(all_ccds$pval_multit_3nt_ribo<0.05))
+n_ccds_chisq<-length(which(all_ccds$chisq_ribo<0.05))
+n_ccds_period_rna<-length(which(all_ccds$pval_multit_3nt_rna<0.05))
+n_ccds_chisq_rna<-length(which(all_ccds$chisq_rna<0.05))
+
+split.screen(matfig)
+# Screens 1-3: library sizes (Ribo, RNA) and CCDS exon counts
+screen(1)
+par(mar=c(6.1,8,2,2))
+
+barp<-barplot(as.numeric(c(ribo_best,ribo_unique,ribo_psit)),beside=TRUE,col=c("orange","red","dark red"),names.arg=c(""),cex.axis=1.8,cex.lab=1.8,cex.main=1.8,cex=1.8,mgp=c(13, 1, 0),main="")
+axis(side=1,labels=c("Ribo\naligned reads","Ribo\nunique reads","P-sites\npositions"),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.1,0))
+screen(2)
+par(mar=c(6.1,8,2,2))
+
+barp<-barplot(as.numeric(c(rna_best,rna_unique,rna_psit)),beside=TRUE,col=c("white","grey","dark grey"),names.arg=c(""),cex.axis=1.8,cex.lab=1.8,cex.main=1.8,cex=1.8,main="")
+axis(side=1,labels=c("RNA\naligned reads","RNA\nunique reads","RNA-sites\npositions"),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.1,0))
+screen(3)
+par(mar=c(6.1,8,2,2))
+
+barp<-barplot(c(n_ccds,n_ccds_5nt,n_ccds_5nt_rna),names.arg=c(""),col=c("white","indianred2","red"),cex.main=1.8,cex.axis=1.8,cex.lab=1.8,cex.names=1.8,main="")
+axis(side=1,labels=c("all ccds\nexons","ccds exons\n>5 P-sites","ccds exons\n>5 RNA & P-sit"),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.1,0))
+screen(4)
+par(mar=c(6.1,8,2,2))
+
+rna_ok<-length(which(!is.na(ccds$pval_multit_3nt_rna)))
+rna_ok2<-length(which(!is.na(ccds$chisq_rna)))
+m_r<-table(ccds$pval_multit_3nt_ribo<0.05)/dim(ccds)[1]
+m_rn<-table(ccds$pval_multit_3nt_rna<0.05)/rna_ok
+# 
+c_r<-table(ccds$chisq_ribo<0.05)/dim(ccds)[1]
+c_rn<-table(ccds$chisq_rna<0.05)/rna_ok2
+
+# Ribo bars show the fraction with p < 0.05, RNA bars the fraction with p >= 0.05; tables are indexed by level name so a missing level cannot shift the selection
+barp<-barplot(c(m_r["TRUE"],c_r["TRUE"],m_rn["FALSE"],c_rn["FALSE"]),xpd=FALSE,col=c("red","red","grey","grey"),space=0.1,names.arg="",ylab="% CCDS exons",main="",cex.main=1.8,cex.axis=1.8,cex.lab=1.8,cex.names=1.8)
+axis(side=1,labels=c("Multitap\nribo","Chi-sq\nribo","Multitap\nrna","Chi-sq\nrna"),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.1,0))
+
+#barplot(c(m_rn[1],c_rn[1]),xpd=F,col="grey",space=0.1,names.arg=c("Multi-taper test","Chi-squared test"),ylab="% CCDS exons",main="Negative exons, (P-value > 0.05) \n This_study RNA-seq",cex.main=1.8,cex.axis=1.8,cex.lab=1.8,cex.names=1.8)
+screen(5)
+par(mar=c(6.1,8,2,2))
+
+hist(ccds$pval_multit_3nt_rna,breaks=50,col="grey",main="",cex.main=1.8,cex.axis=1.8,cex.lab=1.8,xlab="")
+axis(side=1,labels="P-values multitaper test \n RNA-seq",at=.5,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,5,0))
+screen(6)
+par(mar=c(6.1,8,2,2))
+
+hist(ccds$chisq_rna,breaks=50,col="grey",main="",xlab="",cex.main=1.8,cex.axis=1.8,cex.lab=1.8)
+axis(side=1,labels="P-values Chi-squared test \n RNA-seq",at=.5,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,5,0))
+screen(7)
+par(mar=c(6.1,8,2,2))
+# Density of the largest per-frame P-site fraction: CCDS in violet, UTRs in dark grey, non-coding in orange
+gino<-density(apply(ccds[,c("pctPhase_frame","pctPhase_frame_1","pctPhase_frame_2")],FUN=max,1),from=0,to=1)
+plot(gino,col="violet",main="",xlab="% of P-sites on the max frame",cex.main=2,cex.axis=2,lwd=5,cex.lab=2)
+gino<-density(apply(utrs[,c("pctPhase_frame","pctPhase_frame_1","pctPhase_frame_2")],FUN=max,1),from=0,to=1)
+lines(gino,col="dark grey",lwd=5)
+gino<-density(apply(noncoding[,c("pctPhase_frame","pctPhase_frame_1","pctPhase_frame_2")],FUN=max,1),from=0,to=1)
+lines(gino,col="orange",lwd=5)
+legend("topleft",c("CCDS exons","UTRs","non-coding"),lty=c(1,1,1),col=c("violet","dark grey","orange"),lwd=c(2.2,2.2,2.2),cex=1.8)
+# legend colours are listed in the same order as the three curves drawn above
+
+screen(8)
+par(mar=c(6.1,1,2,2))
+
+same_as_annot<-table(ccds_frames$ok_annot)/dim(ccds_frames)[1]
+names(same_as_annot)<-c("diff_annot","same_annot")
+same_as_annot<-round(same_as_annot,digits=3)
+pie(same_as_annot,labels=paste(names(same_as_annot),":\n",as.character(100 * same_as_annot),"%"),col=c("dark grey","violet"),cex=1.8,cex.main=2,main="",init.angle=270)
+
+
+
+to_barpl<-split.data.frame(results,f=results$length)
+for(j in seq_len(min(3,length(to_barpl)))){
+        barpl<-(to_barpl[[j]])
+        # Length class j is drawn on screens 9/11/13 (Ribo-seq) and 10/12/14 (RNA-seq).
+        # The loop stops early when `results` holds fewer than three length classes.
+        screen(c(9,11,13)[j])
+        par(mar=c(6.1,8,2,2))
+        
+        barp<-barplot(t(100*as.matrix(barpl[,2:1])),ylim=c(0,100),col=c("dark red","grey"),names.arg=rep("",dim(barpl)[1]),ylab="% periodic CCDS exons\nRibo-seq",main=paste(barpl$length[1],"exons"),cex.axis=1.8,cex.lab=1.8,cex.main=1.8,cex=1.8)
+        axis(side=1,labels=paste(barpl$RPKM,"\n n_of exons=",barpl$n_exons),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.7,0))
+        
+        #         if(j==1){
+        #                 mtext(side=3,"3nt periodicity in ccds exons, Ribo-seq",line=+2.6)
+        #         }
+        # Columns 4:3 of barpl are the RNA-seq periodic / non-periodic fractions;
+        # columns 2:1 used above are the Ribo-seq ones.
+        screen(c(10,12,14)[j])
+        par(mar=c(6.1,8,2,2))
+        
+        barp<-barplot(t(100*as.matrix(barpl[,4:3])),ylim=c(0,100),col=c("dark red","dark grey"),names.arg=rep("",dim(barpl)[1]),ylab="% periodic CCDS exons\nRNA-seq",main=paste(barpl$length[1],"exons"),cex.axis=1.8,cex.lab=1.8,cex.main=1.8,cex=1.8)
+        axis(side=1,labels=paste(barpl$RPKM,"\n n_of exons=",barpl$n_exons),at=barp,cex.axis=1.8,cex.main=1.8,cex=1.8,mgp=c(3,2.7,0))
+        
+        #         if(j==1){
+        #                 mtext(side=3,"3nt periodicity in ccds exons, RNA-seq",line=+2.6)
+        #         }
+}
+###
+
+
+dev.off()
+
+print(paste("--- QC plots Done! ---",date(),sep=" "))
diff --git a/scripts/tracks_analysis.R b/scripts/tracks_analysis.R
new file mode 100755
index 0000000..ca4ceb0
--- /dev/null
+++ b/scripts/tracks_analysis.R
@@ -0,0 +1,127 @@
+#!/usr/bin/Rscript
+
+
+###################################################################
+#    This file is part of RiboTaper.
+#    RiboTaper is a method for defining translated ORFs using
+#    Ribosome Profiling data.
+#   
+#    Copyright (C) 2015  Lorenzo Calviello
+#
+#    RiboTaper is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+
+#    RiboTaper is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with RiboTaper.  If not, see <http://www.gnu.org/licenses/>.
+#
+#    Contact: Lorenzo.Calviello@mdc-berlin.de
+#######################################################################
+
+
+### Analyse data tracks. Arguments: (1) name tag of the track (e.g. "ccds"),
+### (2) the RiboTaper scripts directory, (3) the number of cores to use.
+args <- commandArgs(trailingOnly = TRUE)
+# Fail early with a usage message instead of an obscure error further down.
+if(length(args)<3){stop("usage: tracks_analysis.R <track_tag> <scripts_dir> <n_cores>",call.=FALSE)}
+n_cores<-suppressWarnings(as.integer(args[3]))
+if(is.na(n_cores) || n_cores<1){stop("n_cores must be a positive integer: ",args[3],call.=FALSE)}
+
+print(paste("--- analyzing",args[1],"exonic tracks","---",date(),sep=" "))
+# No library() call appears in this script, so the helpers and packages used below are expected to come from functions.R.
+suppressMessages(source(paste(args[2],"functions.R",sep = "/")))
+registerDoMC(n_cores)
+sink(file=NULL,type="message")
+
+
+# Load every track line plus the index file naming the region of each line.
+# NOTE(review): the subsetting below assumes all_tracks and all_index are
+# aligned line by line - confirm against the script that writes them.
+all_tracks<-readBigText(paste("data_tracks/Psit_Ribo_Rna_Cent_tracks",as.character(args[1]),sep="_"))
+all_index<-read.table(paste("data_tracks/index_tracks",as.character(args[1]),sep="_"),stringsAsFactors=FALSE,header=FALSE)
+
+# Prefix of each index entry up to the first "_" (presumably the chromosome).
+index_chrs<-vapply(strsplit(all_index$V1,split="_"),"[[",character(1),1)
+regions<-unique(all_index)
+
+# Analyse each unique region in parallel and stack the per-region results.
+results_regions<-foreach(s=seq_len(nrow(regions)),.combine=rbind,.multicombine=TRUE) %dopar%{
+        tryCatch({
+                # Narrow to the region's prefix group first, then to the region itself.
+                chr_reg<-strsplit(regions[s,1],split="_")[[1]][[1]]
+                all_tr<-all_tracks[which(index_chrs==chr_reg)]
+                ind_tr<-subset(all_index,index_chrs==chr_reg)
+                x<-all_tr[ind_tr==regions[s,1]]
+                # One row per track line, one column per space-separated field.
+                x<-t(data.frame(strsplit(x,split=" ")))
+                return(make_analysis_exons(x))
+        }, error=function(e){
+                # Best effort: report the failing region, keep the "error" marker as the result.
+                message("track analysis failed for ",regions[s,1],": ",conditionMessage(e))
+                return("error")
+        }
+        )
+}
+
+
+
+
+# Column layout shared by the four per-exon count tables read below.
+names_covbeds<-c("exon_id","strand","reads","bases_covered","total_bases","pct_region_covered")
+
+# Read the headerless table "<prefix>_<track tag>" from the working directory
+# and label its columns with names_covbeds.
+read_covbed<-function(prefix){
+        counts<-read.table(paste(prefix,as.character(args[1]),sep="_"),stringsAsFactors=FALSE,header=FALSE)
+        colnames(counts)<-names_covbeds
+        counts
+}
+
+# Ribo-seq counts. NOTE(review): "best" vs "unique" presumably refers to
+# best-alignment vs uniquely-mapping reads - confirm against the counting step.
+RIBO_best<-read_covbed("RIBO_best_counts")
+RIBO_unique<-read_covbed("RIBO_unique_counts")
+
+# RNA-seq counts, same layout.
+RNA_best<-read_covbed("RNA_best_counts")
+RNA_unique<-read_covbed("RNA_unique_counts")
+
+
+
+
+
+
+# Pair "best" and "unique" counts per exon, then derive the multi-mapping part
+# as best minus unique. After merge(), ".x" columns come from the "best" table
+# and ".y" columns from the "unique" table.
+add_multi<-function(best,uniq){
+        both<-merge(best,uniq,by="exon_id")
+        both$pct_covered_onlymulti<-both$pct_region_covered.x-both$pct_region_covered.y
+        both$reads_multi<-both$reads.x-both$reads.y
+        both
+}
+multi_table_RIBO<-add_multi(RIBO_best,RIBO_unique)
+multi_table_RNA<-add_multi(RNA_best,RNA_unique)
+
+# Keep the columns of interest (selected by name rather than position) and relabel them per assay.
+multi_table_RIBO<-multi_table_RIBO[,c("exon_id","strand.x","total_bases.x","reads.x","reads_multi","pct_region_covered.x","pct_covered_onlymulti")]
+names(multi_table_RIBO)<-c("exon_id", "strand", "length", "reads_ribo", "reads_multi_ribo","pct_region_covered_ribo",
+                           "pct_covered_onlymulti_ribo")
+multi_table_RNA<-multi_table_RNA[,c("exon_id","reads.x","reads_multi","pct_region_covered.x","pct_covered_onlymulti")]
+names(multi_table_RNA)<-c("exon_id", "reads_rna", "reads_multi_rna", "pct_region_covered_rna",
+                          "pct_covered_onlymulti_rna")
+
+# One row per exon with both assays, joined to the per-region analysis on the first column of each table.
+multi_table<-merge(multi_table_RIBO,multi_table_RNA,by="exon_id")
+RESULTS<-merge(results_regions,multi_table,by=1)
+
+write.table(RESULTS,file=paste("results",args[1],sep="_"),quote=FALSE,sep="\t",row.names=FALSE)
+
+print(paste("--- track_analysis Done!","---",date(),sep=" "))
+