-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPE_2Dmap_density.py
107 lines (94 loc) · 2.99 KB
/
PE_2Dmap_density.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# Copyright (c) 2014
# Authors: Chongzhi Zang
#
# This software is distributable under the terms of the GNU General
# Public License (GPL) v2, the text of which can be found at
# http://www.gnu.org/copyleft/gpl.html. Installing, importing or
# otherwise using this module constitutes acceptance of the terms of
# this License.
#
# Disclaimer
#
# This software is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# Comments and/or additions are welcome (send e-mail to:
import re, os, sys, shutil
from math import *
from string import *
from optparse import OptionParser
import operator
import GenomeData
import SeparateByChrom
import Utility
def unique_density_sorted(infile, outfile):
'''infile can only contain reads from one chromosome and only one kind of strands (+/-). file must be pre-sorted by column2, then by column3.'''
f = open(infile,'r')
o = open(outfile, 'w')
total = 0
retained = 0
l = f.readline()
l = l.strip()
sline = l.split()
if len(sline) >= 3:
current_x = atoi(sline[1])
current_y = atoi(sline[3])
current_count = 1
total = 1
retained = 0
for line in f:
if not re.match("#", line):
total += 1
line = line.strip()
sline = line.split()
x = atoi(sline[1])
y = atoi(sline[3])
if x != current_x:
o.write('\t'.join([str(current_x),str(current_y),str(current_count)]) + '\n')
retained += 1
current_line = sline
current_x = x
current_y = y
current_count = 1
elif y != current_y:
o.write('\t'.join([str(current_x),str(current_y),str(current_count)]) + '\n')
retained += 1
current_line = sline
current_x = x
current_y = y
current_count = 1
else:
current_count += 1
assert current_x == x
assert current_y == y
o.write('\t'.join([str(current_x),str(current_y),str(current_count)]) + '\n')
retained += 1
f.close()
o.close()
return (total, retained)
def process(infile, outfile):
'''infile can only contain reads for one gene'''
try:
if os.system('sort -g -k 2,4 %s > temp' % (infile)):
raise
except: sys.stderr.write(str(infile) + " reads do not exist\n");
(p_total, p_retained) = unique_density_sorted('temp', outfile)
#print chrom, "\tAll fragments:",p_total, "\tRetained fragments:", p_retained
os.system('rm temp')
def main(argv):
parser = OptionParser()
parser.add_option("-b", "--raw_bed_file", action="store", type="string",
dest="bed_file", help="raw bed file", metavar="<file>")
parser.add_option("-o", "--output_file_name", action="store", type="string",
dest="out_file", help="output file name", metavar="<file>")
(opt, args) = parser.parse_args(argv)
if len(argv) < 4:
parser.print_help()
sys.exit(1)
process(opt.bed_file, opt.out_file)
if __name__ == "__main__":
main(sys.argv)