-
Notifications
You must be signed in to change notification settings - Fork 1
/
bowtie2bed.py
107 lines (89 loc) · 2.76 KB
/
bowtie2bed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# Copyright (c) 2010 DFCI/HSPH
# Authors: Chongzhi Zang and X. Shirley Liu
#
# This software is distributable under the terms of the GNU
# General Public License (GPL) v2, the text of which can be found at
# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
# using this module constitutes acceptance of the terms of this License.
#
# Disclaimer
#
# This software is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# Comments and/or additions are welcome (send e-mail to:
#
import re, os, sys, shutil
from math import *
from string import *
from optparse import OptionParser
import operator
## get BED module
import BED;
import UCSC;
"""
This module converts a Bowtie output format file into a BED file.
"""
# Patterns recognising the strand symbols.  Raw strings are used so that the
# backslashes reach the regex engine instead of being parsed as (invalid)
# string escape sequences.
plus = re.compile(r'\+')
minus = re.compile(r'\-')
def chromname(number):
    """Translate ``number`` through the ``ch`` lookup table.

    Returns ``ch[number]`` when ``number`` is one of the keys of ``ch``;
    any other value is handed back unchanged.

    NOTE(review): ``ch`` is not defined in the visible part of this module;
    confirm where it is expected to come from before relying on this helper.
    """
    if number not in ch.keys():
        return number
    return ch[number]
def strandsign(number):
    """Map a numeric strand code to its symbol.

    The string ``'1'`` becomes ``'+'`` and ``'-1'`` becomes ``'-'``.  Any
    other value is returned as it was given.
    """
    for code, symbol in (('1', '+'), ('-1', '-')):
        if number == code:
            return symbol
    return number
def _find_strand_column(fields):
    """Return the index of the first field starting with '+' or '-', else None."""
    for index, field in enumerate(fields):
        if field.startswith(('+', '-')):
            return index
    return None


def bowtie2BED(input_file, output_file):
    """Convert a Bowtie output file into a six-column, tab-separated BED file.

    The column layout is learned from the first non-blank line of
    ``input_file``: the strand column is the first whitespace-separated field
    that starts with ``+`` or ``-``.  The other columns are addressed relative
    to it (chromosome at +1, start at +2, read sequence at +3, and the field
    at +5, which is copied through unchanged).  The tag size is the length of
    the read sequence on that first line and is applied to every record.

    Each output line holds ``chrom, left, right, sequence, field(+5), strand``.
    For ``+`` records the interval is ``start .. start + tagsize``; for ``-``
    records it is ``start - tagsize .. start``.

    Blank lines and lines whose strand field starts with neither ``+`` nor
    ``-`` are skipped.  An input without any non-blank line produces an empty
    output file.

    Args:
        input_file: Path of the Bowtie output file to read.
        output_file: Path of the BED file to write; it is overwritten.

    Raises:
        ValueError: If the first non-blank line has no field starting with
            ``+`` or ``-``, so the column layout cannot be determined.  The
            output file is not touched in that case.
    """
    # Pass 1: look only at the first non-blank line to learn the layout.
    first_fields = []
    with open(input_file, 'r') as infile:
        for line in infile:
            first_fields = line.split()
            if first_fields:
                break

    if not first_fields:
        # Nothing to convert; still leave a (empty) output file behind.
        with open(output_file, 'w'):
            pass
        return

    strand = _find_strand_column(first_fields)
    if strand is None:
        raise ValueError(
            "cannot locate a strand column ('+' or '-') in the first line "
            "of %r" % (input_file,))

    chrom = strand + 1
    start = strand + 2
    seq = strand + 3
    rest = strand + 5
    tagsize = len(first_fields[seq])

    # Pass 2: convert every record using the layout found above.
    with open(input_file, 'r') as infile:
        with open(output_file, 'w') as outfile:
            for line in infile:
                fields = line.split()
                if not fields:
                    continue
                strand_field = fields[strand]
                if strand_field.startswith('+'):
                    left = fields[start]
                    right = str(int(fields[start]) + tagsize)
                elif strand_field.startswith('-'):
                    # NOTE(review): Bowtie's native format appears to report
                    # the leftmost offset for both strands; confirm that
                    # extending '-' reads to the left of it is intended.
                    left = str(int(fields[start]) - tagsize)
                    right = fields[start]
                else:
                    # Unknown strand: skip rather than reuse the coordinates
                    # computed for the previous record.
                    continue
                record = (fields[chrom], left, right, fields[seq],
                          fields[rest], strand_field)
                outfile.write('\t'.join(record) + '\n')
def main(argv):
    """Parse the command line and run the Bowtie-to-BED conversion.

    Both ``-i/--inputfile`` and ``-o/--outputfile`` are required.  When
    either one is missing the option help is printed and the process exits
    with status 1.  The parsed options themselves are checked (rather than
    the length of ``argv``) so that forms such as ``--inputfile=a
    --outputfile=b`` are accepted.

    Args:
        argv: The full argument vector, as in ``sys.argv``.
    """
    parser = OptionParser()
    parser.add_option("-i", "--inputfile", action="store", type="string", dest="input_file", metavar="<file>", help="bowtie output format file")
    parser.add_option("-o", "--outputfile", action="store", type="string", dest="output_file", metavar="<file>", help="output BED file")
    (opt, args) = parser.parse_args(argv)
    if opt.input_file is None or opt.output_file is None:
        parser.print_help()
        sys.exit(1)
    bowtie2BED(opt.input_file, opt.output_file)


if __name__ == "__main__":
    main(sys.argv)