forked from albertwcheng/albert-bioinformatics-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BED2YoungLabFormat.py
executable file
·109 lines (77 loc) · 2.08 KB
/
BED2YoungLabFormat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/python
from sys import stderr
from sys import stdout
from sys import argv
from glob import glob
from os.path import basename
"""
readlength 6
chr3 150 155 -
"""
def sortExplace(L):
    """Return a new list holding the items of L in ascending order.

    L itself is never reordered.
    """
    try:
        ordered = sorted(L)
    except NameError:
        # Presumably for interpreters that lack the sorted() builtin:
        # take a slice copy and sort that copy in place instead.
        ordered = L[:]
        ordered.sort()
    return ordered
def BED2YoungLabFormat(filename,ofilename):
    """Convert one tab-separated read file into the Young lab position format.

    Every non-blank input line must carry at least four tab-separated
    fields: chromosome, start, end and strand (for example
    ``chr3<TAB>150<TAB>155<TAB>-``).  A leading ``chr`` is dropped from the
    chromosome name.  A read on the ``-`` strand is recorded as its negated
    end coordinate; any other read is recorded as its start coordinate.
    Blank lines are skipped.

    The output is a ``#U<mismatch>`` header (the mismatch count is always 0
    here), then, for each chromosome in sorted name order, a ``>chrom`` line
    followed by that chromosome's positions in ascending numeric order, one
    per line.  An input without any reads produces an empty output file.

    filename  -- path of the input file to read.
    ofilename -- path of the output file; it is created or overwritten.

    Raises IndexError for a non-blank line with fewer than four fields and
    ValueError when the start or end field is not an integer.
    """
    #store read info into [mismatch][chr][]=coord
    readInfo = {}
    # The input carries no mismatch information, so every read is filed
    # under mismatch count 0.
    mismatch = 0

    # 'with' guarantees the handle is closed even when a line fails to parse.
    with open(filename) as fin:
        for line in fin:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (typically a trailing one) rather
                # than failing on the missing fields.
                continue
            fields = line.split("\t")
            chrom = fields[0].strip()
            if chrom.startswith('chr'):
                chrom = chrom[3:]
            strand = fields[3]
            position = int(fields[1])
            endposition = int(fields[2])
            if strand == '-':
                # start + (end - start) is simply end: minus-strand reads
                # are keyed by their negated end coordinate.
                position = -endposition
            readInfo.setdefault(mismatch, {}).setdefault(chrom, []).append(position)

    with open(ofilename, 'w') as fout:
        for mismatch in sorted(readInfo):
            fout.write("#U" + str(mismatch) + "\n")
            mismatchSlot = readInfo[mismatch]
            for chrom in sorted(mismatchSlot):
                fout.write(">" + chrom + "\n")
                # Numeric sort, so negated (minus-strand) positions come first.
                for position in sorted(mismatchSlot[chrom]):
                    fout.write(str(position) + "\n")
def changeExtension(prefix,filename,removeOrigExtension,newsuffix):
    """Build an output path for filename under prefix with a new extension.

    prefix              -- folder part of the result; joined with "/".
    filename            -- source path; only its base name is used.
    removeOrigExtension -- when true and the base name contains a ".",
                           the last dot-separated component is dropped.
    newsuffix           -- extension appended (without a leading dot).

    Returns prefix + "/" + the rewritten base name.
    """
    parts = basename(filename).split(".")
    if removeOrigExtension and len(parts) > 1:
        # Drop only the final component, so "a.tar.gz" keeps "a.tar".
        parts.pop()
    return prefix + "/" + ".".join(parts + [newsuffix])
def BED2YoungLabFormat_Main(fileList,outPrefix,newSuffix):
    """Convert every file in fileList, reporting progress on stderr.

    fileList  -- paths of the input files to convert.
    outPrefix -- folder passed to changeExtension as the output prefix.
    newSuffix -- extension given to each output file name.

    For each input the output path is derived with changeExtension (the
    original extension is replaced) and the conversion is done by
    BED2YoungLabFormat.
    """
    # stderr.write emits the same bytes the old "print >> stderr" statements
    # did, but also works where print is a function.
    stderr.write("Convert bedfiles " + str(fileList) + " to young lab files\n")
    for srcPath in fileList:
        ofile = changeExtension(outPrefix, srcPath, True, newSuffix)
        stderr.write(str(srcPath) + " >> " + str(ofile) + "\n")
        BED2YoungLabFormat(srcPath, ofile)
    stderr.write("<Done>\n")
# Command-line dispatch: the last two arguments are the output folder and the
# new extension; everything between the script name and those two is an input
# file.  With fewer than three arguments a usage line is written to stderr.
largv=len(argv)
if largv<4:
    # stderr.write instead of the Python-2-only "print >> stderr" statement;
    # the text written is unchanged.
    stderr.write(argv[0] + " srcFiles outputFolder newextesion\n")
else:
    BED2YoungLabFormat_Main(argv[1:largv-2],argv[largv-2],argv[largv-1])