-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblast2clans.py
88 lines (75 loc) · 1.59 KB
/
blast2clans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sys
import csv
import operator
# Fixed preamble that the script writes to the output file right after the
# "sequences=N" line.  It consists of a <param> section of settings, a
# <rotmtx> section holding a 3x3 identity matrix, and the opening <seq> tag.
# The text starts and ends with a newline, hence the empty first and last
# entries of the joined tuple.
header = "\n".join((
    "",
    "<param>",
    "maxmove=0.1",
    "pval=1",
    "usescval=false",
    "complexatt=true",
    "cooling=1.0",
    "currcool=1.0",
    "attfactor=10.0",
    "attvalpow=1",
    "repfactor=10.0",
    "repvalpow=1",
    "dampening=1.0",
    "minattract=1.0",
    "cluster2d=false",
    "blastpath=''",
    "formatdbpath=''",
    "showinfo=false",
    "zoom=1.0",
    "dotsize=10",
    "ovalsize=10",
    "groupsize=4",
    "usefoldchange=false",
    "avgfoldchange=false",
    "colorcutoffs=0.0;0.1;0.2;0.3;0.4;0.5;0.6;0.7;0.8;0.9;",
    "colorarr=(230;230;230):(207;207;207):(184;184;184):(161;161;161):"
    "(138;138;138):(115;115;115):(92;92;92):(69;69;69):(46;46;46):(23;23;23):",
    "</param>",
    "<rotmtx>",
    "1.0;0.0;0.0;",
    "0.0;1.0;0.0;",
    "0.0;0.0;1.0;",
    "</rotmtx>",
    "<seq>",
    "",
))
def split_row(line, line_no):
    """Split one input line into its whitespace-separated fields.

    Returns None for a blank line so callers can skip it (a trailing empty
    line used to crash the script with an IndexError).  Raises ValueError,
    naming the offending line, when a non-blank line has fewer than the
    three fields the script reads (name, name, value).
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 3:
        raise ValueError(
            "line {}: expected at least 3 whitespace-separated fields, "
            "got {}".format(line_no, len(fields)))
    return fields


def build_name_index(lines):
    """Assign a zero-based index to every sequence name found in *lines*.

    Names from the first column are numbered in order of first appearance.
    Names that occur only in the second column are numbered afterwards, also
    in order of first appearance.  Previously such names were never indexed
    and every link to them was silently written as a link to sequence 0.

    Returns a dict mapping name -> index.
    """
    names2Index = {}
    pending = []          # second-column names not (yet) seen in column one
    pending_seen = set()  # O(1) de-duplication for ``pending``
    for line_no, line in enumerate(lines, 1):
        row = split_row(line, line_no)
        if row is None:
            continue
        if row[0] not in names2Index:
            names2Index[row[0]] = len(names2Index)
        if row[1] not in names2Index and row[1] not in pending_seen:
            pending_seen.add(row[1])
            pending.append(row[1])
    # Append the leftovers only after all first-column names are numbered so
    # that the numbering of first-column names is unchanged.
    for name in pending:
        if name not in names2Index:
            names2Index[name] = len(names2Index)
    return names2Index


def iter_hsp_lines(lines, names2Index):
    """Yield one "<index1> <index2>:<value>\\n" string per non-blank input line.

    *names2Index* must contain both names of every row, which holds when it
    was produced by build_name_index() from the same input.
    """
    for line_no, line in enumerate(lines, 1):
        row = split_row(line, line_no)
        if row is None:
            continue
        yield "{0} {1}:{2}\n".format(
            names2Index[row[0]], names2Index[row[1]], row[2])


def main(argv):
    """Convert the input file named in argv[1] into the output file argv[2].

    The input is read twice: once to number the sequence names and once to
    emit the links, so it is never held in memory as a whole.  Returns the
    process exit status (0 on success, 2 on a usage error).
    """
    if len(argv) < 3:
        sys.stderr.write(
            "usage: {} <input file> <output file>\n".format(argv[0]))
        return 2
    inputFileName = argv[1]
    outputFileName = argv[2]

    print("Create list of sequence names with indeces.")
    with open(inputFileName, 'rt') as f:
        names2Index = build_name_index(f)

    # ``with`` guarantees the output is flushed and closed even if the second
    # pass over the input fails.
    with open(outputFileName, 'w') as fn:
        fn.write('sequences={}\n'.format(len(names2Index)))
        fn.write(header)
        for name, _ in sorted(names2Index.items(), key=operator.itemgetter(1)):
            fn.write(">{}\nX\n".format(name))
        fn.write("</seq>\n<hsp>\n")
        print("List with names and indeces has been created")

        print("Creating links")
        with open(inputFileName, 'rt') as f:
            for hsp in iter_hsp_lines(f, names2Index):
                fn.write(hsp)
        fn.write("</hsp>\n")
    print("Links have been created")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))