-
Notifications
You must be signed in to change notification settings - Fork 10
/
main.py
executable file
·146 lines (125 loc) · 3.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/python -u
#
# Protocol Informatics Prototype
# Written by Marshall Beddoe <[email protected]>
# Copyright (c) 2004 Baseline Research
#
# Licensed under the LGPL
#
from PI import *
import sys, getopt
def _fatal(message):
    """Print a ``FATAL:`` diagnostic on stdout and exit with status -1."""
    print("FATAL: %s" % message)
    sys.exit(-1)


def _progress(label):
    """Write a progress label followed by one space and no newline.

    The next ``print`` then completes the line (e.g. ``".. complete"``).
    The explicit flush keeps the label visible during long computations
    even when the interpreter is not started with ``-u``.
    """
    sys.stdout.write(label + " ")
    sys.stdout.flush()


def main():
    """Command-line entry point of the Protocol Informatics prototype.

    Parses the ``-p``/``-a``/``-g``/``-w <weight>`` options, reads the
    sequences from the given file, builds a distance matrix, clusters the
    sequences, optionally graphs every cluster, performs a multiple
    alignment per cluster and finally prints each aligned cluster.

    Any usage or input error terminates the process through ``usage()`` or
    ``sys.exit(-1)``; nothing is returned.

    NOTE(review): ``input``, ``distance``, ``phylogeny``, ``multialign`` and
    ``output`` are presumably provided by ``from PI import *`` -- confirm.
    """
    print("Protocol Informatics Prototype (v0.01 beta)")
    print("Written by Marshall Beddoe <[email protected]>")
    print("Copyright (c) 2004 Baseline Research\n")

    # Defaults
    fmt = None
    weight = 1.0
    graph = False

    #
    # Parse command line options and do sanity checking on arguments
    #
    try:
        opts, args = getopt.getopt(sys.argv[1:], "pagw:")
    except getopt.GetoptError:
        # Only option-parsing failures mean "bad usage"; anything else
        # should surface as a real error instead of being swallowed.
        usage()

    for opt, value in opts:
        if opt == "-p":
            fmt = "pcap"
        elif opt == "-a":
            fmt = "ascii"
        elif opt == "-w":
            try:
                weight = float(value)
            except ValueError:
                # A non-numeric weight is a user error, not a traceback.
                _fatal("Weight must be between 0 and 1")
        elif opt == "-g":
            graph = True
        else:
            usage()

    if not args:
        usage()

    # Written as a chained comparison so that NaN is rejected as well.
    if not (0.0 <= weight <= 1.0):
        _fatal("Weight must be between 0 and 1")

    # The sequence file is the last positional argument; this is the same
    # element the last entry of sys.argv refers to once args is non-empty.
    path = args[-1]

    #
    # Open file and get sequences
    #
    if fmt == "pcap":
        reader = input.Pcap
    elif fmt == "ascii":
        reader = input.ASCII
    else:
        _fatal("Specify file format")

    try:
        sequences = reader(path)
    except IOError:
        _fatal("Error opening '%s'" % path)

    if len(sequences) == 0:
        _fatal("No sequences found in '%s'" % path)
    print("Found %d unique sequences in '%s'" % (len(sequences), path))

    #
    # Create distance matrix (LocalAlignment, PairwiseIdentity, Entropic)
    #
    _progress("Creating distance matrix ..")
    dmx = distance.LocalAlignment(sequences)
    print("complete")

    #
    # Pass distance matrix to phylogenetic creation function
    #
    _progress("Creating phylogenetic tree ..")
    phylo = phylogeny.UPGMA(sequences, dmx, minval=weight)
    print("complete")

    #
    # Output some pretty graphs of each cluster
    #
    if graph:
        for cnum, cluster in enumerate(phylo, 1):
            out = "graph-%d" % cnum
            _progress("Creating %s .." % out)
            cluster.graph(out)
            print("complete")

    print("\nDiscovered %d clusters using a weight of %.02f" %
          (len(phylo), weight))

    #
    # Perform progressive multiple alignment against clusters
    #
    alist = []
    for i, cluster in enumerate(phylo, 1):
        _progress("Performing multiple alignment on cluster %d .." % i)
        aligned = multialign.NeedlemanWunsch(cluster)
        print("complete")
        alist.append(aligned)
    print("")

    #
    # Display each cluster of aligned sequences
    #
    for i, seqs in enumerate(alist, 1):
        print("Output of cluster %d" % i)
        output.Ansi(seqs)
        print("")
def usage():
    """Print the command-line synopsis on stdout and exit with status -1."""
    help_lines = (
        "usage: %s [-gpa] [-w <weight>] <sequence file>" % sys.argv[0],
        " -g\toutput graphviz of phylogenetic trees",
        " -p\tpcap format",
        " -a\tascii format",
        " -w\tdifference weight for clustering",
    )
    for text in help_lines:
        print(text)
    sys.exit(-1)
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()