-
Notifications
You must be signed in to change notification settings - Fork 1
/
rp_prob_classify.py
executable file
·86 lines (71 loc) · 2.77 KB
/
rp_prob_classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python2.4
"""
Score a set of sequences using a model
usage: %prog data score_matrix out [options]
-i, --input=FILE: Input filename (otherwise stdin)
-d, --output=FILE: Output filename (otherwise stdout)
-f, --format=FILE: Format of input data. 'ints' by default, or 'maf'
-m, --mapping=FILE: A mapping (alphabet reduction) to apply to each sequence (optional)
-M, --model=name: Name of model to train (default 'standard')
-G, --mincols=INT: Minimum number of scorable positions
-c, --classnames=A,B: Names to use for the classes in the output.
"""
from __future__ import division
import pkg_resources
pkg_resources.require( "bx-python" )
import cookbook.doc_optparse
import sys
import traceback
from numpy import *
import rp.io
import rp.mapping
import rp.models
nan = float( 'nan' )
def run( data_file, out_file, model_fnames, format, mapping, modname, mincols, classnames ):
max_order = 0
models = []
for f in model_fnames:
model = rp.models.get( modname ).prob_from_file( f )
radix = model.get_radix()
order = model.get_order()
max_order = max( max_order, order )
models.append( model )
# Score each
for string in rp.io.get_reader( data_file, format, mapping ):
if len( string ) < max_order or sum( string != -1 ) < mincols:
print >> out_file, "\t".join( ( [ "NA" ] * len( models ) ) + [ "NO DATA" ] )
else:
probs = [ model.score( string ) for model in models ]
best = classnames[ argmax( probs ) ]
print >> out_file, "\t".join( map( str, list( exp( probs ) ) + [ best ] ) )
def main():
# Parse command line
try:
options, args = cookbook.doc_optparse.parse( __doc__ )
model_fnames = args
if options.input:
in_file = open( options.input )
else:
in_file = sys.stdin
if options.output:
out_file = open( options.output, "w" )
else:
out_file = sys.stdout
modname = getattr( options, 'model' )
if modname is None: modname = 'standard'
if options.mapping:
align_count, mapping = rp.mapping.alignment_mapping_from_file( file( options.mapping ) )
else:
mapping = None
mincols = getattr( options, 'mincols' )
if mincols: mincols = int( mincols )
if options.classnames:
classnames = options.classnames.split( "," )
assert len( classnames ) == len( model_fnames )
else:
classnames = map( str, range( len( model_fnames ) ) )
except:
cookbook.doc_optparse.exit()
run( in_file, out_file, model_fnames, options.format, mapping, modname, mincols, classnames )
out_file.close()
if __name__ == "__main__": main()