-
Notifications
You must be signed in to change notification settings - Fork 1
/
rp_train_probs.py
executable file
·69 lines (55 loc) · 1.96 KB
/
rp_train_probs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
"""
Train a discriminating model from two data sets and store it.
usage: %prog pos_data out [options]
-f, --format=FILE: Format of input data. 'ints' by default, or 'maf'
-m, --mapping=FILE: A mapping (alphabet reduction) to apply to each sequence (optional)
-r, --radix=N: Radix
-o, --order=N: Order
-M, --model=name: Name of model to train (default 'standard')
"""
import pkg_resources
pkg_resources.require( "bx-python" )
import array
import cookbook.doc_optparse
import sys
import traceback
import rp.io
import rp.mapping
import rp.models
def run( pos_file, out_file, format, mapping, radix, order, modname ):
    """
    Train a model on the sequences read from `pos_file` and write it to
    `out_file`.

    pos_file: input handed to `rp.io.get_reader` together with `format`
              and `mapping`.
    out_file: destination passed to the trained model's `to_file`.
    format:   input format name forwarded to the reader.
    mapping:  optional mapping; when truthy and no radix is given, its
              `get_out_size()` supplies the radix.
    radix:    alphabet size; when falsy it is taken from the mapping or,
              failing that, from the largest symbol in the data plus one.
    order:    model order forwarded to `rp.models.prob_train`.
    modname:  name of the model to train.

    Raises ValueError when the radix must be inferred from the data but
    the data has no sequences (or contains an empty sequence).
    """
    # Read integer sequences (materialized so they can be scanned for the
    # radix and then passed on to training)
    pos_strings = list( rp.io.get_reader( pos_file, format, mapping ) )
    # Determine radix
    if not radix:
        if mapping:
            radix = mapping.get_out_size()
        else:
            try:
                radix = max( map( max, pos_strings ) ) + 1
            except ValueError:
                # max() of an empty sequence: report the real problem
                # instead of the bare "max() arg is an empty sequence"
                raise ValueError( "cannot infer radix: input has no sequences "
                                  "or contains an empty sequence; specify --radix" )
    # Build model
    # NOTE: single-argument print( ... ) emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print( "about to train" )
    model = rp.models.prob_train( modname, order, radix, pos_strings )
    print( "trained" )
    # Write to out file
    print( "about to write" )
    model.to_file( out_file )
    print( "written" )
def main():
    """
    Command-line entry point: parse the options described in the module
    docstring, then train a model from the input file and store it in the
    output file.

    Expects exactly two positional arguments (input file name, output file
    name). Any ordinary error while parsing or loading the mapping hands
    control to `cookbook.doc_optparse.exit()` (presumably prints usage and
    terminates -- confirm against cookbook.doc_optparse).
    """
    # Parse command line
    try:
        options, args = cookbook.doc_optparse.parse( __doc__ )
        pos_fname, out_fname = args
        order = int( getattr( options, 'order' ) )
        radix = getattr( options, 'radix' )
        # Convert like `order`: run() computes an integer radix when none
        # is given, so a supplied one should be an integer too.
        if radix:
            radix = int( radix )
        format = getattr( options, 'format' )
        modname = getattr( options, 'model' )
        if modname is None:
            modname = 'standard'
        if options.mapping:
            # open() instead of the Python-2-only file() builtin, and make
            # sure the handle is released once the mapping is loaded.
            mapping_file = open( options.mapping )
            try:
                align_count, mapping = rp.mapping.alignment_mapping_from_file( mapping_file )
            finally:
                mapping_file.close()
            # Joined with single spaces to reproduce the output of the
            # original comma-separated print statement exactly.
            print( " ".join( [ "Align count:", str( align_count ), "Mapping: ", str( mapping ) ] ) )
        else:
            mapping = None
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed.
        cookbook.doc_optparse.exit()
    # Open the input first so a missing or unreadable input does not
    # truncate an existing output file; close both even if training fails.
    pos_file = open( pos_fname )
    try:
        out = open( out_fname, "w" )
        try:
            run( pos_file, out, format, mapping, radix, order, modname )
        finally:
            out.close()
    finally:
        pos_file.close()
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()