-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrp_cv.py
executable file
·87 lines (68 loc) · 2.71 KB
/
rp_cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
"""
Use cross validation to evaluate a model for some training data.
usage: %prog pos_data neg_data [options]
-f, --format=FILE: Format of input data. 'ints' by default, or 'maf'
-m, --mapping=FILE: A mapping (alphabet reduction) to apply to each sequence (optional)
-r, --radix=N: Radix (optional)
-o, --orders=N,...: Orders to cross validate over
-F, --fold=N: Fold (default 5)
-M, --model=name: Name of model to train (default 'standard')
-l, --loo: Use leave-one-out cross validation (fold is ignored in this case)
"""
import array
import bx.cookbook.doc_optparse
import sys
import traceback
import time
import rp.cv
import rp.io
import rp.mapping
import rp.models
# Number of cross validation folds used by main() when -F/--fold is not given.
default_fold = 5
def run( pos_file, neg_file, format, mapping, radix, orders, modname, fold, loo ):
    """
    Cross validate a model over several orders and print one result row per order.

    pos_file, neg_file: open files holding the positive / negative training
        sequences; both are read completely through rp.io.get_reader.
    format: input format name, handed unchanged to rp.io.get_reader.
    mapping: optional alphabet reduction, handed to rp.io.get_reader. When it
        is set and no radix is given, the radix is mapping.get_out_size().
    radix: alphabet size. When false (None or 0) it is inferred: from the
        mapping if there is one, otherwise as the largest symbol seen in any
        sequence plus one.
    orders: iterable of model orders; each one is cross validated in turn.
    modname: model name handed to rp.models.train.
    fold, loo: fold count and leave-one-out flag, handed to rp.cv.CV.

    Raises ValueError when the radix must be inferred from the data but every
    input sequence is empty.

    Writes a header line and then one line per order to standard output.
    """
    # Read every sequence up front so the data can be scanned for the radix
    # and then reused for each order.
    pos_strings = list( rp.io.get_reader( pos_file, format, mapping ) )
    neg_strings = list( rp.io.get_reader( neg_file, format, mapping ) )
    # Determine radix
    if not radix:
        if mapping:
            radix = mapping.get_out_size()
        else:
            # Empty sequences have no largest symbol; skip them instead of
            # letting max() fail on one of them.
            largest_symbols = [ max( s ) for s in pos_strings + neg_strings if len( s ) ]
            if not largest_symbols:
                raise ValueError( "Cannot infer radix: no symbols in the input sequences" )
            radix = max( largest_symbols ) + 1
    # Leave-one-out needs a single pass; otherwise 5 passes are requested
    # from the engine. The choice does not depend on the order, so make it once.
    if loo:
        passes = 1
    else:
        passes = 5
    # A parenthesised single string prints identically as a print statement
    # and as a print function call.
    print( "Order TP ~TP ~FN FN FP ~FP ~TN TN % time" )
    # Cross validate for various orders
    for order in orders:
        # The factory reads 'order' from the enclosing scope; it is handed to
        # the engine that is created and run within this same iteration.
        def model_factory( d0, d1 ):
            return rp.models.train( modname, order, radix, d0, d1 )
        cv_engine = rp.cv.CV( model_factory, pos_strings, neg_strings, fold, passes, loo )
        # Time only the cross validation itself, not the reporting.
        start_time = time.time()
        cv_engine.run()
        seconds = time.time() - start_time
        # One row per order, fields separated by single spaces.
        # NOTE(review): cls1 / cls2 are presumably the per-class counts named
        # in the header columns -- confirm against rp.cv.CV.
        fields = [ "%5d " % order,
                   str( cv_engine.cls1 ),
                   str( cv_engine.cls2 ),
                   " %2.2f %2.2f" % ( cv_engine.get_success_rate()*100, seconds ) ]
        print( " ".join( fields ) )
def main():
    """
    Command line entry point: parse the options, open the inputs, call run().

    Options are described by the module docstring, which is handed to
    bx.cookbook.doc_optparse.parse. Exactly two positional arguments (the
    positive and negative data files) and the --orders option are required;
    when either is missing the script exits with a message on standard error.
    Every file opened here is closed again before returning.
    """
    # Parse command line
    options, args = bx.cookbook.doc_optparse.parse( __doc__ )
    if len( args ) != 2:
        sys.exit( "Expected exactly two arguments (pos_data neg_data), got %d" % len( args ) )
    pos_fname, neg_fname = args
    if not options.orders:
        sys.exit( "No orders given; use -o / --orders=N,..." )
    orders = [ int( order ) for order in options.orders.split( ',' ) ]
    radix = getattr( options, 'radix', None )
    if radix:
        radix = int( radix )
    modname = options.model
    if modname is None:
        modname = 'standard'
    if options.fold:
        fold = int( options.fold )
    else:
        fold = default_fold
    if options.mapping:
        # NOTE(review): assumes alignment_mapping_from_file reads the whole
        # file before returning, so it can be closed right away -- confirm.
        with open( options.mapping ) as mapping_file:
            _align_count, mapping = rp.mapping.alignment_mapping_from_file( mapping_file )
    else:
        mapping = None
    loo = bool( options.loo )
    # run() reads both inputs completely, so they can be closed once it returns.
    with open( pos_fname ) as pos_file:
        with open( neg_fname ) as neg_file:
            run( pos_file, neg_file, options.format, mapping, radix, orders, modname, fold, loo )
# Run the command line driver only when executed as a script, not on import.
if __name__ == "__main__":
    main()