-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcv_old_models.py
executable file
·95 lines (79 loc) · 2.79 KB
/
cv_old_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
import argparse
import pandas as pd
import numpy as np
np.random.seed(1) # for reproducibility
from nested_kfold import nested_kfold
from metrics import (
create_score_dict,
binarize_y,
)
# Location of the CSV file loaded at import time and the column used as target.
DATASET = 'data/dataset.csv'
TARGET_NAME = 'MalignancyCharacter'

# NOTE: the dataset is read as a module-level side effect; the names below
# (df, y, y_bin, indexes) are consumed by the script entry point.
df = pd.read_csv(DATASET)
# Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
y = df[TARGET_NAME].values.astype(int)
# Binary version of the target, as produced by metrics.binarize_y.
y_bin = binarize_y(y)
# Fold descriptions built from the original (non-binarized) target; each fold
# is later indexed with 'test' -- presumably positional indices, confirm
# against nested_kfold.
indexes = nested_kfold(y, method='stratified')
def cv_old_models(df, indexes, y_bin):
    """Score the stored predictions of each legacy model on every test fold.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain one column per legacy model listed below
        ('TimmermannBin', 'LR1Bin', 'LR2Bin', 'SMBin', 'AdnexBin'); each
        column is cast to int and used as that model's predictions.
    indexes : iterable
        Folds; every fold is indexed with ``'test'`` to select the samples
        that are scored.
    y_bin : numpy.ndarray
        Ground-truth labels, indexed with the same ``fold['test']`` selector.

    Returns
    -------
    dict
        Maps each model column name to a ``pandas.DataFrame`` built from the
        list of ``create_score_dict`` results, one entry per fold
        (presumably one metrics dict per fold -- confirm against metrics).

    Raises
    ------
    KeyError
        If ``df`` lacks one of the model columns.
    """
    models = [
        'TimmermannBin',
        'LR1Bin',
        'LR2Bin',
        'SMBin',
        'AdnexBin',
    ]
    models_dict = {}
    for model in models:
        # Cast once per model instead of once per fold.  Builtin ``int`` is
        # used because the ``np.int`` alias was removed in NumPy 1.24.
        predictions = df[model].values.astype(int)
        test_scores = [
            create_score_dict(y_bin[fold['test']], predictions[fold['test']])
            for fold in indexes
        ]
        models_dict[model] = pd.DataFrame(test_scores)
    return models_dict
def create_std_mean_score(df):
    """Summarize a score table as per-column mean and standard deviation.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are aggregated with ``mean()`` and ``std()``.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the columns ``'mean'`` and ``'std'``.
    """
    column_means = df.mean()
    column_stds = df.std()
    summary = {'mean': column_means, 'std': column_stds}
    return pd.DataFrame(summary)
def print_result(old_models_cv_results, name):
    """Print one model's per-fold scores followed by their mean/std summary.

    Parameters
    ----------
    old_models_cv_results : dict
        Mapping from model name to its score table (a pandas.DataFrame).
    name : str
        Key of the model to report.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``old_models_cv_results``.
    """
    score_df = old_models_cv_results[name]
    # Single-argument print(...) emits the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print(score_df)
    print(create_std_mean_score(score_df))
if __name__ == '__main__':
    # Command-line entry point: report cross-validation scores of the legacy
    # models selected through flags (or all of them with --all).
    parser = argparse.ArgumentParser(
        description='Script to do cross validation using old models on OVA dataset')

    # One row per selectable model:
    # (flag name, help text, heading printed before results, result key).
    model_options = [
        ('tim',
         'Getting CV results using Timmerman logistic regression model',
         "Timmerman Model:", 'TimmermannBin'),
        ('lr1',
         'Getting CV results using LR1 IOTA model',
         "LR1 Model:", 'LR1Bin'),
        ('lr2',
         'Getting CV results using LR2 IOTA model',
         "LR2 Model:", 'LR2Bin'),
        ('sm',
         'Getting CV results using SM model',
         "SM Model:", 'SMBin'),
        ('adnex',
         # Help text previously misspelled the model name as "Andex".
         'Getting CV results using Adnex model',
         "Adnex Model:", 'AdnexBin'),
    ]
    for flag, help_text, _heading, _key in model_options:
        parser.add_argument(
            '--' + flag, dest=flag, action='store_true', help=help_text)
    parser.add_argument(
        '--all', dest='all', action='store_true',
        help='Getting CV results using Timmerman / LR1 / LR2 / SM / Adnex model')

    # Parse first so --help or an invalid flag exits before any scoring work.
    args = parser.parse_args()
    old_models_cv_results = cv_old_models(df, indexes, y_bin)

    for flag, _help_text, heading, key in model_options:
        if getattr(args, flag) or args.all:
            # print(...) with one argument behaves identically on Python 2
            # and is valid on Python 3.
            print(heading)
            print_result(old_models_cv_results, key)