-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnested_kfold.py
52 lines (44 loc) · 1.46 KB
/
nested_kfold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import numpy as np
from sklearn.cross_validation import (
KFold,
StratifiedKFold,
)
# Default seed handed to the fold splitters as ``random_state``.
SEED = 1

# Maps the ``method`` argument of ``nested_kfold`` to the splitter class
# that is instantiated for both the outer and the nested folds.
METHODS = {
    'standard': KFold,
    'stratified': StratifiedKFold,
}
def nested_kfold(y, n_folds=10, shuffle=True,
                 random_state=SEED, method='standard'):
    """Build index sets for nested k-fold cross-validation.

    The data is first split into ``n_folds`` outer train/test folds. The
    training part of every outer fold is then split again into ``n_folds``
    nested train/validation folds, using the same splitter class and the
    same ``shuffle`` / ``random_state`` settings.

    Parameters
    ----------
    y : array-like
        Array of classes. It is converted with ``np.asarray`` so that it
        can be indexed with the index arrays produced by the splitter.
    n_folds : int
        Number of folds used for both the outer and the nested split.
    shuffle : bool
        Forwarded unchanged to the splitter.
    random_state : int
        Forwarded unchanged to the splitter; defaults to ``SEED``.
    method : str
        Key into ``METHODS`` selecting the splitter class.

    Returns
    -------
    list of dict
        One dict per outer fold with the keys ``'train'`` and ``'test'``
        (the indices yielded by the outer splitter) and
        ``'nested_indexes'``, a list of dicts with the keys ``'train'``
        and ``'val'``. The nested entries are mapped back through the
        outer training indices, so they refer to positions in ``y``
        itself, not to positions inside the outer training subset.

    Raises
    ------
    ValueError
        If ``method`` is not a key of ``METHODS``.
    """
    if method not in METHODS:
        # Fail early with a readable message instead of the
        # "'NoneType' object is not callable" a missing key would cause.
        raise ValueError(
            "unknown method %r; expected one of %s"
            % (method, sorted(METHODS)))
    folding_class = METHODS[method]

    # Lists and tuples cannot be indexed with an index array; ndarrays can.
    y = np.asarray(y)

    def _make_folds(labels):
        # The 'standard' splitter is constructed from the number of
        # samples; every other method receives the labels themselves.
        first_arg = len(labels) if method == 'standard' else labels
        # NOTE: this is the constructor signature of the legacy
        # ``sklearn.cross_validation`` splitters imported by this file,
        # which are iterated directly to obtain (train, test) pairs.
        return folding_class(first_arg, n_folds=n_folds, shuffle=shuffle,
                             random_state=random_state)

    all_indexes = []
    for train_indices, test_indices in _make_folds(y):
        # The nested splitter works on positions within the outer
        # training subset; indexing ``train_indices`` with them translates
        # those positions back into positions of the full ``y``.
        nested_indexes = [
            {
                'train': train_indices[nested_train],
                'val': train_indices[nested_val],
            }
            for nested_train, nested_val in _make_folds(y[train_indices])
        ]
        all_indexes.append({
            'train': train_indices,
            'test': test_indices,
            'nested_indexes': nested_indexes,
        })
    return all_indexes
if __name__ == '__main__':
    # Demo: 100 random binary labels, split once with each method.
    y = np.random.randint(2, size=100)
    # Single-argument print(...) behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(nested_kfold(y))
    print(nested_kfold(y, method='stratified'))