Skip to content

Commit

Permalink
Bev_crossval implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
dipaco committed Feb 13, 2017
1 parent 761d95e commit c14d1f3
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 9 deletions.
1 change: 1 addition & 0 deletions Implemented_balu_comands
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ FeatureSelection:
PerformanceEvaluation:
Bev_confusion
Bev_performance
Bev_crossval

Classification:
Bcl_lda
Expand Down
2 changes: 1 addition & 1 deletion balu/Classification/Bcl_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def Bcl_nn(*args):
if 'iter' not in options:
options['iter'] = 100

options['string'] = 'nn,{0} '.format(options['method'])
options['string'] = 'nn,{0} '.format(options['method'])
if train:

all_activation_functions = ['identity', 'logistic', 'tanh', 'relu']
Expand Down
11 changes: 5 additions & 6 deletions balu/DataSelectionAndGeneration/Bds_nostratify.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ def Bds_nostratify(X, d, s):
Xr = X[j, :]
dr = d[j, 0]
r = np.floor(s * N)
R = np.array([[0, r], [r, N]]).astype(int)
X1 = Xr[R[0, 0]:R[0, 1], :]
d1 = dr[R[0, 0]:R[0, 1]]
X2 = Xr[R[1, 0]:R[1, 1], :]
d2 = dr[R[1, 0]:R[1, 1]]
X1 = Xr[0:r, :]
d1 = dr[0:r]
X2 = Xr[r:, :]
d2 = dr[r:]

return X1, d1, X2, d2
return X1, d1, X2, d2
221 changes: 221 additions & 0 deletions balu/PerformanceEvaluation/Bev_crossval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import numpy as np
from balu.Classification import Bcl_structure
from .Bev_performance import Bev_performance
from scipy import stats


def Bev_crossval(X, d, options):
"""p, ci = Bev_crossval(X, d, options)
Toolbox: Balu
Cross-validation evaluation of a classifier.
v-fold Cross Validation in v groups of samples X and classification d
according to given method. If v is equal to the number of samples,
i.e., v = size(X,1), this method works as the original
cross-validation, where training will be in X without sample i and
testing in sample i. ci is the confidence interval.
X is a matrix with features (columns)
d is the ideal classification for X
options['b'] is a Balu classifier or several classifiers (see example)
options['v'] is the number of groups (folders) of the cross-validations
options['c'] is the probability of the confidence intervale.
options['strat'] = True means the portions are stratified.
options['show'] = True displays results.
Example for one classifier without stratified data:
from balu.ImagesAndData import balu_load
from balu.InputOutput import Bio_plotfeatures
from balu.PerformanceEvaluation import Bev_crossval
data = balu_load('datagauss') # simulated data (2 classes, 2 features)
X = data['X']
d = data['d']
Bio_plotfeatures(X, d) # plot feature space
op = {
'b': [
{'name': 'knn', 'options': {'k': 5}} # knn with 5 neighbors
],
'strat': False,
'v': 10,
'c': 0.9,
'show': False # 10 groups cross-validation for 90% confidence
}
p, ci = Bev_crossval(X, d, op) # cross valitadion
print(p)
print(ci)
Example for more classifiers with stratified data:
from balu.ImagesAndData import balu_load
from balu.InputOutput import Bio_plotfeatures
from balu.PerformanceEvaluation import Bev_crossval
data = balu_load('datagauss') # simulated data (2 classes, 2 features)
X = data['X']
d = data['d']
Bio_plotfeatures(X, d) # plot feature space
op = {
'b': [
{'name': 'knn', 'options': {'k': 5}}, # knn with 5 neighbors
{'name': 'knn', 'options': {'k': 7}}, # % KNN with 7 neighbors
{'name': 'knn', 'options': {'k': 9}}, # KNN with 9 neighbors
{'name': 'lda', 'options': {'p': []}}, # LDA
{'name': 'qda', 'options': {'p': []}}, # QDA
{'name': 'nn', 'options': {'method': 3, 'iter': 10}}, # Neural Network
{'name': 'svm', 'options': {'kernel': 2}}, # rbf-svm
{'name': 'dmin', 'options': {}}, # Euclidean distance
{'name': 'maha', 'options': {}} # Mahalanobis distance
],
'strat': True,
'v': 10,
'c': 0.95,
'show': True # 10 groups cross-validation for 90% confidence
}
p, ci = Bev_crossval(X, d, op) # cross valitadion
print(p)
print(ci) % cross valitadion
The mean performance of classifier k is given in p(k), and the
confidence intervals for c*100% are in ci(k,:).
(c) Grima, PUC-DCC, 2011: D.Mery, E.Cortazar, B. Banerjee
http://grima.ing.puc.cl
With collaboration from:
Diego Patiño ([email protected]) -> Translated implementation into python (2016)
"""

v = options['v']
b = options['b']
c = options['c']

if len(d.shape) < 2:
d = np.expand_dims(d, axis=1)

if 'strat' in options:
strat = options['strat']
else:
strat = False

if v == 1:
print('Warning: cross validation with only one group means data training = data test.')

if 'show' not in options:
show = True
else:
show = options['show']

n = len(b)
N = X.shape[0]

dmin = d.min()
dmax = d.max()
nn = range(dmin, dmax+1)

p = np.zeros(n)
ci = np.zeros((n, 2))
for k in range(n):

if v == 1:
XX = X
XXt = X
dd = d
ddt = d
elif strat:
temporal = [[None, None] for i in range(v)]
for cl in range(dmin, dmax + 1):
selec, _ = np.where(d == cl)
XTemp = X[selec, :]
dTemp = d[selec, :]

numElem = selec.size

# Checking if every class has at least v samples, otherwise raise Warning (Sandipan)
if numElem < v:
print('Class {0} has only {1} samples, less than fold value {2}!!!\n', dTemp[0, 0], numElem, v)

j = np.argsort(np.random.rand(numElem))
r = int(np.floor(numElem/float(v)))

XTemp = XTemp[j, :]
dTemp = dTemp[j, :]

for iTemp in range(v):
rango = np.arange(iTemp*r, (iTemp + 1)*r)

temporal[iTemp][0] = XTemp[rango, :] if temporal[iTemp][0] is None else np.vstack((temporal[iTemp][0], XTemp[rango, :]))
temporal[iTemp][1] = dTemp[rango, :] if temporal[iTemp][1] is None else np.vstack((temporal[iTemp][1], dTemp[rango, :]))

Xr = None
dr = None
R = np.zeros((v, 2))
ant = 0
for iTemp in range(v):

Xr = temporal[iTemp][0] if Xr is None else np.vstack((Xr, temporal[iTemp][0]))
dr = temporal[iTemp][1] if dr is None else np.vstack((dr, temporal[iTemp][1]))

next = dr.shape[0]
R[iTemp, :] = np.array([ant, next-1])
ant = next

else:
rn = np.random.rand(N)
j = np.argsort(rn)

Xr = X[j, :]
dr = d[j, :]

r = np.floor(N/float(v))
R = np.zeros((v, 2))
ini = 0
for i in range(v-1):
R[i, :] = np.array([ini, ini+r-1])
ini += r

R[-1, :] = np.array([ini, N-1])

pp = np.zeros(v)
for i in range(v):
if v > 1:
XXt = Xr[R[i, 0]:R[i, 1] + 1, :]
ddt = dr[R[i, 0]:R[i, 1] + 1, :]
XX = None
dd = None
for j in range(v):
if j != i:

XX = Xr[R[j, 0]:R[j, 1] + 1, :] if XX is None else np.vstack((XX, Xr[R[j, 0]:R[j, 1] + 1, :]))
dd = dr[R[j, 0]:R[j, 1] + 1, :] if dd is None else np.vstack((dd, dr[R[j, 0]:R[j, 1] + 1, :]))

dds, ops = Bcl_structure(XX, dd, XXt, b[k])
pp[i] = Bev_performance(ddt, dds, nn)

p[k] = np.mean(pp)
s = ops[0]['options']['string']

# Confidence Interval
if v > 1:
pm = p[k]
mu = pm
sigma = np.sqrt(pm * (1 - pm) / float(N))

if v > 20:
p1, p2 = stats.norm.interval(c, loc=mu, scale=sigma)
else:
#Not sure if v or v - 1
p1, p2 = stats.t.interval(c, v-1, loc=mu, scale=sigma)

ci[k, :] = np.array([p1, p2])

if show:
print('{0:3d}) {1} {2:.2f}% in ({3:.2f}, {4:.2f}%) with CI={5:.2f}% \n'.format(k, s, p[k]*100, p1*100, p2*100, c*100))
else:
ci[k, :] = np.array([0.0, 0.0])

return p, ci
2 changes: 1 addition & 1 deletion balu/PerformanceEvaluation/Bev_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ class is min(nn) and the highest class is max(nn).
if p.size == 1:
return p[0, 0]
else:
return p[:, 0]
return p[:, 0]
3 changes: 2 additions & 1 deletion balu/PerformanceEvaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .Bev_performance import Bev_performance
from .Bev_confusion import Bev_confusion
from .Bev_crossval import Bev_crossval

__all__ = ['Bev_performance', 'Bev_confusion']
__all__ = ['Bev_performance', 'Bev_confusion', 'Bev_crossval']

0 comments on commit c14d1f3

Please sign in to comment.