-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproposed.py
122 lines (98 loc) · 4.21 KB
/
proposed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from sklearn.base import ClassifierMixin
from sklearn.neighbors._base import NeighborsBase, KNeighborsMixin
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.estimator_checks import check_estimator
from sklearn.neighbors import NearestNeighbors, KDTree
from sklearn.preprocessing import LabelEncoder
from scipy.stats import nbinom
class Proposed(KNeighborsMixin, ClassifierMixin, NeighborsBase):
def __init__(
self, n_neighbors=5, algorithm="auto",
leaf_size=30, p=2, metric="minkowski",
metric_params=None, n_jobs=None,
):
super().__init__(
n_neighbors=n_neighbors,
algorithm=algorithm, leaf_size=leaf_size,
metric=metric, p=p, metric_params=metric_params,
n_jobs=n_jobs,
)
def fit(self, X, y):
# self._fit(X, y)
# Check that X and y have correct shape
X, y = check_X_y(X, y)
# Store the classes seen during fit
self.le_ = LabelEncoder()
y = self.le_.fit_transform(y)
self.classes_ = self.le_.classes_
_, counts = np.unique(y, return_counts=True)
self.n_features_in_ = X.shape[1]
self.counts_ = counts
if X.shape[0] < 1:
raise ValueError("Atleast 1 sample reqruied.")
if len(counts) == 1:
raise ValueError(f"Unknown label type: Proposed estimator only \
supports 2 classes, got 1 class.")
if len(counts) != 2:
raise ValueError(f"Unknown label type: Proposed estimator only \
supports 2 classes, got {len(counts)} classes.")
# self.X_ = X
# self.y_ = y
self.minority_class_ = counts.argmin()
self.minority_ = KNeighborsClassifier(**self.get_params())
self.minority_.fit(X[y == self.minority_class_],
y[y == self.minority_class_])
self.majority_ = KNeighborsClassifier(**self.get_params())
self.majority_.fit(X[y != self.minority_class_],
y[y != self.minority_class_])
self.p_ = (y == self.minority_class_).mean()
# Return the classifier
return self
def predict_proba(self, X):
""" A reference implementation of a prediction for a classifier.
Parameters
----------
X : array-like, shape (n_samples, n_features)
The input samples.
Returns
-------
y : ndarray, shape (n_samples,)
The label for each sample is the label of the closest sample
seen during fit.
"""
# Check is fit had been called
check_is_fitted(self)
# Input validation
X = check_array(X)
minor_distances, _ = self.minority_.kneighbors(X)
n_pts = self.majority_.n_samples_fit_
major_distances, _ = self.majority_.kneighbors(X, n_neighbors=n_pts)
fails = self.compute_fails(major_distances, minor_distances)
evidence_1 = nbinom.sf(fails, 1 + np.arange(self.n_neighbors), self.p_) + \
0.5*nbinom.pmf(fails, 1 + np.arange(self.n_neighbors), self.p_)
cols = np.abs(evidence_1 - 1/2).argmax(1)
rows = np.arange(evidence_1.shape[0])
prob_1 = evidence_1[rows, cols]
if not self.minority_class_:
prob_1 = 1 - prob_1
return np.stack((1-prob_1, prob_1), axis=1)
def compute_fails(self, major_distances, minor_distances):
q1, t = major_distances.shape
q2, k = minor_distances.shape
assert q1 == q2
a = major_distances.reshape((q1, t, 1))
b = minor_distances.reshape((q1, 1, k))
fails = (a < b).astype(int).sum(1)
return fails
def predict(self, X):
probs = self.predict_proba(X)
return self.le_.inverse_transform(probs.argmax(1))
def _more_tags(self):
return {'binary_only': True, 'multioutput': False}
if __name__ == "__main__":
print("Checking Estimator")
check_estimator(Proposed())
# check_estimator(Proposed2())
print("Estimator Check Passed")