-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathnonstationary_feature_remover.py
43 lines (31 loc) · 1.43 KB
/
nonstationary_feature_remover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from sklearn.base import BaseEstimator, TransformerMixin
import lightgbm as lgb
import numpy as np
import pandas as pd
class NonstationaryFeatureRemover(BaseEstimator, TransformerMixin):
    """Drop the features that best predict a sample's row position.

    ``fit`` trains a LightGBM regressor whose target is the row index
    (``0 .. n_samples - 1``) and ranks the features by the model's
    ``feature_importances_``.  The most important features are dropped and
    ``transform`` returns the remaining columns.

    NOTE(review): presumably the rows are expected to be in chronological
    order, so that a feature which predicts the row index is one whose
    distribution drifts over time -- inferred from the class name; confirm
    against callers.

    Parameters
    ----------
    remove_count : int, optional
        Number of features to drop.  Takes precedence when truthy.
    remove_ratio : float, optional
        Fraction of the features to drop; the count is
        ``int(remove_ratio * n_features)``.

    Attributes
    ----------
    selected_features_ : ndarray of int
        Indices of the columns kept by ``transform``, in ascending order.

    Raises
    ------
    ValueError
        If both ``remove_count`` and ``remove_ratio`` are set (truthy).
    """

    def __init__(self, remove_count=None, remove_ratio=None):
        # Truthiness (not ``is not None``) is kept on purpose so that a zero
        # count or ratio combined with the other option stays accepted.
        if remove_count and remove_ratio:
            raise ValueError('remove_count and remove_ratio cannot be set simultaneously')
        self.remove_count = remove_count
        self.remove_ratio = remove_ratio

    def fit(self, X, y=None):
        """Rank the features and record which columns to keep.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data; validated with ``self._validate_data``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        # NOTE(review): ``_validate_data`` is a private scikit-learn helper;
        # newer releases provide ``sklearn.utils.validation.validate_data``
        # instead -- confirm against the pinned scikit-learn version.
        X = self._validate_data(X)
        n_features = X.shape[1]

        if self.remove_count:
            n_remove = self.remove_count
        elif self.remove_ratio is not None:
            n_remove = int(self.remove_ratio * n_features)
        else:
            # Neither option given: keep every feature instead of failing on
            # ``None * n_features``.
            n_remove = 0
        # Clamp so an oversized (or negative) request cannot index past the
        # ranking or wrap around to the other end of it.
        n_remove = min(max(n_remove, 0), n_features)

        keep = np.ones(n_features, dtype=bool)
        if n_remove > 0:
            model = lgb.LGBMRegressor(n_jobs=-1, random_state=1)
            # The target is the row index itself.
            model.fit(X, np.arange(X.shape[0]))
            # Ascending by importance; the stable sort makes tie-breaking
            # deterministic (among equal importances the higher column index
            # ranks later and is therefore dropped first).
            ranking = np.argsort(model.feature_importances_, kind='stable')
            keep[ranking[n_features - n_remove:]] = False

        # ``flatnonzero`` always yields an integer array, so the result is a
        # valid column index even when no feature is kept.
        self.selected_features_ = np.flatnonzero(keep)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` restricted to the selected columns.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data with the same columns as the data passed to ``fit``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        ndarray of shape (n_samples, len(selected_features_))
        """
        # ``reset=False`` checks X against what was recorded during ``fit``
        # instead of overwriting it.
        X = self._validate_data(X, reset=False)
        return X[:, self.selected_features_].copy()

    def inverse_transform(self, X, y=None):
        """Not supported: dropped columns cannot be reconstructed.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError('inverse_transform not implemented')