-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathdemo.py
80 lines (59 loc) · 2.35 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# This demo is based on the IJCAI-2018 CVR prediction task.
import FeatureSelection as FS
from sklearn.metrics import log_loss
import lightgbm as lgbm
import pandas as pd
import numpy as np
def prepareData(path='input/feature_test.txt'):
    """Load the feature table and label-encode ``item_category_list``.

    Args:
        path: Whitespace-delimited text file to read. Defaults to the
            location of the demo dataset.

    Returns:
        A DataFrame holding only the rows whose ``is_trade`` value is
        present, with ``item_category_list`` replaced by integer codes
        (each value's position in the sorted list of distinct values).
    """
    # Raw string: '\s' is not a valid escape in an ordinary string literal.
    df = pd.read_csv(path, sep=r'\s+')
    # Copy so the column assignment below writes to an independent frame
    # rather than to a slice of the frame returned by read_csv.
    df = df[df['is_trade'].notna()].copy()
    # np.unique sorts the distinct values; the inverse index is the code of
    # each row, i.e. the same mapping as value -> position in the sorted list.
    _, codes = np.unique(df['item_category_list'].to_numpy(), return_inverse=True)
    df['item_category_list'] = codes
    return df
def modelscore(y_test, y_pred):
    """Return the evaluation score: the log loss of ``y_pred`` against ``y_test``.

    Args:
        y_test: Ground-truth labels.
        y_pred: Predictions, forwarded unchanged to ``log_loss``.

    Returns:
        Whatever ``sklearn.metrics.log_loss`` returns for the two inputs.
    """
    score = log_loss(y_test, y_pred)
    return score
def validation(X, y, features, clf, lossfunction, days=(24,)):
    """Score ``clf`` with hold-out-by-day validation.

    For every value in ``days`` the rows with that ``day`` form the test fold
    and all remaining rows form the training fold. The classifier is refit on
    each fold and the per-fold losses are averaged.

    Args:
        X: DataFrame with a ``day`` column and every column named in
            ``features``.
        y: Labels, indexable with the same boolean mask as ``X``.
        features: Column names passed to the classifier.
        clf: Classifier exposing ``fit`` (accepting the ``eval_set``,
            ``eval_metric``, ``verbose`` and ``early_stopping_rounds``
            keywords) and ``predict_proba``.
        lossfunction: Callable ``(y_true, y_pred)`` returning the fold loss.
        days: Values of ``X.day`` to hold out, one fold per value.

    Returns:
        The mean loss over all folds.

    Raises:
        ValueError: If ``days`` is empty.
    """
    held_out_days = list(days)
    if not held_out_days:
        raise ValueError("days must contain at least one day to hold out")

    total_loss = 0
    for day in held_out_days:
        train_mask = (X.day != day)
        X_train = X[train_mask][features]
        X_test = X[~train_mask][features]
        y_train, y_test = y[train_mask], y[~train_mask]
        # NOTE(review): LightGBM 4.x appears to have dropped the `verbose` and
        # `early_stopping_rounds` keywords from `fit` in favour of callbacks;
        # confirm against the installed version before upgrading.
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
                eval_metric='logloss', verbose=False, early_stopping_rounds=200)
        # Column 1 of predict_proba is scored as the positive-class probability.
        total_loss += lossfunction(y_test, clf.predict_proba(X_test)[:, 1])
    # Divide by the real fold count so the result stays a mean when folds are added.
    return total_loss / float(len(held_out_days))
def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total
def substract(x, y):
    """Return ``x - y``."""
    difference = x - y
    return difference
def times(x, y):
    """Return ``x * y``."""
    product = x * y
    return product
def divide(x, y):
    """Return the smoothed ratio ``(x + 0.001) / (y + 0.001)``.

    The constant 0.001 is added to both operands before dividing, presumably
    so that ``y == 0`` does not cause a division by zero.
    """
    numerator = x + 0.001
    denominator = y + 0.001
    return numerator / denominator
# Operator symbol -> binary function; handed to the selector in main() via
# ImportCrossMethod.
CrossMethod = {
    '+': add,
    '-': substract,
    '*': times,
    '/': divide,
}
def main():
    """Configure a feature-selection run on the demo data and start it."""
    # select the way you want to process searching
    selector = FS.Select(Sequence=False, Random=True, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)

    # Columns that must never be offered to the model; includes the label.
    non_trainable = ['instance_id', 'item_property_list', 'context_id',
                     'context_timestamp', 'predict_category_property', 'is_trade']
    selector.NonTrainableFeatures = non_trainable

    # Feature set the search starts from.
    starting_features = ['item_category_list', 'item_price_level', 'item_sales_level',
                         'item_collected_level', 'item_pv_level', 'day']
    selector.InitialFeatures(starting_features)

    selector.clf = lgbm.LGBMClassifier(
        random_state=1,
        num_leaves=6,
        n_estimators=5000,
        max_depth=3,
        learning_rate=0.05,
        n_jobs=8,
    )
    selector.logfile = 'record.log'
    selector.run(validation)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()