-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlogit-stacker.py
57 lines (43 loc) · 1.65 KB
/
logit-stacker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import pandas as pd
import numpy as np
from scipy.special import expit, logit
almost_zero = 1e-10
almost_one = 1-almost_zero
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
print(os.listdir("../input"))
scores = {}
id_col = "click_id"
target = "is_attributed"
df = pd.read_csv("../input/simple-averaging/submission_final.csv",index_col=id_col).rename(columns={target: '1'}) # 0.80121
scores["1"] = 0.95
df["2"] = pd.read_csv("../input/notebook-version-of-talkingdata-lb-0-9786/sub_it24.csv")[target].values # 0.793
scores["2"] = 0.80
df["3"] = pd.read_csv("../input/log-and-harmonic-mean-lets-go/submission_geo.csv")[target].values # 0.78583
scores["3"] = 0.50
# More NN..
# Add https://www.kaggle.com/emotionevil/nlp-and-stacking-starter-dpcnn-lgb-lb0-80/notebook
weights = [0] * len(df.columns)
power = 120
dic = {}
for i,col in enumerate(df.columns):
weights[i] = scores[col] ** power
dic[i] = df[col].clip(almost_zero,almost_one).apply(logit) * weights[i]
print(weights[:])
totalweight = sum(weights)
temp = []
for x in dic:
if x == 0:
temp = dic[x]
else:
temp = temp+dic[x]
# Average
temp = temp/(totalweight)
df[target] = temp
df[target] = df[target].apply(expit)
df[target].to_csv("ensembling_submission.csv", index=True, header=True)
print(df[target].head())