-
Notifications
You must be signed in to change notification settings - Fork 125
/
Copy pathfigs_demo.py
118 lines (87 loc) · 3.94 KB
/
figs_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# # Setup
# +
# %load_ext autoreload
# %autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree, DecisionTreeClassifier
from sklearn import metrics
# TODO: remove the sys.path override below once the installed imodels package is updated
import sys,os
sys.path.append(os.path.expanduser('~/imodels'))
# installable with: `pip install imodels`
from imodels import FIGSClassifier
import demo_helper
# Seed NumPy's global random number generator with a fixed value (13).
np.random.seed(13)
# -
# Let's start by loading some data in...
# Note: we still need to load the regression dataset first, because loading it makes a call to random; keeping that call order gives the same splits as in `imodels_demo.ipynb`.
# +
# ames housing dataset: https://www.openml.org/search?type=data&status=active&id=43926
# The regression splits are loaded before the classification data on purpose: the load
# order has to match `imodels_demo.ipynb` so that both produce the same splits.
X_train_reg, X_test_reg, y_train_reg, y_test_reg, feat_names_reg = demo_helper.get_ames_data()
# diabetes dataset: https://www.openml.org/search?type=data&sort=runs&id=37&status=active
# These classification splits are the ones used by the rest of this script.
X_train, X_test, y_train, y_test, feat_names = demo_helper.get_diabetes_data()
# feat_names meanings:
# ["#Pregnant", "Glucose concentration test", "Blood pressure(mmHg)",
# "Triceps skin fold thickness(mm)",
# "2-Hour serum insulin (mu U/ml)", "Body mass index", "Diabetes pedigree function", "Age (years)"]
# Optional sanity check of the training-set shapes (left disabled):
# print('Regression data training', X_train_reg.shape, 'Classification data training', X_train.shape)
# -
# ***
# # FIGS
# Build a FIGS classifier configured with max_rules=7 and max_trees=3,
# then fit it on the classification training split using the feature names.
model_figs = FIGSClassifier(
    max_rules=7,
    max_trees=3,
)
model_figs.fit(
    X_train,
    y_train,
    feature_names=feat_names,
)
# Show the fitted model: its printed form, the text returned by print_tree
# for the training data, and finally the plot.
print(model_figs)
print(model_figs.print_tree(X_train, y_train))
model_figs.plot(fig_size=7)
# ## Gini Importance
# Importances reported by the fitted model, keyed by positional feature index,
# plus their percentile rank (rank(pct=True)) among all features.
dfp_importance_gini = pd.DataFrame(
    {'importance_gini': model_figs.feature_importances_}
).assign(
    feature=lambda frame: frame.index,
    importance_gini_pct=lambda frame: frame['importance_gini'].rank(pct=True),
)
# Attach the importances to the feature names via the shared positional index,
# then order by importance (descending), breaking ties by feature index (ascending).
dfp_importance = (
    pd.DataFrame({'feat_names': feat_names})
    .assign(feature=lambda frame: frame.index)
    .merge(dfp_importance_gini, on='feature', how='left')
    .sort_values(by=['importance_gini', 'feature'], ascending=[False, True])
    .reset_index(drop=True)
)
display(dfp_importance)
# ***
# # `dtreeviz` Integration
# Only one tree can be visualized at a time; tree 0 is shown here.
# +
import dtreeviz
from imodels.tree.viz_utils import extract_sklearn_tree_from_figs
# Extract tree 0 of the fitted FIGS model with the imodels helper, then hand it
# to dtreeviz together with the training data, feature names and class labels.
dt = extract_sklearn_tree_from_figs(
    model_figs,
    tree_num=0,
    n_classes=2,
)
viz_model = dtreeviz.model(
    dt,
    X_train=X_train,
    y_train=y_train,
    feature_names=feat_names,
    target_name='y',
    class_names=[0, 1],
)
# -
# Colour settings handed to dtreeviz: class colours come from
# dtreeviz.colors.mpl_colors, histogram bars use 'C0', and 'legend_edge' is None.
color_params = {'classes': dtreeviz.colors.mpl_colors, 'hist_bar': 'C0', 'legend_edge': None}
# Set each of the listed elements to black. dict.fromkeys preserves the list
# order and avoids binding the name `_` at module level (IPython uses `_` for
# the last cell output).
color_params.update(dict.fromkeys(
    ['axis_label', 'title', 'legend_title', 'text', 'arrow', 'node_label',
     'tick_label', 'leaf_label', 'wedge', 'text_wedge'],
    'black',
))
# General settings shared by several dtreeviz calls.
dtv_params_gen = {'colors': color_params, 'fontname': 'Arial', 'figsize': (4, 3)}
# Settings for viz_model.view(); colours and font are reused from dtv_params_gen.
dtv_params = {
    'leaftype': 'barh',
    'label_fontsize': 10,
    'colors': dtv_params_gen['colors'],
    'fontname': dtv_params_gen['fontname'],
}
viz_model.view(**dtv_params)
# Use the entry at index 13 of X_train as the worked example.
x_example = X_train[13]
# Show the example as a one-row table with one column per feature name.
display(pd.DataFrame([dict(zip(feat_names, x_example))]))
# Print the explanation of the prediction path for this example.
print(viz_model.explain_prediction_path(x=x_example))
# Tree view for the example, followed by a non-fancy view with node labels.
viz_model.view(x=x_example, **dtv_params)
viz_model.view(show_node_labels=True, fancy=False, **dtv_params)
# Leaf-level plots, drawn with the shared general settings.
viz_model.ctree_leaf_distributions(**dtv_params_gen)
viz_model.leaf_purity(**dtv_params_gen, display_type='plot')
# ***
# # `SKompiler` Integration
# Only one tree can be handled at a time; tree 0 is shown here.
# +
# The helper import repeats the one in the dtreeviz section, so this section
# does not rely on that import having run.
from skompiler import skompile
from imodels.tree.viz_utils import extract_sklearn_tree_from_figs
# Re-extract tree 0 from the fitted FIGS model (same call as in the dtreeviz section).
dt = extract_sklearn_tree_from_figs(model_figs, tree_num=0, n_classes=2)
# Pass the tree's predict_proba method and the feature names to skompile;
# the result is kept in `expr`.
expr = skompile(dt.predict_proba, feat_names)
# +
# Currently broken, see https://github.com/konstantint/SKompiler/issues/16
# print(expr.to('sqlalchemy/sqlite', component=1, assign_to='tree_0'))
# -
# Render `expr` with its 'python/code' target and print the result.
print(expr.to('python/code'))