-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path13_Drop_Binary.py
41 lines (37 loc) · 1 KB
/
13_Drop_Binary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
# Toy dataset: one row per observation, two categorical features.
# "Shape" takes three distinct values; "Color" takes exactly two (binary).
data = pd.DataFrame(
    [
        ("circle", "pink"),
        ("oval", "yellow"),
        ("square", "pink"),
        ("square", "yellow"),
    ],
    columns=["Shape", "Color"],
)
# print(data) would show:
#     Shape   Color
# 0  circle    pink
# 1    oval  yellow
# 2  square    pink
# 3  square  yellow
from sklearn.preprocessing import OneHotEncoder
# drop=None (the default) keeps every category, so each feature contributes
# one output column per category: 3 for Shape + 2 for Color = 5 columns.
one_hot = OneHotEncoder(drop=None, sparse_output=False).fit(data)
print(one_hot.transform(data))
# Output (columns: circle, oval, square, pink, yellow):
# [[1. 0. 0. 1. 0.]
#  [0. 1. 0. 0. 1.]
#  [0. 0. 1. 1. 0.]
#  [0. 0. 1. 0. 1.]]
# drop='first' drops the first category of every feature, so "circle" and
# "pink" become the implicit all-zeros baseline: 2 + 1 = 3 columns remain.
one_hot = OneHotEncoder(drop="first", sparse_output=False).fit(data)
print(one_hot.transform(data))
# Output (columns: oval, square, yellow):
# [[0. 0. 0.]
#  [1. 0. 1.]
#  [0. 1. 0.]
#  [0. 1. 1.]]
# drop='if_binary' drops the first category only for features that have
# exactly two categories. Shape (3 categories) keeps all of its columns,
# while Color (binary) loses "pink": 3 + 1 = 4 columns remain.
one_hot = OneHotEncoder(drop="if_binary", sparse_output=False).fit(data)
print(one_hot.transform(data))
# Output (columns: circle, oval, square, yellow):
# [[1. 0. 0. 0.]
#  [0. 1. 0. 1.]
#  [0. 0. 1. 0.]
#  [0. 0. 1. 1.]]