-
Notifications
You must be signed in to change notification settings - Fork 12
/
one_hot_encoder.py
60 lines (43 loc) · 1.7 KB
/
one_hot_encoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from niaaml.preprocessing.encoding.feature_encoder import FeatureEncoder
from sklearn.preprocessing import OneHotEncoder as OHE
import pandas as pd
# Names exported by ``from <this module> import *``.
__all__ = ["OneHotEncoder"]
class OneHotEncoder(FeatureEncoder):
    r"""One-hot encoding of a categorical feature.

    Wraps scikit-learn's ``OneHotEncoder`` (imported here as ``OHE``),
    configured with ``handle_unknown="ignore"``, and returns the encoded
    values as a dense pandas DataFrame.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Reference:
        Seger, Cedric. "An investigation of categorical variable encoding techniques in machine learning: binary versus one-hot and feature hashing." (2018).

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html

    See Also:
        * :class:`niaaml.preprocessing.encoding.FeatureEncoder`
    """

    Name = "One-Hot Encoder"

    def __init__(self, **kwargs):
        r"""Create the wrapped scikit-learn encoder.

        Arguments:
            **kwargs: Accepted but not used by this encoder.
        """
        # handle_unknown="ignore" makes transform() tolerate categories that
        # were not present when the encoder was fitted.
        encoder = OHE(handle_unknown="ignore")
        self.__one_hot_encoder = encoder

    def fit(self, feature):
        r"""Fit the wrapped encoder on a feature column.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.
        """
        encoder = self.__one_hot_encoder
        encoder.fit(feature)

    def transform(self, feature):
        r"""Encode a feature column with the fitted encoder.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.

        Returns:
            pandas.core.frame.DataFrame: The encoded values as a dense frame.
            The frame is built without explicit labels, so it carries
            pandas' default integer index and column names rather than the
            index of ``feature``.
        """
        encoded = self.__one_hot_encoder.transform(feature)
        # The encoder result is converted to a dense array before wrapping.
        dense_values = encoded.toarray()
        return pd.DataFrame(dense_values)

    def to_string(self):
        r"""Build a user friendly description of this encoder.

        Returns:
            str: The base class' string template formatted with ``Name``.
        """
        template = super().to_string()
        return template.format(name=self.Name)