-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_data.py
31 lines (27 loc) · 1.14 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
class Processor:
    """Preprocessing helpers that operate on a pandas DataFrame.

    The DataFrame passed to the constructor is stored as ``self.data`` and is
    mutated in place by ``fill_null`` and ``date_transfer``. ``target`` is
    stored as given; none of the methods in this class read it.
    """

    def __init__(self, data, target):
        self.data = data
        self.target = target

    def fill_null(self, col, flag):
        """Fill missing values in column ``col`` and store the result.

        Args:
            col: Label of the column in ``self.data`` to fill.
            flag: Filling strategy. One of ``"pad"`` (forward fill),
                ``"bfill"`` (backward fill), ``"mean"`` (fill with the
                column mean) or ``"interpolate"`` (``Series.interpolate``
                with its default settings).

        Returns:
            The filled column (also written back to ``self.data[col]``), or
            ``None`` when ``flag`` is not one of the supported strategies, in
            which case ``self.data`` is left untouched.
        """
        column = self.data[col]
        if flag == "pad":
            filled = column.ffill()
        elif flag == "bfill":
            filled = column.bfill()
        elif flag == "mean":
            filled = column.fillna(column.mean())
        elif flag == "interpolate":
            filled = column.interpolate()
        else:
            # Unknown strategy: keep the historical "do nothing" behaviour.
            return None

        # Assign back explicitly. A chained ``self.data[col].fillna(...,
        # inplace=True)`` works on a temporary selection and is not
        # guaranteed to update the frame (it never does under pandas
        # Copy-on-Write).
        self.data[col] = filled
        return filled

    def date_transfer(self, col, flag):
        """Replace a date column by a numeric calendar feature.

        ``col`` is parsed with ``pd.to_datetime``. Depending on ``flag`` a new
        column is added: ``"week"`` adds ``day_of_week`` (``dt.dayofweek``),
        ``"year"`` adds ``day_of_year`` (``dt.dayofyear``). The source column
        is then dropped regardless of ``flag``; any other flag value therefore
        removes ``col`` without adding a feature.

        Args:
            col: Label of the date column in ``self.data``.
            flag: ``"week"`` or ``"year"``.

        Returns:
            ``self.data`` (the same object, modified in place).
        """
        self.data[col] = pd.to_datetime(self.data[col])
        dates = self.data[col]
        if flag == "week":
            self.data["day_of_week"] = dates.dt.dayofweek
        elif flag == "year":
            self.data["day_of_year"] = dates.dt.dayofyear
        self.data.drop(columns=col, inplace=True)
        return self.data

    def drop_uni(self):
        """Find the first column that holds a single distinct value.

        Missing values are ignored when counting distinct values. The column
        is only reported (printed and returned); this method does not remove
        it from ``self.data``.

        Returns:
            The label of the first such column, or ``None`` if every column
            has zero or more than one distinct non-null value.
        """
        for col in self.data.columns:
            # nunique() skips NaN by default, matching len(value_counts()).
            if self.data[col].nunique() == 1:
                # f-string instead of "%s" % col so tuple labels print safely.
                print(f">> Drop uni col:{col}")
                return col
        return None