-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess.py
27 lines (21 loc) · 843 Bytes
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
def preprocess_data(data: pd.DataFrame, *, numerical_cols=None, categorical_cols=None):
    """Drop incomplete rows, then scale numeric and one-hot encode categorical columns.

    The transformer is fitted on ``data`` itself and then discarded, so the
    exact same scaling/encoding cannot be re-applied to another dataset
    through this function.

    Args:
        data: Input frame. Must contain every column named in
            ``numerical_cols`` and ``categorical_cols``.
        numerical_cols: Columns to standardize with ``StandardScaler``.
            Defaults to ``['income', 'debt', 'credit_score', 'age',
            'loan_amount']``.
        categorical_cols: Columns to encode with ``OneHotEncoder``.
            Defaults to ``['employment_status', 'marital_status']``.

    Returns:
        The result of ``ColumnTransformer.fit_transform``: one row per
        retained input row, with the scaled numeric columns first followed
        by the one-hot columns. Columns of ``data`` that are not listed are
        dropped from the output. Depending on the density of the encoded
        result, scikit-learn may return a dense array or a sparse matrix.

    Raises:
        ValueError: If a required column is absent, or if no rows remain
            after rows with missing values are removed.
    """
    if numerical_cols is None:
        numerical_cols = ['income', 'debt', 'credit_score', 'age', 'loan_amount']
    else:
        numerical_cols = list(numerical_cols)
    if categorical_cols is None:
        categorical_cols = ['employment_status', 'marital_status']
    else:
        categorical_cols = list(categorical_cols)

    # Fail early with a readable message instead of an error raised from
    # deep inside ColumnTransformer.
    required_cols = numerical_cols + categorical_cols
    missing_cols = [col for col in required_cols if col not in data.columns]
    if missing_cols:
        raise ValueError(
            f"preprocess_data: input is missing required column(s): {missing_cols}"
        )

    # NOTE: rows are dropped when *any* column holds a missing value, not
    # only the columns transformed below; kept as-is because callers may
    # rely on the resulting row set.
    data = data.dropna()
    if len(data) == 0:
        raise ValueError(
            "preprocess_data: no rows remain after dropping rows with missing values"
        )

    numerical_transformer = Pipeline(steps=[
        ('scaler', StandardScaler()),
    ])
    # handle_unknown='ignore' only matters when transforming unseen data;
    # it has no effect here because fit and transform use the same frame.
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, numerical_cols),
            ('cat', categorical_transformer, categorical_cols),
        ])

    processed_data = preprocessor.fit_transform(data)
    return processed_data