-
Notifications
You must be signed in to change notification settings - Fork 0
/
preProcess.py
79 lines (60 loc) · 2.42 KB
/
preProcess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score, precision_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import pandas as pd
import scipy.stats as sp
import sys
# Maps the short signal keys accepted by formatdata() to the full signal
# names that appear in the 'Type' field of each record.
argToName = {'dia':'BP Dia_mmHg', 'sys':'LA Systolic BP_mmHg', 'eda':'EDA_microsiemens', 'res': 'Respiration Rate_BPM'}


def formatdata(data, arg):
    """Split raw comma-separated records into fields and filter by signal.

    Every value in ``data['Data']`` is split on commas: the first three
    fields become the ``ID``, ``Type`` and ``Class`` columns, and whatever
    follows replaces ``Data`` as a list of strings (one entry per
    comma-separated value).

    Args:
        data: DataFrame with a string column named ``Data``. It is left
            untouched; all work happens on a copy.
        arg: A key of ``argToName`` to keep only rows whose ``Type`` equals
            the mapped signal name, or ``'all'`` to keep every row.

    Returns:
        A new DataFrame with the columns ``Data``, ``ID``, ``Type`` and
        ``Class``. Surviving rows keep their original index labels.

    Raises:
        KeyError: If ``arg`` is neither ``'all'`` nor a key of ``argToName``.
    """
    # Fail early, and with a helpful message, instead of after the parsing work.
    if arg != 'all' and arg not in argToName:
        raise KeyError(
            f"unknown signal key {arg!r}; expected 'all' or one of {sorted(argToName)}"
        )

    columns = ['ID', 'Type', 'Class', 'Data']
    # Work on a copy so the caller's frame does not silently gain columns
    # or have its 'Data' column overwritten.
    result = data.copy()
    # n=3 limits the split to the three header fields; the remainder (all
    # the sample values, still comma-joined) lands back in 'Data'.
    result[columns] = result['Data'].str.split(',', n=3, expand=True)
    result['Data'] = result['Data'].str.split(',')
    if arg != 'all':
        result = result[result['Type'] == argToName[arg]]
    return result
# Load the raw records. '|' is given as the delimiter so that each
# comma-separated line is read as a single 'Data' value
# (assumes '|' never occurs in data1.csv -- TODO confirm).
df = pd.read_csv('data1.csv', delimiter='|', names=['Data'])
filteredDf = formatdata(df, "sys")

# One feature row per record: mean, variance, entropy, min, and max,
# followed by the record's class label.
features = []
# Iterate only the two columns that are needed. Assigning to the row
# yielded by iterrows() is unreliable: pandas may hand back a copy or a
# view, so the write may or may not reach filteredDf.
for rawSamples, label in zip(filteredDf['Data'], filteredDf['Class']):
    samples = [float(value) for value in rawSamples]
    features.append([
        sum(samples) / len(samples),
        np.var(samples),
        # Entropy of the empirical distribution of distinct sample values;
        # scipy normalises the counts to probabilities.
        sp.entropy(pd.Series(samples).value_counts()),
        min(samples),
        max(samples),
        label,
    ])
table = pd.DataFrame(features, index=list(range(len(features))), columns=['mean', 'variance', 'entropy', 'min', 'max', 'class'])
# get data: the five numeric feature columns
x = table.iloc[:, 0:5].values
# get class: the label column
y = table.iloc[:, 5].values
########################################
# k-fold cross-validation of a random forest on the feature table (x, y).
# NOTE(review): KFold does not shuffle by default; if data1.csv is ordered
# by class or subject, folds may be unbalanced -- confirm this is intended.
nSplits = 10
kf = KFold(n_splits=nSplits)
rf = RandomForestClassifier()
# Fix the label order once so every fold's confusion matrix has the same
# shape and layout, even when a fold happens to contain a single class.
classLabels = np.unique(y)
# Start from zeros: np.ndarray(shape=...) returns uninitialised memory, and
# accumulating onto it would corrupt the summed matrix.
confMatrices = np.zeros((len(classLabels), len(classLabels)))
accuracy = 0
recallScore = 0
precisionScore = 0
for train_index, test_index in kf.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    rf.fit(x_train, y_train)
    print("xtrain", x_train, y_train)
    y_pred = rf.predict(x_test)
    confMatrices += confusion_matrix(y_test, y_pred, labels=classLabels)
    accuracy += accuracy_score(y_test, y_pred)
    recallScore += recall_score(y_pred=y_pred, y_true=y_test, pos_label='Pain')
    precisionScore += precision_score(y_pred=y_pred, y_true=y_test, pos_label='Pain')
#########################################
# Report the per-fold averages.
print(f"Conf. Matrix: \n {confMatrices/nSplits}")
print(f"Accuracy Score: {accuracy/nSplits}")
print(f"Recall Score: {recallScore/nSplits}")
print(f"Precision Score: {precisionScore/nSplits}")