-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcleaning.py
45 lines (38 loc) · 1.36 KB
/
cleaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pandas as pd
import numpy as np
import math
import matplotlib as plt
import os as os
# Directories that hold the broken-tooth and healthy gearbox recordings.
pth1 = 'dataset/BrokenTooth'
pth2 = 'dataset/Healthy'

# Read every file in each directory into a list of DataFrames.
# os.listdir() returns entries in arbitrary order, and the labelling step
# further down assigns the load by list position, so the listing is sorted
# to make that position deterministic across platforms and runs.
# NOTE(review): assumes the sorted file names correspond to ascending load
# (0, 10, ..., 90) -- confirm against the dataset's naming scheme.
broken = [
    pd.read_csv(os.path.join(pth1, name))
    for name in sorted(os.listdir(pth1))
]
healthy = [
    pd.read_csv(os.path.join(pth2, name))
    for name in sorted(os.listdir(pth2))
]
# Tag the first ten frames of each class with their load and class label.
# The frame at position i receives load = 10 * i.
# gearbox_status = 0 marks a faulty (broken tooth) gearbox,
# gearbox_status = 1 marks a healthy gearbox.
for i in range(10):
    load = i * 10
    for frames, status in ((broken, 0), (healthy, 1)):
        frames[i]['load'] = load
        frames[i]['gearbox_status'] = status
# Stack the ten per-load frames of each class into one frame per class.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; a single concat also avoids
# re-copying the growing frame on every iteration.
healthy_agg = pd.concat(healthy[:10], ignore_index=True)
broken_agg = pd.concat(broken[:10], ignore_index=True)

# Aggregate the total dataset: healthy rows first, then broken rows,
# renumbered with a fresh 0..n-1 index.
healthy_broken_agg = pd.concat([healthy_agg, broken_agg], axis=0, ignore_index=True)