-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_explanation.py
47 lines (30 loc) · 998 Bytes
/
data_explanation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import pandas as pd
def find_data(data, cmp_data):
    """Report whether at least half of ``cmp_data`` is also present in ``data``.

    The overlap is computed with ``np.intersect1d`` and its size is printed
    before the result is returned.

    NOTE: ``np.intersect1d`` yields unique common values, whereas
    ``len(cmp_data)`` counts every entry along the first axis, so repeated
    values in ``cmp_data`` raise the threshold without raising the overlap.

    Args:
        data: Array-like of values to search in.
        cmp_data: Array-like of values to look for.

    Returns:
        True if the number of common values is at least ``len(cmp_data) / 2``,
        otherwise False.
    """
    overlap_count = len(np.intersect1d(data, cmp_data))
    print(overlap_count)
    return overlap_count >= len(cmp_data) / 2
def view_data(x, y):
    """Print the shapes of ``x`` and ``y`` and whether either contains NaN.

    Missing values are detected with ``pd.isna`` rather than ``np.isnan`` so
    that arrays with a non-numeric dtype (e.g. string or object labels) are
    reported instead of raising ``TypeError``. For numeric arrays the printed
    result is the same.

    Args:
        x: Array-like exposing a ``.shape`` attribute.
        y: Array-like exposing a ``.shape`` attribute.

    Returns:
        None. All output goes to stdout.
    """
    print("*********** DATA STATISTICS ***********")
    print('x shape: ', x.shape)
    print('y shape: ', y.shape)
    print('Does X contain NAN: ', pd.isna(x).any())
    print('Does Y contain NAN: ', pd.isna(y).any())
def is_bad_data(data, gap=1):
    """Check whether row 0 and row ``gap`` of ``data`` fail to line up.

    Compares the value in column 1 of row 0 with the value in column 0 of
    row ``0 + gap``.
    NOTE(review): presumably rows are consecutive ranges that should chain
    end-to-start; confirm against the caller.

    Args:
        data: Object supporting 2-D ``data[row, col]`` indexing with at least
            ``gap + 1`` rows and two columns.
        gap: Row offset from row 0 of the row to compare against.

    Returns:
        The result of the ``!=`` comparison (truthy when the values differ).
    """
    base_row = 0
    reference_value = data[base_row, 1]
    compared_value = data[base_row + gap, 0]
    return reference_value != compared_value
def data_summary(shape_list):
    """Print ``shape_list`` as a pandas Series followed by its summary stats.

    The input is first converted to a NumPy array, then wrapped in a
    ``pd.Series``; both the series itself and ``Series.describe()`` are
    printed.

    Args:
        shape_list: Sequence of values to summarise.

    Returns:
        None. All output goes to stdout.
    """
    series = pd.Series(np.array(shape_list))
    for report in (series, series.describe()):
        print(report)