-
Notifications
You must be signed in to change notification settings - Fork 0
/
1d.py
72 lines (66 loc) · 1.58 KB
/
1d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
import pandas as pd
from sklearn.preprocessing import Imputer
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
# Input CSV; the path is specific to the original author's machine.
DATA_PATH = "C:/Users/Shanu/PycharmProjects/Crime-data/communities.csv"
# Token the CSV uses for a missing cell, and the value substituted for it.
MISSING_MARKER = '?'
MISSING_FILL = 0


def fill_missing(frame, marker=MISSING_MARKER, fill_value=MISSING_FILL):
    """Replace *marker* cells with *fill_value* and restore numeric dtypes.

    A column that contains the marker is held as text, so a plain
    ``replace`` leaves it non-numeric and it is either dropped or rejected
    by ``std()``/``mean()`` depending on the pandas version. After the
    replacement every text column is therefore converted with
    ``pd.to_numeric``; columns that still cannot be parsed (real text)
    are returned unchanged.

    Args:
        frame: DataFrame to clean; it is not modified.
        marker: Cell value that denotes "missing".
        fill_value: Replacement for *marker*. Pass ``float("nan")`` to have
            missing cells skipped by the statistics instead of counted as 0.

    Returns:
        A new DataFrame with the same columns in the same order.
    """
    replaced = frame.replace(marker, fill_value)

    def _numeric_if_possible(column):
        is_text = (pd.api.types.is_object_dtype(column)
                   or pd.api.types.is_string_dtype(column))
        if not is_text:
            return column
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            # At least one value is not a number: keep the column as text.
            return column

    return replaced.apply(_numeric_if_possible)


def coefficient_of_variation(frame):
    """Return ``std / mean`` for every numeric column of *frame*.

    Non-numeric columns are excluded explicitly so the result does not
    depend on how the installed pandas treats text in reductions.
    A column whose mean is 0 yields ``inf`` or ``NaN`` rather than raising.

    Returns:
        A Series indexed by column name, in the frame's column order.
    """
    numeric = frame.select_dtypes(include="number")
    return numeric.std() / numeric.mean()


if __name__ == "__main__":
    df = fill_missing(pd.read_csv(DATA_PATH))
    CV = coefficient_of_variation(df)
    # Two-column (label, CV) table; kept for the ranking/plotting steps that
    # are currently commented out below.
    CV_ = pd.DataFrame({'label': CV.index, 'CV': CV.values})
    print("CV values features:")
    print(CV)
# CV_=CV_.sort_values(by='CV', ascending=False, na_position='first')
#
# my_arr=np.array(CV_.values[:,:])
# print(CV_.values[:,:])
# matching_array=[]
#
# for i in range(0,11):
# matching_array.append(my_arr[i][0])
# print(matching_array)
#
# df_for_plot=pd.DataFrame()
# for i in range(0,11):
# df_for_plot[matching_array[i]]=df[matching_array[i]]
# print(df_for_plot)
#
# df_for_plot.plot(kind='box', subplots=True, layout=(5,5), sharex=False, sharey=False)
#
# df_for_plot.plot.box()
#
# scatter_matrix(df_for_plot)
#
# plt.show()
# #
# #
#
# # temp=np.array(CV)
#
# # print(temp.shape)
# # temp=-np.sort(-temp)
# # # print(my_list_arr)
# # print(temp)
#
# # print(my_list_arr.shape)
# # X=dataset.values[:,:3]
# # X_p=dataset.values[:,3:4]
# # X_pp=dataset.values[:,4:127]
# #
# # Y=dataset.values[:,127:]
# #
# # print(X)
# # print("now Y","\n",Y)
# # try:
# # imp=Imputer(missing_values="NaN", strategy='mean', axis=0, verbose=0, copy=True)
# # imp.fit(X,Y)
# # print(X)
# # # imp.fit(X_p)
# # imp.fit(X_pp)
#
# # print(X)
#