-
Notifications
You must be signed in to change notification settings - Fork 11
/
extract_features_pca.py
79 lines (68 loc) · 2.52 KB
/
extract_features_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import os
from sklearn.decomposition import PCA
import glob
def extract_features_pca(nn_model):
'''
Do PCA dimension reduction on the features extracted by NN models
and save as npz file
e.g., dimension: (1, 2048) --> (1, 300) for ResNet-50
:param nn_model:
:return:
'''
raw_feature_path = './dataset/feature_' + nn_model
ProcessedPath = './dataset/feature_' + nn_model +'_pca'
train_list = []
test_list = []
train_files = glob.glob(raw_feature_path + '/train/*')
test_files = glob.glob(raw_feature_path + '/test/*')
for f in train_files:
x = np.load(f)
x = np.squeeze(x, axis=0)
train_list.append(x)
for f in test_files:
x = np.load(f)
x = np.squeeze(x, axis=0)
test_list.append(x)
train_list = np.array(train_list)
test_list = np.array(test_list)
print(train_list.shape, test_list.shape) # (300, 2048) (60, 2048)
# normalize the train and test data using train_mean
train_mean = np.mean(train_list, axis=0)
train_list -= train_mean
test_list -= train_mean
# do pca
# pca = PCA(n_components=None)
pca = PCA(n_components=0.99)
train_pca = pca.fit_transform(train_list)
test_pca = pca.transform(test_list)
print(train_pca.shape, test_pca.shape)
# expand dimension
train_pca_list = train_pca.tolist()
for i in range(len(train_pca_list)):
train_pca_list[i] = np.expand_dims(train_pca_list[i], axis=0)
test_pca_list = test_pca.tolist()
for i in range(len(test_pca_list)):
test_pca_list[i] = np.expand_dims(test_pca_list[i], axis=0)
# save the features as npz files
for i in range(len(train_pca_list)):
train_feature = train_pca_list[i]
train_file = train_files[i]
train_file = os.path.split(train_file)[1]
image_pre, ext = os.path.splitext(os.path.splitext(train_file)[0])
savepath = os.path.join(ProcessedPath, 'train')
if (not os.path.exists(savepath)):
os.makedirs(savepath)
np.save(os.path.join(savepath,image_pre+'.npz'), train_feature)
for i in range(len(test_pca_list)):
test_feature = test_pca_list[i]
test_file = test_files[i]
test_file = os.path.split(test_file)[1]
image_pre, ext = os.path.splitext(os.path.splitext(test_file)[0])
savepath = os.path.join(ProcessedPath, 'test')
if (not os.path.exists(savepath)):
os.makedirs(savepath)
np.save(os.path.join(savepath,image_pre+'.npz'), test_feature)
if __name__ == '__main__':
nn_model = 'resnet50'
extract_features_pca(nn_model)