-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrun-preprocessed.py
127 lines (99 loc) · 4.92 KB
/
run-preprocessed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import tensorflow as tf

print("TensorFlow version: " + tf.__version__)
# Reduce logging verbosity to solve https://stackoverflow.com/questions/52512381/disable-image-parsing-warnings-in-tensorflow-python
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
#tf.logging.set_verbosity(tf.logging.ERROR)

# Square thumbnails: width is tied to height.
thumbnail_height = 40
thumbnail_width = thumbnail_height
out_shape = tf.convert_to_tensor([thumbnail_height, thumbnail_width])
batch_size = 100
datapath = "data/thumbnails-" + str(thumbnail_height) + "/"
data_folders = [datapath + "training/0", datapath + "training/1"]
classes = [0., 1.]

# Collect every training image path; the class label comes from the folder
# each file lives in (training/0 -> 0., training/1 -> 1.).
file_names = []  # Path of all data files
labels = []      # Label of each data file (same size as the array above)
for d, l in zip(data_folders, classes):
    name = [os.path.join(d, f) for f in os.listdir(d)]  # get the list of all the images file names
    file_names.extend(name)
    labels.extend([l] * len(name))

# Record the dataset size now, while file_names is still a Python list
# (len() is not usable once it is converted to a tensor below).
num_examples = len(file_names)
epoch_size = 10
print("file_names: " + str(file_names))
print("labels: " + str(labels))
print("epoch_size: " + str(epoch_size))

file_names = tf.convert_to_tensor(file_names, dtype=tf.string)
labels = tf.convert_to_tensor(labels)
dataset = tf.data.Dataset.from_tensor_slices((file_names, labels))
# Shuffle over the entire training set. The previous buffer of `epoch_size`
# (10 elements) only ever mixed 10 neighbouring examples, so the two class
# folders — which were concatenated back to back — stayed almost unshuffled.
dataset = dataset.repeat().shuffle(max(num_examples, 1))
def map_fn(path, label):
    """Turn one (path, label) example into a (image, label) tensor pair.

    Reads the PNG at `path` as single-channel, resizes it to `out_shape`,
    rescales pixel values into [-1, 1], and expands the scalar label to
    shape (1,).
    """
    raw = tf.read_file(path)
    image = tf.image.decode_png(raw, channels=1)
    # Force a constant spatial size; beware that this can distort aspect ratios.
    image = tf.image.resize_images(image, out_shape)
    #image = tf.image.rgb_to_grayscale(image) # Improvement: Use Gleam grayscale, better for face recognition: https://stackoverflow.com/a/29989836/226958 https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0029740
    # Map uint8 pixel values [0, 255] linearly onto floats in [-1, 1].
    image = tf.to_float(image) * (2. / 255) - 1
    return image, tf.expand_dims(label, axis=-1)
# num_parallel_calls > 1 induces intra-batch shuffling
dataset = dataset.map(map_fn, num_parallel_calls=8)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(1)
print("dataset: " + str(dataset))

# TF1-style input pipeline: the one-shot iterator yields symbolic tensors that
# Keras consumes directly. NOTE: this rebinds `labels` from the full label
# tensor created above to the per-batch label tensor.
images, labels = dataset.make_one_shot_iterator().get_next()

# Following is from https://www.tensorflow.org/tutorials/keras/basic_classification
from tensorflow import keras

# Small CNN for binary classification of single-channel thumbnails:
# two conv -> maxpool -> dropout stages, then a dense head ending in a
# single sigmoid unit (matches the binary_crossentropy loss below).
model = keras.Sequential([
    keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(thumbnail_height, thumbnail_width, 1)),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Dropout(0.3),
    keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Dropout(0.3),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='binary_crossentropy',
              metrics=['accuracy'])
print("images: " + str(images))
print("labels: " + str(labels))
# NOTE(review): steps_per_epoch=1095 is hard-coded — presumably
# ceil(number of training images / batch_size) for this particular dataset;
# confirm it matches the actual file count on disk.
model.fit(images, labels, epochs=epoch_size, verbose=1, steps_per_epoch=1095)
# Test
batch_size_test = 100
data_folders_test = [datapath + "test/0", datapath + "test/1"]

# Collect the test image paths and labels, mirroring the training-set scan.
file_names_test = []  # Path of all data files
labels_test = []      # Label of each data file (same size as the array above)
for d_test, l_test in zip(data_folders_test, classes):
    name_test = [os.path.join(d_test, f_test) for f_test in os.listdir(d_test)]  # get the list of all the images file names
    file_names_test.extend(name_test)
    labels_test.extend([l_test] * len(name_test))

epoch_size_test = 1
print("file_names: " + str(file_names_test))
print("labels: " + str(labels_test))
print("epoch_size: " + str(epoch_size_test))

dataset_test = tf.data.Dataset.from_tensor_slices((file_names_test, labels_test))
# Intentionally NO repeat()/shuffle() here: evaluation must preserve dataset
# order so that predictions[i] lines up with file_names_test[i]. (The original
# shuffled with a buffer of 1 — a no-op — but also called repeat(), which is
# never wanted on a test set.)
# Dataset.map preserves element order even with parallel calls, so the single
# batch below stays aligned with file_names_test.
dataset_test = dataset_test.map(map_fn, num_parallel_calls=8)
dataset_test = dataset_test.batch(len(file_names_test))  # one batch holding every test image
dataset_test = dataset_test.prefetch(1)
print("dataset: " + str(dataset_test))

# Use a distinct name for the label tensor: the original assigned it back to
# `labels_test`, clobbering the Python list built above.
images_test, label_batch_test = dataset_test.make_one_shot_iterator().get_next()
loss_test, accuracy_test = model.evaluate(images_test, label_batch_test, steps=1)

# Print prediction and certainty for each image, and dump them to CSV.
predictions = model.predict(images_test, steps=1)
with open("test-results.csv", "w") as f:  # `with` closes the file (original leaked the handle)
    for i, prediction in enumerate(predictions):
        name = file_names_test[i]
        # The class label is the parent folder name ("0" or "1"). The old
        # hard-coded slice name[15:16] pointed at a fixed offset inside
        # "data/thumbnails-…" and returned "-" for thumbnail size 40.
        label = os.path.basename(os.path.dirname(name))
        prediction_str = str(prediction[0])
        f.write(prediction_str + "," + label + "," + name + "\n")
        print("Prediction: " + prediction_str + " Actual: " + label + " File: " + name)

print('Test accuracy:', accuracy_test)