From 0e503a1605c6a0fcacfd79114917e0dafc25fa6d Mon Sep 17 00:00:00 2001
From: Kiran Sanjeevan
Date: Mon, 25 Jun 2018 21:30:52 -0700
Subject: [PATCH] More Keras additions

---
 README.md         |  2 +-
 net/netdecode.py  | 66 ++---------------------------------------------
 net/netloss.py    |  8 ++++++
 net/netparams.py  | 13 ++++------
 net/proper_nms.py | 62 ++++++++++++++++++++++++++++++++++++++++++++
 net/utils.py      |  5 ++--
 yolov2.py         |  1 -
 7 files changed, 81 insertions(+), 76 deletions(-)
 create mode 100644 net/proper_nms.py

diff --git a/README.md b/README.md
index e4b9fce..93adc33 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ Then, in another terminal tab you can run `tensorboard --logdir=logs/run_X` and
 - [ ] cfg parser
 - [ ] Anchor generation for custom datasets
 - [ ] mAP write up
-
+- [ ] Add webcam support
 
 #### Inspired from
 
diff --git a/net/netdecode.py b/net/netdecode.py
index 9ef23e9..9b555a6 100644
--- a/net/netdecode.py
+++ b/net/netdecode.py
@@ -61,7 +61,7 @@ def __call__(self, y_sing_pred):
 
         selected_boxes = K.gather(flatten_boxes, selected_indices)
         selected_scores = K.gather(flatten_scores, selected_indices)
-        selected_classes = tf.gather(flatten_classes, selected_indices)
+        selected_classes = K.gather(flatten_classes, selected_indices)
 
         # Repassem aixo vale
         score_mask = selected_scores>self.detection_threshold
@@ -72,69 +72,7 @@ def __call__(self, y_sing_pred):
 
         return process_outs(selected_boxes, selected_scores, K.cast(selected_classes, np.float32))
 
-    def proper_yolo_nms(self, y_sing_pred):
-        # NMS need to be applied per class, since two different boxes could predict with high confidence
-        # two objects that have high IOU
-        # At the same time, even though NMS has to be done per class, it can only be done with max values
-        # of P(O) * P(Class|O) since we want to avoid same box predicting 2 overlapping objects.
-        # Doing both these things turns out to be a fucking pain.
-
-        # CONSIDER USING tf.while_loop for the FOR
-
-        b_xy = tf.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]
-        b_wh = tf.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]
-        b_xy1 = b_xy - b_wh / 2.
-        b_xy2 = b_xy + b_wh / 2.
-        boxes = tf.concat([b_xy1, b_xy2], axis=-1)
-
-
-        scores_all = tf.expand_dims(tf.sigmoid(y_sing_pred[..., 4]), axis=-1) * tf.nn.softmax(y_sing_pred[...,5:])
-        indicator_detection = scores_all > self.detection_threshold
-
-        scores_all = scores_all * tf.to_float(indicator_detection)
-
-        classes = tf.argmax(scores_all, axis=-1)
-
-        scores = tf.reduce_max(scores_all, axis=-1)
-
-        flatten_boxes = tf.reshape(boxes,
-            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, 4))
-        flatten_scores = tf.reshape(scores,
-            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
-        flatten_classes = tf.reshape(classes,
-            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
-
-        output_boxes = []
-        output_scores = []
-        output_classes = []
-        for c in range(YoloParams.NUM_CLASSES):
-            if tf.reduce_sum(tf.to_float(tf.equal(flatten_classes, c))) > 0:
-                filtered_flatten_boxes = tf.boolean_mask(flatten_boxes, tf.equal(flatten_classes, c))
-                filtered_flatten_scores = tf.boolean_mask(flatten_scores, tf.equal(flatten_classes, c))
-                filtered_flatten_classes = tf.boolean_mask(flatten_classes, tf.equal(flatten_classes, c))
-
-                selected_indices = tf.image.non_max_suppression(
-                    filtered_flatten_boxes, filtered_flatten_scores, self.max_boxes, self.iou_threshold)
-
-                selected_boxes = K.gather(filtered_flatten_boxes, selected_indices)
-                selected_scores = K.gather(filtered_flatten_scores, selected_indices)
-                selected_classes = K.gather(filtered_flatten_classes, selected_indices)
-
-
-                output_boxes.append( selected_boxes )
-                output_scores.append( selected_scores )
-                output_classes.append( selected_classes )
-
-
-        print(output_boxes)
-
-        print(tf.concat(output_boxes, axis=-1).eval())
-        print(tf.concat(output_scores, axis=-1).eval())
-        print(tf.concat(output_classes, axis=-1).eval())
-
-        return tf.concat(output_boxes, axis=-1), tf.concat(output_scores, axis=-1), tf.concat(output_classes, axis=-1)
-
-
+
 
 
 if __name__ == '__main__':
diff --git a/net/netloss.py b/net/netloss.py
index 93c145f..71bf160 100644
--- a/net/netloss.py
+++ b/net/netloss.py
@@ -55,6 +55,7 @@ def __init__(self):
         self.lambda_class = YoloParams.CLASS_SCALE
 
         self.norm = False
+        self.paper_imp = False
 
 
     def coord_loss(self, y_true, y_pred):
@@ -116,6 +117,13 @@ def class_loss(self, y_true, y_pred):
         loss_class_arg = tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels=b_class, logits=b_class_pred)
 
+        '''
+        if self.paper_imp:
+            loss_class_arg = 
+
+        '''
+
+
         loss_class = K.sum(loss_class_arg * indicator_class, axis=[1,2,3])
 
         return loss_class / (norm_class + EPSILON)
diff --git a/net/netparams.py b/net/netparams.py
index c64efe7..e75c18a 100644
--- a/net/netparams.py
+++ b/net/netparams.py
@@ -1,6 +1,6 @@
 import pickle, argparse, json, os, sys
-import tensorflow as tf
+from keras import backend as K
 import numpy as np
 
 
 
@@ -53,14 +53,11 @@
     config = json.loads(config_buffer.read())
 
 
-
 def generate_yolo_grid(batch, g, num_bb):
-    c_x = tf.to_float(tf.reshape(tf.tile(tf.range(g), [g]), (1, g, g, 1, 1)))
-    c_y = tf.transpose(c_x, (0,2,1,3,4))
-    return tf.tile(tf.concat([c_x, c_y], -1), [batch, 1, 1, num_bb, 1])
-
-
-
+    c_x = K.cast(K.reshape(K.tile(K.arange(g), [g]), (1, g, g, 1, 1)), K.floatx())
+    c_y = K.permute_dimensions(c_x, (0,2,1,3,4))
+    return K.tile(K.concatenate([c_x, c_y], -1), [batch, 1, 1, num_bb, 1])
+
 
 def get_threshold(value):
     if value > 1. or value < 0:
diff --git a/net/proper_nms.py b/net/proper_nms.py
new file mode 100644
index 0000000..41a5c9e
--- /dev/null
+++ b/net/proper_nms.py
@@ -0,0 +1,62 @@
+    def proper_yolo_nms(self, y_sing_pred):
+        # NMS need to be applied per class, since two different boxes could predict with high confidence
+        # two objects that have high IOU
+        # At the same time, even though NMS has to be done per class, it can only be done with max values
+        # of P(O) * P(Class|O) since we want to avoid same box predicting 2 overlapping objects.
+        # Doing both these things turns out to be a fucking pain.
+
+        # CONSIDER USING tf.while_loop for the FOR
+
+        b_xy = tf.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]
+        b_wh = tf.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]
+        b_xy1 = b_xy - b_wh / 2.
+        b_xy2 = b_xy + b_wh / 2.
+        boxes = tf.concat([b_xy1, b_xy2], axis=-1)
+
+
+        scores_all = tf.expand_dims(tf.sigmoid(y_sing_pred[..., 4]), axis=-1) * tf.nn.softmax(y_sing_pred[...,5:])
+        indicator_detection = scores_all > self.detection_threshold
+
+        scores_all = scores_all * tf.to_float(indicator_detection)
+
+        classes = tf.argmax(scores_all, axis=-1)
+
+        scores = tf.reduce_max(scores_all, axis=-1)
+
+        flatten_boxes = tf.reshape(boxes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, 4))
+        flatten_scores = tf.reshape(scores,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+        flatten_classes = tf.reshape(classes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+
+        output_boxes = []
+        output_scores = []
+        output_classes = []
+        for c in range(YoloParams.NUM_CLASSES):
+            if tf.reduce_sum(tf.to_float(tf.equal(flatten_classes, c))) > 0:
+                filtered_flatten_boxes = tf.boolean_mask(flatten_boxes, tf.equal(flatten_classes, c))
+                filtered_flatten_scores = tf.boolean_mask(flatten_scores, tf.equal(flatten_classes, c))
+                filtered_flatten_classes = tf.boolean_mask(flatten_classes, tf.equal(flatten_classes, c))
+
+                selected_indices = tf.image.non_max_suppression(
+                    filtered_flatten_boxes, filtered_flatten_scores, self.max_boxes, self.iou_threshold)
+
+                selected_boxes = K.gather(filtered_flatten_boxes, selected_indices)
+                selected_scores = K.gather(filtered_flatten_scores, selected_indices)
+                selected_classes = K.gather(filtered_flatten_classes, selected_indices)
+
+
+                output_boxes.append( selected_boxes )
+                output_scores.append( selected_scores )
+                output_classes.append( selected_classes )
+
+
+        print(output_boxes)
+
+        print(tf.concat(output_boxes, axis=-1).eval())
+        print(tf.concat(output_scores, axis=-1).eval())
+        print(tf.concat(output_classes, axis=-1).eval())
+
+        return tf.concat(output_boxes, axis=-1), tf.concat(output_scores, axis=-1), tf.concat(output_classes, axis=-1)
+
diff --git a/net/utils.py b/net/utils.py
index 9f39e6d..4ff922a 100755
--- a/net/utils.py
+++ b/net/utils.py
@@ -252,8 +252,9 @@ def setup_logging(logging_path='logs'):
     log_path = os.path.join(os.getcwd(),logging_path)
     mkdir_p(log_path)
 
-    get_ind = lambda x: int(x.split('_')[1])
-    #run_counter = max([get_ind(r) for r in os.listdir(log_path)]) + 1
+    check_names = lambda y: y if isinstance(y, int) else -1
+    get_ind = lambda x: int(check_names(x.split('_')[1]))
+    run_counter = max(map(get_ind, os.listdir(log_path)), default=-1) + 1
 
     run_path = os.path.join(log_path, 'run_%s'%run_counter)
diff --git a/yolov2.py b/yolov2.py
index 1fba7cd..38526f2 100644
--- a/yolov2.py
+++ b/yolov2.py
@@ -1,6 +1,5 @@
 import pickle, argparse, json, cv2, os
-import tensorflow as tf
 import numpy as np
 from tqdm import tqdm
 import matplotlib.pyplot as plt
 
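
Editor's note (not part of the patch): the comment block in net/proper_nms.py explains why NMS has to run per class on the max of P(O) * P(Class|O) and suggests tf.while_loop for the class loop. Below is a minimal, hypothetical sketch of such a per-class NMS in TF 1.x graph mode; the name per_class_nms and its arguments are illustrative and do not come from this repository. Since the class count is a static Python integer, a plain for loop unrolls into the graph, and empty classes simply contribute zero rows, so neither tf.while_loop nor a Python-level `if` on a tensor is required.

# Hypothetical sketch, not the repo's implementation.
# `boxes` is (N, 4), `scores` is (N,), `classes` is (N,) integer class ids,
# i.e. the flattened tensors built in proper_nms.py.
import tensorflow as tf

def per_class_nms(boxes, scores, classes, num_classes, max_boxes=10, iou_threshold=0.4):
    kept_boxes, kept_scores, kept_classes = [], [], []
    # num_classes is a static Python int, so this loop unrolls into the graph.
    for c in range(num_classes):
        class_mask = tf.equal(classes, c)
        c_boxes = tf.boolean_mask(boxes, class_mask)      # (n_c, 4)
        c_scores = tf.boolean_mask(scores, class_mask)    # (n_c,)
        keep = tf.image.non_max_suppression(
            c_boxes, c_scores, max_output_size=max_boxes, iou_threshold=iou_threshold)
        kept_boxes.append(tf.gather(c_boxes, keep))
        kept_scores.append(tf.gather(c_scores, keep))
        kept_classes.append(tf.fill(tf.shape(keep), c))
    # Classes with no detections contribute empty tensors, so concatenating
    # along the box axis (axis 0) handles them without a special case and
    # keeps the results shaped (M, 4), (M,), (M,).
    return (tf.concat(kept_boxes, axis=0),
            tf.concat(kept_scores, axis=0),
            tf.concat(kept_classes, axis=0))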