diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e838f72 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.h5 +__pycache__/ +.DS_Store diff --git a/README.md b/README.md index 6438254..2622aec 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Pretrained COCO/VOC keras models can be downloaded [here](https://drive.google.c ##### *conf* -Pass a config.json file that looks like this: +Pass a config.json file that looks like this (minus the comments!): ``` { @@ -119,7 +119,7 @@ Example: python3 dourflow.py theoffice.png -m coco_model.h5 -c coco_config.json -t 0.35 ```
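Since `net/netparams.py` (added later in this diff) parses the file with a plain `json.loads`, strict JSON applies and the comments have to be stripped before the file is passed with `-c`. A minimal sketch of the loading step, assuming the `confs/config_voc.json` path added below:

```
import json

# Strict JSON forbids comments, hence "minus the comments!" above.
# Mirrors the json.loads(config_buffer.read()) call in net/netparams.py.
with open('confs/config_voc.json') as config_buffer:
    config = json.loads(config_buffer.read())

print(config['model']['input_size'])   # 416
print(config['train']['batch_size'])   # 16
```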
- +
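The anchor files added below (`models/coco/anchors_coco.txt`, `models/voc/anchors_voc.txt`) hold the YOLO v2 width/height priors in grid-cell units, flattened into a single comma-separated line. A sketch of how they decompose into (width, height) pairs, using the same `', '` split that `net/netparams.py` performs:

```
import numpy as np

# One line of 10 floats = 5 anchor boxes.
raw = open('models/coco/anchors_coco.txt').read()
anchors = [float(a) for a in raw.split(', ')]

num_bounding_boxes = len(anchors) // 2           # 5, as in netparams.py
priors = np.reshape(anchors, (num_bounding_boxes, 2))
print(priors)  # rows of (width, height) in grid cells
```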
diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/confs/config_coco.json b/confs/config_coco.json new file mode 100755 index 0000000..60eaf3c --- /dev/null +++ b/confs/config_coco.json @@ -0,0 +1,33 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 30, + "iou_threshold": 0.5, + "nms_threshold": 0.3 + }, + "config_path" : { + "labels": "models/coco/labels_coco.txt", + "anchors": "models/coco/anchors_coco.txt", + "arch_plotname": "" + }, + "train": { + "out_model_name": "", + "image_folder": "", + "annot_folder": "", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 20, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "", + "annot_folder": "", + "pred_folder": "" + } +} diff --git a/confs/config_voc.json b/confs/config_voc.json new file mode 100644 index 0000000..876c7ac --- /dev/null +++ b/confs/config_voc.json @@ -0,0 +1,34 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 10, + "iou_threshold": 0.5, + "nms_threshold": 0.3 + }, + "config_path" : { + "labels": "models/voc/labels_voc.txt", + "anchors": "models/voc/anchors_voc.txt", + "arch_plotname": "" + }, + "train": { + "out_model_name": "yolo_retrained_voc.h5", + "image_folder": "/home/kiran/Documents/DATA/VOC/train/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/train/anns", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 20, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "/home/kiran/Documents/DATA/VOC/valid/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/valid/anns", + "pred_folder": "/home/kiran/Documents/DATA/VOC/valid/img_pred", + "plot_preds": true + } +} diff --git a/dourflow.py b/dourflow.py new file mode 100644 index 0000000..abd9dd0 --- /dev/null +++ b/dourflow.py @@ -0,0 +1,22 @@ + +from net.netarch import generate_model +from net.netparams import YoloParams +from yolov2 import YoloV2, YoloInferenceModel +import os + + +# Add CPU option +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +if __name__ == '__main__': + + if YoloParams.WEIGHT_FILE: + generate_model() + + else: + YoloV2().run() + + + diff --git a/models/coco/anchors_coco.txt b/models/coco/anchors_coco.txt new file mode 100755 index 0000000..808be3a --- /dev/null +++ b/models/coco/anchors_coco.txt @@ -0,0 +1 @@ +0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 diff --git a/models/coco/labels_coco.txt b/models/coco/labels_coco.txt new file mode 100755 index 0000000..941cb4e --- /dev/null +++ b/models/coco/labels_coco.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git 
a/models/voc/anchors_voc.txt b/models/voc/anchors_voc.txt
new file mode 100755
index 0000000..5374c6f
--- /dev/null
+++ b/models/voc/anchors_voc.txt
@@ -0,0 +1 @@
+1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
diff --git a/models/voc/labels_voc.txt b/models/voc/labels_voc.txt
new file mode 100755
index 0000000..1168c39
--- /dev/null
+++ b/models/voc/labels_voc.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
\ No newline at end of file
diff --git a/net/__init__.py b/net/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/net/netarch.py b/net/netarch.py
new file mode 100644
index 0000000..037a738
--- /dev/null
+++ b/net/netarch.py
@@ -0,0 +1,227 @@
+from keras.models import Model, load_model
+from keras.layers import Reshape, Conv2D, Input, MaxPooling2D, BatchNormalization, Lambda
+from keras.layers.advanced_activations import LeakyReLU
+
+from keras.layers.merge import concatenate
+
+import tensorflow as tf
+import numpy as np
+import pickle, argparse, json, os
+
+from keras.utils.vis_utils import plot_model
+
+from net.netparams import YoloParams
+from net.netdecode import YoloOutProcess
+
+
+class YoloArchitecture(object):
+
+    def __init__(self):
+
+        self.in_model_name = YoloParams.IN_MODEL
+        self.plot_name = YoloParams.ARCH_FNAME
+
+    def get_model(self, loss_func):
+
+        yolo_model = self._load_yolo_model(loss_func)
+
+        if YoloParams.YOLO_MODE == 'train':
+            new_yolo_model = self._setup_transfer_learning(yolo_model)
+            #new_name = self.tl_weights_name.split('.')[0] + '_rand.h5'
+            #new_yolo_model.save_weights(new_name)
+
+        elif YoloParams.YOLO_MODE in ['inference','validate','video']:
+            new_yolo_model = yolo_model
+
+        else:
+            raise ValueError(
+                'Please set the action argument to \'train\', \'validate\' or pass an image file/dir.')
+
+        if self.plot_name:
+            plot_model(new_yolo_model, to_file=self.plot_name, show_shapes=True)
+
+        return new_yolo_model
+
+
+    def _load_yolo_model(self, loss_func):
+        # Error if not compiled with yolo_loss?
+        if os.path.isfile(self.in_model_name):
+            model = load_model(self.in_model_name,
+                custom_objects={'yolo_loss': loss_func})
+            return model
+        else:
+            raise ValueError('Need to load full model in order to do '
+                'transfer learning. Run script again with desired TL '
+                'config and weight file to generate model.')
+
+
+    def weights_to_model(self, in_path, out_path):
+        yolo_model = self._yolo_v2_architecture()
+
+        try:
+            yolo_model.load_weights(in_path)
+
+        except IOError:
+            print('File for pre-trained weights not found: %s'%in_path)
+            # Re-raise instead of silently saving a randomly initialized model.
+            raise
+
+        yolo_model.save(out_path)
+        return yolo_model
+
+
+
+    def _yolo_v2_architecture(self):
+        # Parse from cfg!
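+        # Darknet-19 style feature extractor: conv/BN/LeakyReLU blocks with
+        # occasional max-pooling, plus a space-to-depth passthrough taken after
+        # the 13th conv block and concatenated back in before the detection head.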
+ self.layer_counter = 0 + + def space_to_depth_x2(x): + + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + + def conv2D_bn_leaky(inp, filters, kernel_size=(3,3), strides=(1,1), maxpool=False): + self.layer_counter += 1 + x = Conv2D(filters, kernel_size=kernel_size, strides=strides, + padding='same', use_bias=False)(inp) + + x = BatchNormalization()(x) + x = LeakyReLU(alpha=0.1)(x) + if maxpool: + return MaxPooling2D(pool_size=(2, 2))(x) + return x + + input_image = Input(shape=(YoloParams.INPUT_SIZE, YoloParams.INPUT_SIZE, 3), name='input') + + # Layer 1 + x = conv2D_bn_leaky(input_image, 32, (3,3), (1,1), maxpool=True) + + # Layer 2 + x = conv2D_bn_leaky(x, 64, maxpool=True) + + # Layer 3 + x = conv2D_bn_leaky(x, 128) + + # Layer 4 + x = conv2D_bn_leaky(x, 64, kernel_size=(1,1)) + + # Layer 5 + x = conv2D_bn_leaky(x, 128, maxpool=True) + + # Layer 6 + x = conv2D_bn_leaky(x, 256) + + # Layer 7 + x = conv2D_bn_leaky(x, 128, kernel_size=(1,1)) + + # Layer 8 + x = conv2D_bn_leaky(x, 256, maxpool=True) + + # Layer 9 + x = conv2D_bn_leaky(x, 512) + + # Layer 10 + x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + + # Layer 11 + x = conv2D_bn_leaky(x, 512) + + # Layer 12 + x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + + # Layer 13 + x = conv2D_bn_leaky(x, 512) + + skip_connection = x + x = MaxPooling2D(pool_size=(2, 2))(x) + + # Layer 14 + x = conv2D_bn_leaky(x, 1024) + + # Layer 15 + x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # Layer 16 + x = conv2D_bn_leaky(x, 1024) + + # Layer 17 + x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # Layer 18 + x = conv2D_bn_leaky(x, 1024) + + # Layer 19 + x = conv2D_bn_leaky(x, 1024) + + # Layer 20 + x = conv2D_bn_leaky(x, 1024) + + # Layer 21 + skip_connection = conv2D_bn_leaky(skip_connection, 64, kernel_size=(1,1)) + skip_connection = Lambda(space_to_depth_x2)(skip_connection) + x = concatenate([skip_connection, x]) + + # Layer 22 + x = conv2D_bn_leaky(x, 1024) + + # Final Conv2D + x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + strides=(1,1), padding='same')(x) + + + output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + + yolo_model = Model(input_image, output) + + return yolo_model + + + + def _setup_transfer_learning(self, yolo_model): + + new_yolo_model = self._yolo_v2_update(yolo_model) + + layer = new_yolo_model.layers[-2] # the last convolutional layer + weights = layer.get_weights() + + S2 = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE + new_kernel = np.random.normal(size=weights[0].shape)/S2 + new_bias = np.random.normal(size=weights[1].shape)/S2 + + layer.set_weights([new_kernel, new_bias]) + + return new_yolo_model + + + + def _yolo_v2_update(self, old_yolo_model): + + x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + strides=(1,1), padding='same', name='conv_23')(old_yolo_model.layers[-3].output) + + output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + + yolo_model = Model(old_yolo_model.input, output) + + return yolo_model + + +def generate_model(): + + yolo_arch = YoloArchitecture() + + d = os.path.dirname(YoloParams.WEIGHT_FILE) + + out_fname = os.path.join(d, 'model.h5') + + print('------------------------------------') + print('Reading weights from: %s'%YoloParams.WEIGHT_FILE) + print('Loading into YOLO V2 architecture and storing...') + print('\n\n') + 
yolo_arch.weights_to_model(YoloParams.WEIGHT_FILE, out_fname)
+    print('\tModel saved: %s'%out_fname)
+    print('\n\n------------------------------------')
+    print('Done.')
+
+
+
+
diff --git a/net/netdecode.py b/net/netdecode.py
new file mode 100644
index 0000000..9ef23e9
--- /dev/null
+++ b/net/netdecode.py
@@ -0,0 +1,161 @@
+import tensorflow as tf
+from keras import backend as K
+import numpy as np
+
+from net.netparams import YoloParams
+
+
+def process_outs(b, s, c):
+
+    b_p = b
+    # Expand dims of scores and classes so we can concat them
+    # with the boxes and have the output of NMS as an added layer of YOLO.
+    # Have to do another expand_dims this time on the first dim of the result
+    # since NMS doesn't know about BATCH_SIZE (operates on 2D, see
+    # https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression)
+    # but keras needs this dimension in the output.
+    s_p = K.expand_dims(s, axis=-1)
+    c_p = K.expand_dims(c, axis=-1)
+
+    output_stack = K.concatenate([b_p, s_p, c_p], axis=1)
+    return K.expand_dims(output_stack, axis=0)
+
+
+class YoloOutProcess(object):
+
+
+    def __init__(self):
+
+        self.max_boxes = YoloParams.TRUE_BOX_BUFFER
+        self.nms_threshold = YoloParams.NMS_THRESHOLD
+        self.detection_threshold = YoloParams.DETECTION_THRESHOLD
+
+
+    def __call__(self, y_sing_pred):
+
+        # need to convert b's from GRID_SIZE units into IMG coords. Divide by grid here.
+        b_xy = (K.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]) / YoloParams.GRID_SIZE
+        b_wh = (K.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]) / YoloParams.GRID_SIZE
+        b_xy1 = b_xy - b_wh / 2.
+        b_xy2 = b_xy + b_wh / 2.
+        boxes = K.concatenate([b_xy1, b_xy2], axis=-1)
+
+        scores_all = K.expand_dims(K.sigmoid(y_sing_pred[..., 4]), axis=-1) * K.softmax(y_sing_pred[...,5:])
+        indicator_detection = scores_all > self.detection_threshold
+        scores_all = scores_all * K.cast(indicator_detection, np.float32)
+
+        classes = K.argmax(scores_all, axis=-1)
+        scores = K.max(scores_all, axis=-1)
+
+        S2B = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES
+
+        flatten_boxes = K.reshape(boxes, shape=(S2B, 4))
+        flatten_scores = K.reshape(scores, shape=(S2B, ))
+        flatten_classes = K.reshape(classes, shape=(S2B, ))
+
+        selected_indices = tf.image.non_max_suppression(
+            flatten_boxes,
+            flatten_scores,
+            max_output_size=self.max_boxes,
+            iou_threshold=self.nms_threshold)
+
+        selected_boxes = K.gather(flatten_boxes, selected_indices)
+        selected_scores = K.gather(flatten_scores, selected_indices)
+        selected_classes = tf.gather(flatten_classes, selected_indices)
+
+        # Let's double-check this step.
+        score_mask = selected_scores > self.detection_threshold
+
+        selected_boxes = tf.boolean_mask(selected_boxes, score_mask)
+        selected_scores = tf.boolean_mask(selected_scores, score_mask)
+        selected_classes = tf.boolean_mask(selected_classes, score_mask)
+
+        return process_outs(selected_boxes, selected_scores, K.cast(selected_classes, np.float32))
+
+    def proper_yolo_nms(self, y_sing_pred):
+        # NMS needs to be applied per class, since two different boxes could predict with high confidence
+        # two objects that have high IOU.
+        # At the same time, even though NMS has to be done per class, it can only be done with max values
+        # of P(O) * P(Class|O) since we want to avoid the same box predicting 2 overlapping objects.
+        # Doing both these things turns out to be a real pain.
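+        # NOTE: experimental per-class variant, not called from __call__ above.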
+
+        # CONSIDER USING tf.while_loop for the FOR
+
+        b_xy = tf.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]
+        b_wh = tf.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]
+        b_xy1 = b_xy - b_wh / 2.
+        b_xy2 = b_xy + b_wh / 2.
+        boxes = tf.concat([b_xy1, b_xy2], axis=-1)
+
+
+        scores_all = tf.expand_dims(tf.sigmoid(y_sing_pred[..., 4]), axis=-1) * tf.nn.softmax(y_sing_pred[...,5:])
+        indicator_detection = scores_all > self.detection_threshold
+
+        scores_all = scores_all * tf.to_float(indicator_detection)
+
+        classes = tf.argmax(scores_all, axis=-1)
+
+        scores = tf.reduce_max(scores_all, axis=-1)
+
+        flatten_boxes = tf.reshape(boxes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, 4))
+        flatten_scores = tf.reshape(scores,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+        flatten_classes = tf.reshape(classes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+
+        output_boxes = []
+        output_scores = []
+        output_classes = []
+        for c in range(YoloParams.NUM_CLASSES):
+            # Python-level truthiness of a tensor: only works when evaluated
+            # eagerly, another reason for the tf.while_loop note above.
+            if tf.reduce_sum(tf.to_float(tf.equal(flatten_classes, c))) > 0:
+                filtered_flatten_boxes = tf.boolean_mask(flatten_boxes, tf.equal(flatten_classes, c))
+                filtered_flatten_scores = tf.boolean_mask(flatten_scores, tf.equal(flatten_classes, c))
+                filtered_flatten_classes = tf.boolean_mask(flatten_classes, tf.equal(flatten_classes, c))
+
+                selected_indices = tf.image.non_max_suppression(
+                    filtered_flatten_boxes, filtered_flatten_scores, self.max_boxes, self.nms_threshold)
+
+                selected_boxes = K.gather(filtered_flatten_boxes, selected_indices)
+                selected_scores = K.gather(filtered_flatten_scores, selected_indices)
+                selected_classes = K.gather(filtered_flatten_classes, selected_indices)
+
+
+                output_boxes.append( selected_boxes )
+                output_scores.append( selected_scores )
+                output_classes.append( selected_classes )
+
+
+        print(output_boxes)
+
+        print(tf.concat(output_boxes, axis=-1).eval())
+        print(tf.concat(output_scores, axis=-1).eval())
+        print(tf.concat(output_classes, axis=-1).eval())
+
+        return tf.concat(output_boxes, axis=-1), tf.concat(output_scores, axis=-1), tf.concat(output_classes, axis=-1)
+
+
+
+if __name__ == '__main__':
+
+    sess = tf.InteractiveSession()
+
+    max_boxes = 10
+    nms_threshold = 0.1
+    boxes = tf.convert_to_tensor(np.random.rand(10,4), np.float32)
+    scores = tf.convert_to_tensor(np.random.rand(10,), np.float32)
+
+    classes = tf.convert_to_tensor((10.*np.random.rand(10,)%3).astype(int), np.float32)
+
+    # Smoke test: class-agnostic NMS, as used in YoloOutProcess.__call__.
+    selected = tf.image.non_max_suppression(boxes, scores, max_boxes, nms_threshold)
+    b = tf.gather(boxes, selected)
+    s = tf.gather(scores, selected)
+    c = tf.gather(classes, selected)
+
+    print(boxes.eval().shape)
+    print(scores.eval().shape)
+    print(classes.eval().shape)
+
+    print('-----------------------')
+
+    print(b.eval().shape)
+    print(s.eval().shape)
+    print(c.eval().shape)
+
diff --git a/net/neteval.py b/net/neteval.py
new file mode 100644
index 0000000..f4a3414
--- /dev/null
+++ b/net/neteval.py
@@ -0,0 +1,323 @@
+
+
+
+from net.netparams import YoloParams
+from net.netdecode import YoloOutProcess
+
+import matplotlib.pyplot as plt
+import numpy as np
+import cv2, os
+import keras
+from net.utils import draw_boxes, compute_iou, mkdir_p, yolo_normalize, handle_empty_indexing
+
+from tqdm import tqdm
+
+
+
+
+class YoloDataGenerator(keras.utils.Sequence):
+    'Generates data for Keras'
+    def __init__(self, images, shuffle=True):
+
+        self.images = self._prune_ann_labels(images)
+        self.input_size = YoloParams.INPUT_SIZE
+        self.anchors = YoloParams.anchors
+
+        self.generator = None
+
+        self.batch_size = YoloParams.BATCH_SIZE
+
+        self.shuffle = shuffle
+        self.on_epoch_end()
+
+    def __len__(self):
+        'Denotes the number of batches per epoch'
+        # return int(np.ceil(float(len(self.images))/self.config['BATCH_SIZE']))
+        return int(np.floor(len(self.images) / self.batch_size))
+
+    def __getitem__(self, index):
+        'Generate one batch of data'
+        bound_l = index*self.batch_size
+        bound_r = (index+1)*self.batch_size
+
+        return self._data_to_yolo_output(self.images[bound_l:bound_r])
+
+    def load_image_name(self, i):
+        return self.images[i]['filename']
+
+
+    def load_image(self, i):
+        return cv2.imread(self.images[i]['filename'])
+
+    def load_annotation(self, i):
+        labels = []
+        bboxes = []
+
+        height = self.images[i]['height']
+        width = self.images[i]['width']
+
+        for obj in self.images[i]['object']:
+            #if obj['name'] in YoloParams.CLASS_LABELS:
+            labels.append( obj['name'] )
+            bboxes.append(
+                [obj['xmin'] / width, obj['ymin'] / height, obj['xmax'] / width, obj['ymax'] / height] )
+
+
+        class_inds = [YoloParams.CLASS_TO_INDEX[l] for l in labels]
+
+        return np.array(bboxes), np.array(class_inds)
+
+    def on_epoch_end(self):
+        'Updates indexes after each epoch'
+        if self.shuffle: np.random.shuffle(self.images)
+
+    def _prune_ann_labels(self, images):
+        clean_images = []
+        for im in images:
+            clean_im = im.copy()
+            clean_objs = []
+            for obj in clean_im['object']:
+                if obj['name'] in YoloParams.CLASS_LABELS:
+                    clean_objs.append( obj )
+
+            clean_im.update({'object' : clean_objs})
+            clean_images.append(clean_im)
+
+        return clean_images
+
+
+    def _data_to_yolo_output(self, batch_images):
+
+        # INPUT IMAGES READY FOR TRAINING
+        x_batch = np.zeros((len(batch_images), self.input_size, self.input_size, 3))
+
+        # GET DESIRED NETWORK OUTPUT
+        y_batch = np.zeros((len(batch_images), YoloParams.GRID_SIZE,
+            YoloParams.GRID_SIZE, YoloParams.NUM_BOUNDING_BOXES, 4+1+len(YoloParams.CLASS_LABELS)))
+
+        grid_factor = YoloParams.GRID_SIZE / self.input_size
+
+        for j, train_instance in enumerate(batch_images):
+
+            img_raw = cv2.imread(train_instance['filename'])
+
+            h_factor_resize = img_raw.shape[0] / self.input_size
+            w_factor_resize = img_raw.shape[1] / self.input_size
+
+            img = cv2.resize(img_raw, (self.input_size, self.input_size))
+
+            for obj_box_idx, label in enumerate(train_instance['object']):
+
+                xmin_resized = int(round(label['xmin'] / w_factor_resize))
+                xmax_resized = int(round(label['xmax'] / w_factor_resize))
+                ymin_resized = int(round(label['ymin'] / h_factor_resize))
+                ymax_resized = int(round(label['ymax'] / h_factor_resize))
+
+                bbox_center_x = .5*(xmin_resized + xmax_resized) * grid_factor
+                grid_x = int(bbox_center_x)
+
+                bbox_center_y = .5*(ymin_resized + ymax_resized) * grid_factor
+                grid_y = int(bbox_center_y)
+
+                obj_indx = YoloParams.CLASS_LABELS.index(label['name'])
+
+                bbox_w = (xmax_resized - xmin_resized) * grid_factor
+                bbox_h = (ymax_resized - ymin_resized) * grid_factor
+
+                shifted_wh = np.array([0,0,bbox_w, bbox_h])
+
+                func = lambda prior: compute_iou((0,0,prior[0],prior[1]), shifted_wh)
+
+                anchor_winner = np.argmax(np.apply_along_axis(func, -1, self.anchors))
+
+                # assign ground truth x, y, w, h, confidence and class probs to y_batch
+
+                # ASSIGN BOX COORDINATES
+                y_batch[j, grid_y, grid_x, anchor_winner, 0:4] = [bbox_center_x, bbox_center_y, bbox_w, bbox_h]
+
+                # ASSIGN OBJECTNESS CONF
+                y_batch[j, grid_y, grid_x, anchor_winner, 4] = 1.
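+                # y_batch layout per anchor slot: [x, y, w, h, objectness, one-hot class scores],
+                # with x, y, w, h in grid-cell units.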
+
+                # ASSIGN CORRECT CLASS
+                y_batch[j, grid_y, grid_x, anchor_winner, 4+1+obj_indx] = 1
+
+            # number of labels per instance !> than true_box_buffer, add check in processing (?)
+            x_batch[j] = yolo_normalize(img)
+
+        ############################################################
+        # x_batch -> list of input images
+        # y_batch -> list of network output gt values for each image
+        ############################################################
+        return x_batch, y_batch
+
+
+
+
+class YoloEvaluate(object):
+
+
+    def __init__(self, generator, model):
+
+        self.inf_model = model
+        self.generator = generator
+        self.class_labels = np.array(YoloParams.CLASS_LABELS)
+
+        self.iou_detection_threshold = YoloParams.IOU_THRESHOLD
+
+        self.val_out_path = YoloParams.VALIDATION_OUT_PATH
+        self.debug_plots = True if self.val_out_path else False
+
+        if self.debug_plots: mkdir_p(self.val_out_path)
+
+
+    def _find_detection(self, q_box, boxes, global_index):
+
+        if boxes.size == 0:
+            #print('EMPTY BOXES')
+            return -1
+
+        ious = list(map(lambda x: compute_iou(q_box, x), boxes))
+
+        max_iou_index = np.argmax( ious )
+
+        if ious[max_iou_index] > self.iou_detection_threshold:
+            return global_index[max_iou_index]
+
+        return -1
+
+
+    def _plot_preds(self, image, pred_info, true_info, image_index):
+
+        image_out = draw_boxes(image, pred_info)
+        image_out = draw_boxes(image_out, true_info)
+        image_name = os.path.basename( self.generator.load_image_name(image_index) )
+
+        outfile = os.path.join(self.val_out_path, image_name)
+        cv2.imwrite(outfile, image_out)
+
+
+
+    def _process_image(self, i):
+
+        true_boxes, true_labels = self.generator.load_annotation(i)
+
+        image = self.generator.load_image(i)
+
+        pred_boxes, conf, pred_labels, _ = self.inf_model.predict(image.copy())
+
+        if self.debug_plots:
+
+            # np.array(YoloParams.CLASS_LABELS)[pred_labels]
+            label_names_pred = handle_empty_indexing(self.class_labels, pred_labels)
+            label_names_true = self.class_labels[true_labels]
+
+            pred_info = (pred_boxes, conf, label_names_pred)
+            true_info = (true_boxes, None, label_names_true)
+
+            self._plot_preds(image.copy(), pred_info=pred_info, true_info=true_info, image_index=i)
+
+
+        sorted_inds = np.argsort(-conf)
+
+        repeat_mask = [True]*len(true_boxes)
+        matched_labels = []
+        global_index = np.arange(len(true_labels))
+
+
+        image_results = []
+        image_labels = [0]*YoloParams.NUM_CLASSES
+
+        for tl in true_labels:
+            image_labels[tl] += 1
+
+        # k indexes predictions (avoids shadowing the image index i).
+        for k in sorted_inds:
+
+            label_mask = (pred_labels[k] == true_labels)
+            index_subset = global_index[(repeat_mask)&(label_mask)]
+            true_boxes_subset = true_boxes[(repeat_mask)&(label_mask)]
+
+            idx = self._find_detection(pred_boxes[k], true_boxes_subset, index_subset)
+
+            if idx != -1:
+                matched_labels.append(idx)
+                repeat_mask[idx] = False
+
+            image_results.append([pred_labels[k], conf[k], 1 if idx != -1 else 0])
+
+        return image_results, image_labels
+
+
+    def _interp_ap(self, precision, recall):
+
+        if precision.size == 0 or recall.size == 0:
+            return 0.
+
+        iap = 0
+        for r in np.arange(0.,1.1, 0.1):
+            recall_mask = (recall >= r)
+            p_max = precision[recall_mask]
+
+            iap += np.max( p_max if p_max.size > 0 else [0] )
+
+        return iap / 11
+
+
+    def compute_ap(self, detections, num_gts):
+
+        detections_sort_indx = np.argsort(-detections[:,1])
+        detections = detections[detections_sort_indx]
+
+        precision = []
+        recall = []
+
+        if num_gts == 0:
+            return 0.
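+        # Walk detections in descending confidence; detections[:, 2] is the
+        # TP flag, so precision = TP/i and recall = TP/num_gts at each cutoff.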
+ + for i in range(1, len(detections) + 1): + + precision.append( np.sum(detections[:i][:,2]) / i ) + recall.append( np.sum(detections[:i][:,2]) / num_gts ) + + return self._interp_ap(np.array(precision), np.array(recall)) + + + + + def __call__(self): + + detection_results = [] + detection_labels = np.array([0]*YoloParams.NUM_CLASSES) + + num_annotations = 0 + counter = 0 + + for i in tqdm(range(len(self.generator.images)), desc='Batch Processed'): + counter += 1 + + image_name = os.path.basename( self.generator.load_image_name(i) ) + + #if image_name == '2011_003285.jpg': + + image_results, image_labels = self._process_image(i) + + detection_results.extend(image_results) + detection_labels += np.array(image_labels) + + + detection_results = np.array(detection_results) + + ap_dic = {} + for class_ind, num_gts in enumerate(detection_labels): + class_detections = detection_results[detection_results[:,0]==class_ind] + + ap = self.compute_ap(class_detections, num_gts) + + ap_dic[self.class_labels[class_ind]] = ap + + + return ap_dic + + + + diff --git a/net/netloss.py b/net/netloss.py new file mode 100644 index 0000000..5f33079 --- /dev/null +++ b/net/netloss.py @@ -0,0 +1,185 @@ + +import tensorflow as tf +import numpy as np + +from net.netparams import YoloParams + +EPSILON = 1e-6 + + +def calculate_ious(A1, A2, use_iou=True): + + if not use_iou: + return 1. + + A1_xy = A1[..., 0:2] + A1_wh = A1[..., 2:4] + + A2_xy = A2[..., 0:2] + A2_wh = A2[..., 2:4] + + A1_wh_half = A1_wh / 2. + A1_mins = A1_xy - A1_wh_half + A1_maxes = A1_xy + A1_wh_half + + A2_wh_half = A2_wh / 2. + A2_mins = A2_xy - A2_wh_half + A2_maxes = A2_xy + A2_wh_half + + intersect_mins = tf.maximum(A2_mins, A1_mins) + intersect_maxes = tf.minimum(A2_maxes, A1_maxes) + intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
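+    # The zero clamp keeps disjoint boxes from producing negative widths/heights.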
+ intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] + + true_areas = A1_wh[..., 0] * A1_wh[..., 1] + pred_areas = A2_wh[..., 0] * A2_wh[..., 1] + + union_areas = pred_areas + true_areas - intersect_areas + iou_scores = tf.truediv(intersect_areas, union_areas) + + return iou_scores + + +class YoloLoss(object): + # ADD WARM UP CONDITIONS + + def __init__(self): + + self.__name__ = 'yolo_loss' + self.iou_threshold = YoloParams.IOU_THRESHOLD + self.readjust_obj_score = True + + self.lambda_coord = YoloParams.COORD_SCALE + self.lambda_noobj = YoloParams.NO_OBJECT_SCALE + self.lambda_obj = YoloParams.OBJECT_SCALE + self.lambda_class = YoloParams.CLASS_SCALE + + self.norm = False + + def coord_loss(self, y_true, y_pred): + + b_xy_pred = y_pred[..., :2] + b_wh_pred = y_pred[..., 2:4] + + b_xy = y_true[..., 0:2] + b_wh = y_true[..., 2:4] + + indicator_coord = tf.expand_dims(y_true[..., 4], axis=-1) * self.lambda_coord + + norm_coord = 1 + if self.norm: + norm_coord = tf.reduce_sum(tf.to_float(indicator_coord > 0.0)) + + + loss_xy = tf.reduce_sum(tf.square(b_xy - b_xy_pred) * indicator_coord, axis=[1,2,3,4]) + #loss_wh = tf.reduce_sum(tf.square(b_wh - b_wh_pred) * indicator_coord, axis=[1,2,3,4]) + loss_wh = tf.reduce_sum(tf.square(tf.sqrt(b_wh) - tf.sqrt(b_wh_pred)) * indicator_coord, axis=[1,2,3,4]) + + return (loss_wh + loss_xy) / (norm_coord + EPSILON) / 2 + + + def obj_loss(self, y_true, y_pred): + + b_o = calculate_ious(y_true, y_pred, use_iou=self.readjust_obj_score) * y_true[..., 4] + b_o_pred = y_pred[..., 4] + + num_true_labels = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES + y_true_p = tf.reshape(y_true[..., :4], shape=(YoloParams.BATCH_SIZE, 1, 1, 1, num_true_labels, 4)) + iou_scores_buff = calculate_ious(y_true_p, tf.expand_dims(y_pred, axis=4)) + best_ious = tf.reduce_max(iou_scores_buff, axis=4) + + indicator_noobj = tf.to_float(best_ious < self.iou_threshold) * (1 - y_true[..., 4]) * self.lambda_noobj + indicator_obj = y_true[..., 4] * self.lambda_obj + + + norm_conf = 1 + if self.norm: + norm_conf = tf.reduce_sum(tf.to_float((indicator_obj + indicator_noobj) > 0.0)) + + loss_obj = tf.reduce_sum(tf.square(b_o-b_o_pred) * (indicator_obj + indicator_noobj), axis=[1,2,3]) + + return loss_obj / (norm_conf + EPSILON) / 2 + + + def class_loss(self, y_true, y_pred): + + b_class = tf.argmax(y_true[..., 5:], axis=-1) + b_class_pred = y_pred[..., 5:] + + indicator_class = y_true[..., 4] * tf.gather( + YoloParams.CLASS_WEIGHTS, b_class) * self.lambda_class + + norm_class = 1 + if self.norm: + norm_class = tf.reduce_sum(tf.to_float(indicator_class > 0.0)) + + loss_class_arg = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=b_class, logits=b_class_pred) + + loss_class = tf.reduce_sum(loss_class_arg * indicator_class, axis=[1,2,3]) + + return loss_class / (norm_class + EPSILON) + + + def _transform_netout(self, y_pred_raw): + y_pred_xy = tf.sigmoid(y_pred_raw[..., :2]) + YoloParams.c_grid + y_pred_wh = tf.exp(y_pred_raw[..., 2:4]) * YoloParams.anchors + y_pred_conf = tf.sigmoid(y_pred_raw[..., 4:5]) + y_pred_class = y_pred_raw[...,5:] + + return tf.concat([y_pred_xy, y_pred_wh, y_pred_conf, y_pred_class], axis=-1) + + + + def __call__(self, y_true, y_pred_raw): + + y_pred = self._transform_netout(y_pred_raw) + + total_coord_loss = self.coord_loss(y_true, y_pred) + total_obj_loss = self.obj_loss(y_true, y_pred) + total_class_loss = self.class_loss(y_true, y_pred) + + loss = total_coord_loss + total_obj_loss + total_class_loss + + #loss = 
tf.Print(loss, [total_coord_loss], message='\nCoord Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [total_obj_loss], message='Conf Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [total_class_loss], message='Class Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
+
+        return loss
+
+
+
+
+if __name__ == '__main__':
+
+    sess = tf.InteractiveSession()
+
+    y_pred = tf.convert_to_tensor(np.random.rand(16,13,13,5,85), np.float32)
+    y_true = tf.convert_to_tensor(np.random.rand(16,13,13,5,85), np.float32)
+
+    var = YoloLoss()
+
+    print( var(y_true, y_pred).eval() )
+
+
diff --git a/net/netparams.py b/net/netparams.py
new file mode 100644
index 0000000..c64efe7
--- /dev/null
+++ b/net/netparams.py
@@ -0,0 +1,144 @@
+
+import pickle, argparse, json, os, sys
+import tensorflow as tf
+import numpy as np
+
+
+argparser = argparse.ArgumentParser(
+    description='dourflow: a keras YOLO V2 implementation.')
+
+
+argparser.add_argument(
+    'action',
+    help='what to do: \'train\', \'validate\' or pass an image file/dir.')
+
+
+argparser.add_argument(
+    '-m',
+    '--model',
+    help='path to input yolo v2 keras model',
+    default='yolo_model.h5')
+
+
+argparser.add_argument(
+    '-c',
+    '--conf',
+    help='path to configuration file',
+    default='config.json')
+
+
+argparser.add_argument(
+    '-t',
+    '--threshold',
+    type=float,
+    help='detection threshold',
+    default=0.3)
+
+
+argparser.add_argument(
+    '-w',
+    '--weight_file',
+    help='path to weight file',
+    default='weights.h5')
+
+
+args = argparser.parse_args()
+
+
+action = args.action
+config_path = args.conf
+
+
+with open(config_path) as config_buffer:
+    config = json.loads(config_buffer.read())
+
+
+
+def generate_yolo_grid(batch, g, num_bb):
+    c_x = tf.to_float(tf.reshape(tf.tile(tf.range(g), [g]), (1, g, g, 1, 1)))
+    c_y = tf.transpose(c_x, (0,2,1,3,4))
+    return tf.tile(tf.concat([c_x, c_y], -1), [batch, 1, 1, num_bb, 1])
+
+
+
+
+def get_threshold(value):
+    if value > 1. or value < 0:
+        raise ValueError('Please enter a valid threshold (between 0. and 1.).')
+    return value
+
+
+
+class YoloParams(object):
+
+    # Mode
+    PREDICT_IMAGE = ''
+    WEIGHT_FILE = ''
+    if action != 'gen':
+        if action == 'validate' or action == 'train':
+            YOLO_MODE = action
+        else:
+            if os.path.isdir(action):
+                YOLO_MODE = 'inference'
+            elif os.path.isfile(action):
+                if action.split('.')[-1] in ['mp4','avi','wmv','mpg','mpeg']:
+                    YOLO_MODE = 'video'
+                else:
+                    YOLO_MODE = 'inference'
+            else:
+                raise ValueError('First argument for dourflow must be: \'train\','
+                    ' \'validate\' or an image file/dir.')
+
+        PREDICT_IMAGE = action
+    else:
+        assert args.weight_file, "Need to pass weight file if generating model."
+    # Paths
+    WEIGHT_FILE = args.weight_file
+
+    TRAIN_IMG_PATH = config['train']['image_folder']
+    TRAIN_ANN_PATH = config['train']['annot_folder']
+
+    VALIDATION_IMG_PATH = config['valid']['image_folder']
+    VALIDATION_ANN_PATH = config['valid']['annot_folder']
+    VALIDATION_OUT_PATH = config['valid']['pred_folder']
+
+    # Model
+    #IN_MODEL = config['config_path']['in_model']
+    IN_MODEL = args.model
+    OUT_MODEL_NAME = config['train']['out_model_name']
+
+    ARCH_FNAME = config['config_path']['arch_plotname']
+
+    # Classes
+    CLASS_LABELS = [x.rstrip() for x in open(config['config_path']['labels'])]
+    NUM_CLASSES = len(CLASS_LABELS)
+    CLASS_TO_INDEX = dict(zip(CLASS_LABELS, np.arange(NUM_CLASSES)))
+    CLASS_WEIGHTS = np.ones(NUM_CLASSES, dtype='float32')
+
+    # Infrastructure params
+    INPUT_SIZE = config['model']['input_size']
+    GRID_SIZE = config['model']['grid_size']
+    TRUE_BOX_BUFFER = config['model']['true_box_buffer']
+    ANCHORS = [float(a) for a in open(config['config_path']['anchors']).read().split(', ')]
+
+    NUM_BOUNDING_BOXES = len(ANCHORS) // 2
+    OBJECT_SCALE = 5.0
+    NO_OBJECT_SCALE = 1.0
+    CLASS_SCALE = 1.0
+    COORD_SCALE = 1.0
+
+    # Train params
+    BATCH_SIZE = config['train']['batch_size']
+    L_RATE = config['train']['learning_rate']
+    NUM_EPOCHS = config['train']['num_epochs']
+    TRAIN_VERBOSE = config['train']['verbose']
+
+    # Thresholding
+    IOU_THRESHOLD = get_threshold(config['model']['iou_threshold'])
+    NMS_THRESHOLD = get_threshold(config['model']['nms_threshold'])
+    DETECTION_THRESHOLD = get_threshold(args.threshold)
+
+    # Additional / Precomputing
+    c_grid = generate_yolo_grid(BATCH_SIZE, GRID_SIZE, NUM_BOUNDING_BOXES)
+    anchors = np.reshape(ANCHORS, [1,1,1,NUM_BOUNDING_BOXES,2])
+
diff --git a/net/utils.py b/net/utils.py
new file mode 100755
index 0000000..9f39e6d
--- /dev/null
+++ b/net/utils.py
@@ -0,0 +1,291 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import os, errno
+import xml.etree.ElementTree as ET
+
+import tensorflow as tf
+import copy
+import cv2
+from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
+
+
+
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError as exc:  # Python >2.5
+        if exc.errno == errno.EEXIST and os.path.isdir(path):
+            pass
+        else:
+            raise
+
+
+def compute_iou(bb_1, bb_2):
+
+    xa0, ya0, xa1, ya1 = bb_1
+    xb0, yb0, xb1, yb1 = bb_2
+
+    # Clamp each overlap dimension at zero so disjoint boxes yield zero
+    # intersection (the unclamped product could come out positive).
+    intersec = max(0., min(xa1, xb1) - max(xa0, xb0)) * max(0., min(ya1, yb1) - max(ya0, yb0))
+
+    union = (xa1 - xa0)*(ya1 - ya0) + (xb1 - xb0)*(yb1 - yb0) - intersec
+
+    return intersec / union
+
+
+def benchmark_timings(data, path=''):
+
+    fig = plt.figure(figsize=(10,15))
+    ax = plt.gca()
+    df = pd.DataFrame(data)
+    df.plot(ax=ax, kind='area', subplots=True)
+    plt.savefig(path + 'timings.png', format='png')
+    plt.close()
+
+    df2 = df.apply(lambda x: x/df['total'], axis=0)[['decode', 'prediction', 'prepro']]
+
+    fig = plt.figure(figsize=(20,13))
+    ax = fig.add_subplot(111)
+    df2.plot(ax=ax)
+    vals = ax.get_yticks()
+    ax.set_yticklabels(['{:,.1%}'.format(x) for x in vals])
+    plt.savefig(path + 'timings_combined.png', format='png')
+    plt.close()
+
+
+
+
+def space_to_depth_x2(x):
+    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
+    # Import currently required to make Lambda work.
+ # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 + + # the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K) + + + # tf.space_to_depth: + # Input: [batch, height, width, depth] + # Output: [batch, height/block_size, width/block_size, depth*block_size*block_size] + # Example: [1,4,4,1] -> [1,2,2,4] or in this case [?,38,38,64] -> [?,19,19,256] + # This operation is useful for resizing the activations between convolutions (but keeping all data), + # e.g. instead of pooling. It is also useful for training purely convolutional models. + + # space_to_depth_x2 is just tf.space_to_depth wrapped with block_size=2 + + + # Example + """ + input shape = (4,4,1) + + [ + [[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]] + ] + + is divided into the following chunks (block_size, block_size, channels): + + [[[1], [2]], [[[3], [4]], + [[5], [6]]] [[7], [8]]] + + [[[9], [10],] [[[11], [12]], + [[13], [14]]] [[15], [16]]] + + flatten each chunk to a single array: + + [[1, 2, 5, 6]], [[3, 4, 7, 8]] + [[9, 10, 13, 14]], [[11, 12, 15, 16]] + + + spatially rearrange chunks according to their initial position: + + [ + [[1, 2, 5, 6]], [[3, 4, 7, 8]], + [[9 10, 13, 14]], [[11, 12, 15, 16]] + ] + + output shape = (2,2,4) + """ + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + +def draw_boxes(image, info): + image_h, image_w, _ = image.shape + + boxes, scores, labels = info + color_mod = 255 + + for i in range(len(boxes)): + xmin = int(boxes[i][0]*image_w) + ymin = int(boxes[i][1]*image_h) + xmax = int(boxes[i][2]*image_w) + ymax = int(boxes[i][3]*image_h) + + if scores is None: + #text = "%s"%(labels[i]) + text = '' + color_mod = 0 + else: + text = "%s (%.1f%%)"%(labels[i], 100*scores[i]) + + cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (color_mod,255,0), 2) + + cv2.putText(image, + text, + (xmin, ymin - 15), + cv2.FONT_HERSHEY_COMPLEX, + 1e-3 * image_h, + (color_mod,255,0), 1) + return image + + +def parse_annotation(ann_dir, img_dir, labels=[]): + # from https://github.com/experiencor/keras-yolo2/blob/master/preprocessing.py + all_imgs = [] + seen_labels = {} + # go through annotations by sorted filename + for ann in sorted(os.listdir(ann_dir)): + img = {'object':[]} + tree = ET.parse(os.path.join(ann_dir, ann)) + + for elem in tree.iter(): + if 'filename' in elem.tag: + img['filename'] = os.path.join(img_dir, elem.text) + if 'width' in elem.tag: + img['width'] = int(elem.text) + if 'height' in elem.tag: + img['height'] = int(elem.text) + if 'object' in elem.tag or 'part' in elem.tag: + obj = {} + + for attr in list(elem): + if 'name' in attr.tag: + obj['name'] = attr.text + + if obj['name'] in seen_labels: + seen_labels[obj['name']] += 1 + else: + seen_labels[obj['name']] = 1 + + if len(labels) > 0 and obj['name'] not in labels: + break + else: + img['object'] += [obj] + + if 'bndbox' in attr.tag: + for dim in list(attr): + if 'xmin' in dim.tag: + obj['xmin'] = int(round(float(dim.text))) + if 'ymin' in dim.tag: + obj['ymin'] = int(round(float(dim.text))) + if 'xmax' in dim.tag: + obj['xmax'] = int(round(float(dim.text))) + if 'ymax' in dim.tag: + obj['ymax'] = int(round(float(dim.text))) + + if len(img['object']) > 0: + all_imgs += [img] + + # all_imgs: [img1, img2, img3, ..] 
+ # + """ + img: + {'object' : [{'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': }, # object 1 + {'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': }, # object 2 + {'name': 'class2', 'xmin': , 'ymin': , 'xmax': , 'ymax': }] # object 3 + 'filename' :