diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e838f72 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.h5 +__pycache__/ +.DS_Store diff --git a/README.md b/README.md index 6438254..2622aec 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Pretrained COCO/VOC keras models can be downloaded [here](https://drive.google.c ##### *conf* -Pass a config.json file that looks like this: +Pass a config.json file that looks like this (minus the comments!): ``` { @@ -119,7 +119,7 @@ Example: python3 dourflow.py theoffice.png -m coco_model.h5 -c coco_config.json -t 0.35 ```
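(For orientation: the `confs/*.json` files added below are read once at startup by `net/netparams.py`. A minimal sketch of that loading pattern, assuming the repo layout introduced in this diff:)

```python
import json

# Mirrors the config loading in net/netparams.py; the path assumes the
# confs/ directory added in this diff.
with open('confs/config_voc.json') as config_buffer:
    config = json.loads(config_buffer.read())

print(config['model']['input_size'])   # 416
print(config['model']['grid_size'])    # 13
print(config['train']['batch_size'])   # 16
```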


diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/confs/config_coco.json b/confs/config_coco.json new file mode 100755 index 0000000..60eaf3c --- /dev/null +++ b/confs/config_coco.json @@ -0,0 +1,33 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 30, + "iou_threshold": 0.5, + "nms_threshold": 0.3 + }, + "config_path" : { + "labels": "models/coco/labels_coco.txt", + "anchors": "models/coco/anchors_coco.txt", + "arch_plotname": "" + }, + "train": { + "out_model_name": "", + "image_folder": "", + "annot_folder": "", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 20, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "", + "annot_folder": "", + "pred_folder": "" + } +} diff --git a/confs/config_voc.json b/confs/config_voc.json new file mode 100644 index 0000000..876c7ac --- /dev/null +++ b/confs/config_voc.json @@ -0,0 +1,34 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 10, + "iou_threshold": 0.5, + "nms_threshold": 0.3 + }, + "config_path" : { + "labels": "models/voc/labels_voc.txt", + "anchors": "models/voc/anchors_voc.txt", + "arch_plotname": "" + }, + "train": { + "out_model_name": "yolo_retrained_voc.h5", + "image_folder": "/home/kiran/Documents/DATA/VOC/train/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/train/anns", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 20, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "/home/kiran/Documents/DATA/VOC/valid/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/valid/anns", + "pred_folder": "/home/kiran/Documents/DATA/VOC/valid/img_pred", + "plot_preds": true + } +} diff --git a/dourflow.py b/dourflow.py new file mode 100644 index 0000000..abd9dd0 --- /dev/null +++ b/dourflow.py @@ -0,0 +1,22 @@ + +from net.netarch import generate_model +from net.netparams import YoloParams +from yolov2 import YoloV2, YoloInferenceModel +import os + + +# Add CPU option +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +if __name__ == '__main__': + + if YoloParams.WEIGHT_FILE: + generate_model() + + else: + YoloV2().run() + + + diff --git a/models/coco/anchors_coco.txt b/models/coco/anchors_coco.txt new file mode 100755 index 0000000..808be3a --- /dev/null +++ b/models/coco/anchors_coco.txt @@ -0,0 +1 @@ +0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 diff --git a/models/coco/labels_coco.txt b/models/coco/labels_coco.txt new file mode 100755 index 0000000..941cb4e --- /dev/null +++ b/models/coco/labels_coco.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git 
a/models/voc/anchors_voc.txt b/models/voc/anchors_voc.txt
new file mode 100755
index 0000000..5374c6f
--- /dev/null
+++ b/models/voc/anchors_voc.txt
@@ -0,0 +1 @@
+1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
diff --git a/models/voc/labels_voc.txt b/models/voc/labels_voc.txt
new file mode 100755
index 0000000..1168c39
--- /dev/null
+++ b/models/voc/labels_voc.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
\ No newline at end of file
diff --git a/net/__init__.py b/net/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/net/netarch.py b/net/netarch.py
new file mode 100644
index 0000000..037a738
--- /dev/null
+++ b/net/netarch.py
@@ -0,0 +1,227 @@
+from keras.models import Model, load_model
+from keras.layers import Reshape, Conv2D, Input, MaxPooling2D, BatchNormalization, Lambda
+from keras.layers.advanced_activations import LeakyReLU
+
+from keras.layers.merge import concatenate
+
+import tensorflow as tf
+import numpy as np
+import pickle, argparse, json, os
+
+from keras.utils.vis_utils import plot_model
+
+from net.netparams import YoloParams
+from net.netdecode import YoloOutProcess
+
+
+class YoloArchitecture(object):
+
+    def __init__(self):
+
+        self.in_model_name = YoloParams.IN_MODEL
+        self.plot_name = YoloParams.ARCH_FNAME
+
+    def get_model(self, loss_func):
+
+        yolo_model = self._load_yolo_model(loss_func)
+
+        if YoloParams.YOLO_MODE == 'train':
+            new_yolo_model = self._setup_transfer_learning(yolo_model)
+            #new_name = self.tl_weights_name.split('.')[0] + '_rand.h5'
+            #new_yolo_model.save_weights(new_name)
+
+        elif YoloParams.YOLO_MODE in ['inference','validate','video']:
+            new_yolo_model = yolo_model
+
+        else:
+            raise ValueError(
+                'Please set the action argument to \'train\', \'validate\' or pass an image file/dir.')
+
+        if self.plot_name:
+            plot_model(new_yolo_model, to_file=self.plot_name, show_shapes=True)
+
+        return new_yolo_model
+
+
+    def _load_yolo_model(self, loss_func):
+        # load_model errors out if the file was not compiled with yolo_loss.
+        if os.path.isfile(self.in_model_name):
+            model = load_model(self.in_model_name,
+                custom_objects={'yolo_loss': loss_func})
+            return model
+        else:
+            raise ValueError('Need to load full model in order to do '
+                'transfer learning. Run script again with desired TL '
+                'config and weight file to generate model.')
+
+
+    def weights_to_model(self, in_path, out_path):
+        yolo_model = self._yolo_v2_architecture()
+
+        try:
+            yolo_model.load_weights(in_path)
+        except IOError:
+            print('File for pre-trained weights not found.')
+            # Re-raise instead of silently saving a randomly initialized model.
+            raise
+
+        yolo_model.save(out_path)
+        return yolo_model
+
+
+
+    def _yolo_v2_architecture(self):
+        # Parse from cfg!
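+        # NOTE: the layer stack below mirrors the darknet yolov2 .cfg by hand;
+        # parsing it directly from the cfg file is still a TODO.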
+ self.layer_counter = 0 + + def space_to_depth_x2(x): + + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + + def conv2D_bn_leaky(inp, filters, kernel_size=(3,3), strides=(1,1), maxpool=False): + self.layer_counter += 1 + x = Conv2D(filters, kernel_size=kernel_size, strides=strides, + padding='same', use_bias=False)(inp) + + x = BatchNormalization()(x) + x = LeakyReLU(alpha=0.1)(x) + if maxpool: + return MaxPooling2D(pool_size=(2, 2))(x) + return x + + input_image = Input(shape=(YoloParams.INPUT_SIZE, YoloParams.INPUT_SIZE, 3), name='input') + + # Layer 1 + x = conv2D_bn_leaky(input_image, 32, (3,3), (1,1), maxpool=True) + + # Layer 2 + x = conv2D_bn_leaky(x, 64, maxpool=True) + + # Layer 3 + x = conv2D_bn_leaky(x, 128) + + # Layer 4 + x = conv2D_bn_leaky(x, 64, kernel_size=(1,1)) + + # Layer 5 + x = conv2D_bn_leaky(x, 128, maxpool=True) + + # Layer 6 + x = conv2D_bn_leaky(x, 256) + + # Layer 7 + x = conv2D_bn_leaky(x, 128, kernel_size=(1,1)) + + # Layer 8 + x = conv2D_bn_leaky(x, 256, maxpool=True) + + # Layer 9 + x = conv2D_bn_leaky(x, 512) + + # Layer 10 + x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + + # Layer 11 + x = conv2D_bn_leaky(x, 512) + + # Layer 12 + x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + + # Layer 13 + x = conv2D_bn_leaky(x, 512) + + skip_connection = x + x = MaxPooling2D(pool_size=(2, 2))(x) + + # Layer 14 + x = conv2D_bn_leaky(x, 1024) + + # Layer 15 + x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # Layer 16 + x = conv2D_bn_leaky(x, 1024) + + # Layer 17 + x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # Layer 18 + x = conv2D_bn_leaky(x, 1024) + + # Layer 19 + x = conv2D_bn_leaky(x, 1024) + + # Layer 20 + x = conv2D_bn_leaky(x, 1024) + + # Layer 21 + skip_connection = conv2D_bn_leaky(skip_connection, 64, kernel_size=(1,1)) + skip_connection = Lambda(space_to_depth_x2)(skip_connection) + x = concatenate([skip_connection, x]) + + # Layer 22 + x = conv2D_bn_leaky(x, 1024) + + # Final Conv2D + x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + strides=(1,1), padding='same')(x) + + + output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + + yolo_model = Model(input_image, output) + + return yolo_model + + + + def _setup_transfer_learning(self, yolo_model): + + new_yolo_model = self._yolo_v2_update(yolo_model) + + layer = new_yolo_model.layers[-2] # the last convolutional layer + weights = layer.get_weights() + + S2 = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE + new_kernel = np.random.normal(size=weights[0].shape)/S2 + new_bias = np.random.normal(size=weights[1].shape)/S2 + + layer.set_weights([new_kernel, new_bias]) + + return new_yolo_model + + + + def _yolo_v2_update(self, old_yolo_model): + + x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + strides=(1,1), padding='same', name='conv_23')(old_yolo_model.layers[-3].output) + + output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + + yolo_model = Model(old_yolo_model.input, output) + + return yolo_model + + +def generate_model(): + + yolo_arch = YoloArchitecture() + + d = os.path.dirname(YoloParams.WEIGHT_FILE) + + out_fname = os.path.join(d, 'model.h5') + + print('------------------------------------') + print('Reading weights from: %s'%YoloParams.WEIGHT_FILE) + print('Loading into YOLO V2 architecture and storing...') + print('\n\n') + 
yolo_arch.weights_to_model(YoloParams.WEIGHT_FILE, out_fname)
+    print('\tModel saved: %s'%out_fname)
+    print('\n\n------------------------------------')
+    print('Done.')
+
+
+
diff --git a/net/netdecode.py b/net/netdecode.py
new file mode 100644
index 0000000..9ef23e9
--- /dev/null
+++ b/net/netdecode.py
@@ -0,0 +1,161 @@
+import tensorflow as tf
+from keras import backend as K
+import numpy as np
+
+from net.netparams import YoloParams
+
+
+def process_outs(b, s, c):
+
+    b_p = b
+    # Expand dims of scores and classes so we can concat them
+    # with the boxes and have the output of NMS as an added layer of YOLO.
+    # Have to do another expand_dims this time on the first dim of the result
+    # since NMS doesn't know about BATCH_SIZE (operates on 2D, see
+    # https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression)
+    # but keras needs this dimension in the output.
+    s_p = K.expand_dims(s, axis=-1)
+    c_p = K.expand_dims(c, axis=-1)
+
+    output_stack = K.concatenate([b_p, s_p, c_p], axis=1)
+    return K.expand_dims(output_stack, axis=0)
+
+
+class YoloOutProcess(object):
+
+
+    def __init__(self):
+
+        self.max_boxes = YoloParams.TRUE_BOX_BUFFER
+        self.nms_threshold = YoloParams.NMS_THRESHOLD
+        self.detection_threshold = YoloParams.DETECTION_THRESHOLD
+
+
+    def __call__(self, y_sing_pred):
+
+        # need to convert b's from GRID_SIZE units into IMG coords. Divide by grid here.
+        b_xy = (K.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]) / YoloParams.GRID_SIZE
+        b_wh = (K.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]) / YoloParams.GRID_SIZE
+        b_xy1 = b_xy - b_wh / 2.
+        b_xy2 = b_xy + b_wh / 2.
+        boxes = K.concatenate([b_xy1, b_xy2], axis=-1)
+
+        scores_all = K.expand_dims(K.sigmoid(y_sing_pred[..., 4]), axis=-1) * K.softmax(y_sing_pred[...,5:])
+        indicator_detection = scores_all > self.detection_threshold
+        scores_all = scores_all * K.cast(indicator_detection, np.float32)
+
+        classes = K.argmax(scores_all, axis=-1)
+        scores = K.max(scores_all, axis=-1)
+
+        S2B = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES
+
+        flatten_boxes = K.reshape(boxes, shape=(S2B, 4))
+        flatten_scores = K.reshape(scores, shape=(S2B, ))
+        flatten_classes = K.reshape(classes, shape=(S2B, ))
+
+        selected_indices = tf.image.non_max_suppression(
+            flatten_boxes,
+            flatten_scores,
+            max_output_size=self.max_boxes,
+            iou_threshold=self.nms_threshold)
+
+        selected_boxes = K.gather(flatten_boxes, selected_indices)
+        selected_scores = K.gather(flatten_scores, selected_indices)
+        selected_classes = tf.gather(flatten_classes, selected_indices)
+
+        # Re-check against the detection threshold: zeroed-out scores can
+        # still survive NMS, so drop them here.
+        score_mask = selected_scores > self.detection_threshold
+
+        selected_boxes = tf.boolean_mask(selected_boxes, score_mask)
+        selected_scores = tf.boolean_mask(selected_scores, score_mask)
+        selected_classes = tf.boolean_mask(selected_classes, score_mask)
+
+        return process_outs(selected_boxes, selected_scores, K.cast(selected_classes, np.float32))
+
+    def proper_yolo_nms(self, y_sing_pred):
+        # NMS needs to be applied per class, since two different boxes could predict with high
+        # confidence two objects that have high IOU.
+        # At the same time, even though NMS has to be done per class, it can only be done with max
+        # values of P(O) * P(Class|O), since we want to avoid the same box predicting 2 overlapping
+        # objects. Doing both of these things at once turns out to be a real pain.
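+        # NOTE: kept for reference; it is not wired into the model. __call__
+        # above applies class-agnostic NMS instead.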
+
+        # CONSIDER USING tf.while_loop for the FOR
+
+        b_xy = tf.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]
+        b_wh = tf.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]
+        b_xy1 = b_xy - b_wh / 2.
+        b_xy2 = b_xy + b_wh / 2.
+        boxes = tf.concat([b_xy1, b_xy2], axis=-1)
+
+
+        scores_all = tf.expand_dims(tf.sigmoid(y_sing_pred[..., 4]), axis=-1) * tf.nn.softmax(y_sing_pred[...,5:])
+        indicator_detection = scores_all > self.detection_threshold
+
+        scores_all = scores_all * tf.to_float(indicator_detection)
+
+        classes = tf.argmax(scores_all, axis=-1)
+
+        scores = tf.reduce_max(scores_all, axis=-1)
+
+        flatten_boxes = tf.reshape(boxes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, 4))
+        flatten_scores = tf.reshape(scores,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+        flatten_classes = tf.reshape(classes,
+            shape=(YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES, ))
+
+        output_boxes = []
+        output_scores = []
+        output_classes = []
+        for c in range(YoloParams.NUM_CLASSES):
+            if tf.reduce_sum(tf.to_float(tf.equal(flatten_classes, c))) > 0:
+                filtered_flatten_boxes = tf.boolean_mask(flatten_boxes, tf.equal(flatten_classes, c))
+                filtered_flatten_scores = tf.boolean_mask(flatten_scores, tf.equal(flatten_classes, c))
+                filtered_flatten_classes = tf.boolean_mask(flatten_classes, tf.equal(flatten_classes, c))
+
+                selected_indices = tf.image.non_max_suppression(
+                    filtered_flatten_boxes, filtered_flatten_scores, self.max_boxes, self.nms_threshold)
+
+                selected_boxes = K.gather(filtered_flatten_boxes, selected_indices)
+                selected_scores = K.gather(filtered_flatten_scores, selected_indices)
+                selected_classes = K.gather(filtered_flatten_classes, selected_indices)
+
+
+                output_boxes.append( selected_boxes )
+                output_scores.append( selected_scores )
+                output_classes.append( selected_classes )
+
+
+        # debug output; .eval() requires an active session
+        #print(output_boxes)
+        #print(tf.concat(output_boxes, axis=-1).eval())
+        #print(tf.concat(output_scores, axis=-1).eval())
+        #print(tf.concat(output_classes, axis=-1).eval())
+
+        return tf.concat(output_boxes, axis=-1), tf.concat(output_scores, axis=-1), tf.concat(output_classes, axis=-1)
+
+
+
+if __name__ == '__main__':
+
+    sess = tf.InteractiveSession()
+
+    max_boxes = 10
+    nms_threshold = 0.1
+    boxes = tf.convert_to_tensor(np.random.rand(10,4), np.float32)
+    scores = tf.convert_to_tensor(np.random.rand(10,), np.float32)
+
+    classes = tf.convert_to_tensor((10.*np.random.rand(10,)%3).astype(int), np.float32)
+
+    # Smoke test with plain class-agnostic NMS (the old yolo_non_max_suppression
+    # helper no longer exists).
+    selected = tf.image.non_max_suppression(boxes, scores, max_boxes, nms_threshold)
+    b = K.gather(boxes, selected)
+    s = K.gather(scores, selected)
+    c = K.gather(classes, selected)
+
+    print(boxes.eval().shape)
+    print(scores.eval().shape)
+    print(classes.eval().shape)
+
+    print('-----------------------')
+
+    print(b.eval().shape)
+    print(s.eval().shape)
+    print(c.eval().shape)
diff --git a/net/neteval.py b/net/neteval.py
new file mode 100644
index 0000000..f4a3414
--- /dev/null
+++ b/net/neteval.py
@@ -0,0 +1,323 @@
+
+
+
+from net.netparams import YoloParams
+from net.netdecode import YoloOutProcess
+
+import matplotlib.pyplot as plt
+import numpy as np
+import cv2, os
+import keras
+from net.utils import draw_boxes, compute_iou, mkdir_p, yolo_normalize, handle_empty_indexing
+
+from tqdm import tqdm
+
+
+
+
+class YoloDataGenerator(keras.utils.Sequence):
+    'Generates data for Keras'
+    def __init__(self, images, shuffle=True):
+
+        self.images = self._prune_ann_labels(images)
+        self.input_size = YoloParams.INPUT_SIZE
+        self.anchors = YoloParams.anchors
+
+        self.generator = None
+
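+        # Batch size is fixed from config['train']['batch_size'] via YoloParams.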
self.batch_size = YoloParams.BATCH_SIZE + + self.shuffle = shuffle + self.on_epoch_end() + + def __len__(self): + 'Denotes the number of batches per epoch' + # return int(np.ceil(float(len(self.images))/self.config['BATCH_SIZE'])) + return int(np.floor(len(self.images) / self.batch_size)) + + def __getitem__(self, index): + 'Generate one batch of data' + bound_l = index*self.batch_size + bound_r = (index+1)*self.batch_size + + return self._data_to_yolo_output(self.images[bound_l:bound_r]) + + def load_image_name(self, i): + return self.images[i]['filename'] + + + def load_image(self, i): + return cv2.imread(self.images[i]['filename']) + + def load_annotation(self, i): + labels = [] + bboxes = [] + + height = self.images[i]['height'] + width = self.images[i]['width'] + + for obj in self.images[i]['object']: + #if obj['name'] in YoloParams.CLASS_LABELS: + labels.append( obj['name'] ) + bboxes.append( + [obj['xmin'] / width, obj['ymin'] / height, obj['xmax'] / width, obj['ymax'] / height] ) + + + class_inds = [YoloParams.CLASS_TO_INDEX[l] for l in labels] + + return np.array(bboxes), np.array(class_inds) + + def on_epoch_end(self): + 'Updates indexes after each epoch' + if self.shuffle: np.random.shuffle(self.images) + + def _prune_ann_labels(self, images): + clean_images = [] + for im in images: + clean_im = im.copy() + clean_objs = [] + for obj in clean_im['object']: + if obj['name'] in YoloParams.CLASS_LABELS: + clean_objs.append( obj ) + + clean_im.update({'object' : clean_objs}) + clean_images.append(clean_im) + + return clean_images + + + def _data_to_yolo_output(self, batch_images): + + # INPUT IMAGES READY FOR TRAINING + x_batch = np.zeros((len(batch_images), self.input_size, self.input_size, 3)) + + # GET DESIRED NETWORK OUTPUT + y_batch = np.zeros((len(batch_images), YoloParams.GRID_SIZE, + YoloParams.GRID_SIZE, YoloParams.NUM_BOUNDING_BOXES, 4+1+len(YoloParams.CLASS_LABELS))) + + grid_factor = YoloParams.GRID_SIZE / self.input_size + + for j, train_instance in enumerate(batch_images): + + img_raw = cv2.imread(train_instance['filename']) + + h_factor_resize = img_raw.shape[0] / self.input_size + w_factor_resize = img_raw.shape[1] / self.input_size + + img = cv2.resize(img_raw, (self.input_size, self.input_size)) + + for obj_box_idx, label in enumerate(train_instance['object']): + + xmin_resized = int(round(label['xmin'] / w_factor_resize)) + xmax_resized = int(round(label['xmax'] / w_factor_resize)) + ymin_resized = int(round(label['ymin'] / h_factor_resize)) + ymax_resized = int(round(label['ymax'] / h_factor_resize)) + + bbox_center_x = .5*(xmin_resized + xmax_resized) * grid_factor + grid_x = int(bbox_center_x) + + bbox_center_y = .5*(ymin_resized + ymax_resized) * grid_factor + grid_y = int(bbox_center_y) + + obj_indx = YoloParams.CLASS_LABELS.index(label['name']) + + bbox_w = (xmax_resized - xmin_resized) * grid_factor + bbox_h = (ymax_resized - ymin_resized) * grid_factor + + shifted_wh = np.array([0,0,bbox_w, bbox_h]) + + func = lambda prior: compute_iou((0,0,prior[0],prior[1]), shifted_wh) + + anchor_winner = np.argmax(np.apply_along_axis(func, -1, self.anchors)) + + # assign ground truth x, y, w, h, confidence and class probs to y_batch + + # ASSIGN CLASS CONFIDENCE + y_batch[j, grid_y, grid_x, anchor_winner, 0:4] = [bbox_center_x, bbox_center_y, bbox_w, bbox_h] + + # ASSIGN OBJECTNESS CONF + y_batch[j, grid_y, grid_x, anchor_winner, 4 ] = 1. 
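+                # (objectness target: 1. marks the matched anchor at this grid cell)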
+ + # ASSIGN CORRECT CLASS TO + y_batch[j, grid_y, grid_x, anchor_winner, 4+1+obj_indx] = 1 + + # number of labels per instance !> than true_box_buffer, add check in processing (?) + x_batch[j] = yolo_normalize(img) + + ############################################################ + # x_batch -> list of input images + # y_batch -> list of network ouput gt values for each image + ############################################################ + return x_batch, y_batch + + + + +class YoloEvaluate(object): + + + def __init__(self, generator, model): + + self.inf_model = model + self.generator = generator + self.class_labels = np.array(YoloParams.CLASS_LABELS) + + self.iou_detection_threshold = YoloParams.IOU_THRESHOLD + + self.val_out_path = YoloParams.VALIDATION_OUT_PATH + self.debug_plots = True if self.val_out_path else False + + if self.debug_plots: mkdir_p(self.val_out_path) + + + def _find_detection(self, q_box, boxes, global_index): + + if boxes.size == 0: + #print('EMPTY BOXES') + return -1 + + ious = list(map(lambda x: compute_iou(q_box, x), boxes)) + + max_iou_index = np.argmax( ious ) + + if ious[max_iou_index] > self.iou_detection_threshold: + return global_index[max_iou_index] + + return -1 + + + def _plot_preds(self, image, pred_info, true_info, image_index): + + image_out = draw_boxes(image, pred_info) + image_out = draw_boxes(image_out, true_info) + image_name = os.path.basename( self.generator.load_image_name(image_index) ) + + outfile = os.path.join(self.val_out_path, image_name) + cv2.imwrite(outfile, image_out) + + + + def _process_image(self, i): + + true_boxes, true_labels = self.generator.load_annotation(i) + + image = self.generator.load_image(i) + + pred_boxes, conf, pred_labels, _ = self.inf_model.predict(image.copy()) + + if self.debug_plots: + + # np.array(YoloParams.CLASS_LABELS)[pred_labels] + label_names_pred = handle_empty_indexing(self.class_labels, pred_labels) + label_names_true = self.class_labels[true_labels] + + pred_info = (pred_boxes, conf, label_names_pred) + true_info = (true_boxes, None, label_names_true) + + self._plot_preds(image.copy(), pred_info=pred_info, true_info=true_info, image_index=i) + + + sorted_inds = np.argsort(-conf) + + repeat_mask = [True]*len(true_boxes) + matched_labels = [] + global_index = np.arange(len(true_labels)) + + + image_results = [] + image_labels = [0]*YoloParams.NUM_CLASSES + + for tl in true_labels: + image_labels[tl] += 1 + + + for i in sorted_inds: + + label_mask = (pred_labels[i] == true_labels) + index_subset = global_index[(repeat_mask)&(label_mask)] + true_boxes_subset = true_boxes[(repeat_mask)&(label_mask)] + + idx = self._find_detection(pred_boxes[i], true_boxes_subset, index_subset) + + if idx != -1: + matched_labels.append(idx) + repeat_mask[idx] = False + + image_results.append([pred_labels[i], conf[i], 1 if idx != -1 else 0]) + + return image_results, image_labels + + + def _interp_ap(self, precision, recall): + + if precision.size == 0 or recall.size == 0: + return 0. + + iap = 0 + for r in np.arange(0.,1.1, 0.1): + recall_mask = (recall >= r) + p_max = precision[recall_mask] + + iap += np.max( p_max if p_max.size > 0 else [0] ) + + return iap / 11 + + + def compute_ap(self, detections, num_gts): + + detections_sort_indx = np.argsort(-detections[:,1]) + detections = detections[detections_sort_indx] + + precision = [] + recall = [] + + if num_gts == 0: + return 0. 
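+        # Walk detections in descending confidence, accumulating running
+        # precision/recall pairs for the 11-point interpolation in _interp_ap above.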
+ + for i in range(1, len(detections) + 1): + + precision.append( np.sum(detections[:i][:,2]) / i ) + recall.append( np.sum(detections[:i][:,2]) / num_gts ) + + return self._interp_ap(np.array(precision), np.array(recall)) + + + + + def __call__(self): + + detection_results = [] + detection_labels = np.array([0]*YoloParams.NUM_CLASSES) + + num_annotations = 0 + counter = 0 + + for i in tqdm(range(len(self.generator.images)), desc='Batch Processed'): + counter += 1 + + image_name = os.path.basename( self.generator.load_image_name(i) ) + + #if image_name == '2011_003285.jpg': + + image_results, image_labels = self._process_image(i) + + detection_results.extend(image_results) + detection_labels += np.array(image_labels) + + + detection_results = np.array(detection_results) + + ap_dic = {} + for class_ind, num_gts in enumerate(detection_labels): + class_detections = detection_results[detection_results[:,0]==class_ind] + + ap = self.compute_ap(class_detections, num_gts) + + ap_dic[self.class_labels[class_ind]] = ap + + + return ap_dic + + + + diff --git a/net/netloss.py b/net/netloss.py new file mode 100644 index 0000000..5f33079 --- /dev/null +++ b/net/netloss.py @@ -0,0 +1,185 @@ + +import tensorflow as tf +import numpy as np + +from net.netparams import YoloParams + +EPSILON = 1e-6 + + +def calculate_ious(A1, A2, use_iou=True): + + if not use_iou: + return 1. + + A1_xy = A1[..., 0:2] + A1_wh = A1[..., 2:4] + + A2_xy = A2[..., 0:2] + A2_wh = A2[..., 2:4] + + A1_wh_half = A1_wh / 2. + A1_mins = A1_xy - A1_wh_half + A1_maxes = A1_xy + A1_wh_half + + A2_wh_half = A2_wh / 2. + A2_mins = A2_xy - A2_wh_half + A2_maxes = A2_xy + A2_wh_half + + intersect_mins = tf.maximum(A2_mins, A1_mins) + intersect_maxes = tf.minimum(A2_maxes, A1_maxes) + intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
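+    # (the clamp above zeroes the overlap for disjoint boxes)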
+ intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] + + true_areas = A1_wh[..., 0] * A1_wh[..., 1] + pred_areas = A2_wh[..., 0] * A2_wh[..., 1] + + union_areas = pred_areas + true_areas - intersect_areas + iou_scores = tf.truediv(intersect_areas, union_areas) + + return iou_scores + + +class YoloLoss(object): + # ADD WARM UP CONDITIONS + + def __init__(self): + + self.__name__ = 'yolo_loss' + self.iou_threshold = YoloParams.IOU_THRESHOLD + self.readjust_obj_score = True + + self.lambda_coord = YoloParams.COORD_SCALE + self.lambda_noobj = YoloParams.NO_OBJECT_SCALE + self.lambda_obj = YoloParams.OBJECT_SCALE + self.lambda_class = YoloParams.CLASS_SCALE + + self.norm = False + + def coord_loss(self, y_true, y_pred): + + b_xy_pred = y_pred[..., :2] + b_wh_pred = y_pred[..., 2:4] + + b_xy = y_true[..., 0:2] + b_wh = y_true[..., 2:4] + + indicator_coord = tf.expand_dims(y_true[..., 4], axis=-1) * self.lambda_coord + + norm_coord = 1 + if self.norm: + norm_coord = tf.reduce_sum(tf.to_float(indicator_coord > 0.0)) + + + loss_xy = tf.reduce_sum(tf.square(b_xy - b_xy_pred) * indicator_coord, axis=[1,2,3,4]) + #loss_wh = tf.reduce_sum(tf.square(b_wh - b_wh_pred) * indicator_coord, axis=[1,2,3,4]) + loss_wh = tf.reduce_sum(tf.square(tf.sqrt(b_wh) - tf.sqrt(b_wh_pred)) * indicator_coord, axis=[1,2,3,4]) + + return (loss_wh + loss_xy) / (norm_coord + EPSILON) / 2 + + + def obj_loss(self, y_true, y_pred): + + b_o = calculate_ious(y_true, y_pred, use_iou=self.readjust_obj_score) * y_true[..., 4] + b_o_pred = y_pred[..., 4] + + num_true_labels = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES + y_true_p = tf.reshape(y_true[..., :4], shape=(YoloParams.BATCH_SIZE, 1, 1, 1, num_true_labels, 4)) + iou_scores_buff = calculate_ious(y_true_p, tf.expand_dims(y_pred, axis=4)) + best_ious = tf.reduce_max(iou_scores_buff, axis=4) + + indicator_noobj = tf.to_float(best_ious < self.iou_threshold) * (1 - y_true[..., 4]) * self.lambda_noobj + indicator_obj = y_true[..., 4] * self.lambda_obj + + + norm_conf = 1 + if self.norm: + norm_conf = tf.reduce_sum(tf.to_float((indicator_obj + indicator_noobj) > 0.0)) + + loss_obj = tf.reduce_sum(tf.square(b_o-b_o_pred) * (indicator_obj + indicator_noobj), axis=[1,2,3]) + + return loss_obj / (norm_conf + EPSILON) / 2 + + + def class_loss(self, y_true, y_pred): + + b_class = tf.argmax(y_true[..., 5:], axis=-1) + b_class_pred = y_pred[..., 5:] + + indicator_class = y_true[..., 4] * tf.gather( + YoloParams.CLASS_WEIGHTS, b_class) * self.lambda_class + + norm_class = 1 + if self.norm: + norm_class = tf.reduce_sum(tf.to_float(indicator_class > 0.0)) + + loss_class_arg = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=b_class, logits=b_class_pred) + + loss_class = tf.reduce_sum(loss_class_arg * indicator_class, axis=[1,2,3]) + + return loss_class / (norm_class + EPSILON) + + + def _transform_netout(self, y_pred_raw): + y_pred_xy = tf.sigmoid(y_pred_raw[..., :2]) + YoloParams.c_grid + y_pred_wh = tf.exp(y_pred_raw[..., 2:4]) * YoloParams.anchors + y_pred_conf = tf.sigmoid(y_pred_raw[..., 4:5]) + y_pred_class = y_pred_raw[...,5:] + + return tf.concat([y_pred_xy, y_pred_wh, y_pred_conf, y_pred_class], axis=-1) + + + + def __call__(self, y_true, y_pred_raw): + + y_pred = self._transform_netout(y_pred_raw) + + total_coord_loss = self.coord_loss(y_true, y_pred) + total_obj_loss = self.obj_loss(y_true, y_pred) + total_class_loss = self.class_loss(y_true, y_pred) + + loss = total_coord_loss + total_obj_loss + total_class_loss + + #loss = 
tf.Print(loss, [total_coord_loss], message='\nCoord Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [total_obj_loss], message='Conf Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [total_class_loss], message='Class Loss \t', summarize=1000)
+        #loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
+
+        return loss
+
+
+
+
+if __name__ == '__main__':
+
+    sess = tf.InteractiveSession()
+
+    y_pred = tf.convert_to_tensor(np.random.rand(16,13,13,5,85), np.float32)
+    y_true = tf.convert_to_tensor(np.random.rand(16,13,13,5,85), np.float32)
+
+    var = YoloLoss()
+
+    print( var(y_true, y_pred).eval() )
diff --git a/net/netparams.py b/net/netparams.py
new file mode 100644
index 0000000..c64efe7
--- /dev/null
+++ b/net/netparams.py
@@ -0,0 +1,144 @@
+
+import pickle, argparse, json, os, sys
+import tensorflow as tf
+import numpy as np
+
+
+argparser = argparse.ArgumentParser(
+    description='dourflow: a keras YOLO V2 implementation.')
+
+
+argparser.add_argument(
+    'action',
+    help='what to do: \'train\', \'validate\' or pass an image file/dir.')
+
+
+argparser.add_argument(
+    '-m',
+    '--model',
+    help='path to input yolo v2 keras model',
+    default='yolo_model.h5')
+
+
+argparser.add_argument(
+    '-c',
+    '--conf',
+    help='path to configuration file',
+    default='config.json')
+
+
+argparser.add_argument(
+    '-t',
+    '--threshold',
+    type=float,
+    help='detection threshold',
+    default=0.3)
+
+
+argparser.add_argument(
+    '-w',
+    '--weight_file',
+    help='path to weight file',
+    default='weights.h5')
+
+
+args = argparser.parse_args()
+
+
+action = args.action
+config_path = args.conf
+
+
+with open(config_path) as config_buffer:
+    config = json.loads(config_buffer.read())
+
+
+
+def generate_yolo_grid(batch, g, num_bb):
+    c_x = tf.to_float(tf.reshape(tf.tile(tf.range(g), [g]), (1, g, g, 1, 1)))
+    c_y = tf.transpose(c_x, (0,2,1,3,4))
+    return tf.tile(tf.concat([c_x, c_y], -1), [batch, 1, 1, num_bb, 1])
+
+
+
+
+def get_threshold(value):
+    if value > 1. or value < 0:
+        raise ValueError('Please enter a valid threshold (between 0. and 1.).')
+    return value
+
+
+
+class YoloParams(object):
+
+    # Mode
+    PREDICT_IMAGE = ''
+    WEIGHT_FILE = ''
+    if action != 'gen':
+        if action == 'validate' or action == 'train':
+            YOLO_MODE = action
+        else:
+            if os.path.isdir(action):
+                YOLO_MODE = 'inference'
+            elif os.path.isfile(action):
+                if action.split('.')[-1] in ['mp4','avi','wmv','mpg','mpeg']:
+                    YOLO_MODE = 'video'
+                else:
+                    YOLO_MODE = 'inference'
+            else:
+                raise ValueError('First argument for dourflow must be: \'train\','
+                    ' \'validate\' or an image file/dir.')
+
+            PREDICT_IMAGE = action
+    else:
+        assert args.weight_file, "Need to pass weight file if generating model."
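+        # (gen mode: keep the weight file around so dourflow.py knows to
+        # build and save a full model from it)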
+        # Paths
+        WEIGHT_FILE = args.weight_file
+
+    TRAIN_IMG_PATH = config['train']['image_folder']
+    TRAIN_ANN_PATH = config['train']['annot_folder']
+
+    VALIDATION_IMG_PATH = config['valid']['image_folder']
+    VALIDATION_ANN_PATH = config['valid']['annot_folder']
+    VALIDATION_OUT_PATH = config['valid']['pred_folder']
+
+    # Model
+    #IN_MODEL = config['config_path']['in_model']
+    IN_MODEL = args.model
+    OUT_MODEL_NAME = config['train']['out_model_name']
+
+    ARCH_FNAME = config['config_path']['arch_plotname']
+
+    # Classes
+    CLASS_LABELS = [x.rstrip() for x in open(config['config_path']['labels'])]
+    NUM_CLASSES = len(CLASS_LABELS)
+    CLASS_TO_INDEX = dict(zip(CLASS_LABELS, np.arange(NUM_CLASSES)))
+    CLASS_WEIGHTS = np.ones(NUM_CLASSES, dtype='float32')
+
+    # Infrastructure params
+    INPUT_SIZE = config['model']['input_size']
+    GRID_SIZE = config['model']['grid_size']
+    TRUE_BOX_BUFFER = config['model']['true_box_buffer']
+    ANCHORS = [float(a) for a in open(config['config_path']['anchors']).read().split(', ')]
+
+    NUM_BOUNDING_BOXES = len(ANCHORS) // 2
+    OBJECT_SCALE = 5.0
+    NO_OBJECT_SCALE = 1.0
+    CLASS_SCALE = 1.0
+    COORD_SCALE = 1.0
+
+    # Train params
+    BATCH_SIZE = config['train']['batch_size']
+    L_RATE = config['train']['learning_rate']
+    NUM_EPOCHS = config['train']['num_epochs']
+    TRAIN_VERBOSE = config['train']['verbose']
+
+    # Thresholding
+    IOU_THRESHOLD = get_threshold(config['model']['iou_threshold'])
+    NMS_THRESHOLD = get_threshold(config['model']['nms_threshold'])
+    DETECTION_THRESHOLD = get_threshold(args.threshold)
+
+    # Additional / Precomputing
+    c_grid = generate_yolo_grid(BATCH_SIZE, GRID_SIZE, NUM_BOUNDING_BOXES)
+    anchors = np.reshape(ANCHORS, [1,1,1,NUM_BOUNDING_BOXES,2])
diff --git a/net/utils.py b/net/utils.py
new file mode 100755
index 0000000..9f39e6d
--- /dev/null
+++ b/net/utils.py
@@ -0,0 +1,291 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import os, errno
+import xml.etree.ElementTree as ET
+
+import tensorflow as tf
+import copy
+import cv2
+from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
+
+
+
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError as exc:  # Python >2.5
+        if exc.errno == errno.EEXIST and os.path.isdir(path):
+            pass
+        else:
+            raise
+
+
+def compute_iou(bb_1, bb_2):
+
+    xa0, ya0, xa1, ya1 = bb_1
+    xb0, yb0, xb1, yb1 = bb_2
+
+    # Clamp each axis overlap at 0: without this, two negative differences
+    # multiply into a spurious positive intersection for disjoint boxes.
+    dx = max(0., min(xa1, xb1) - max(xa0, xb0))
+    dy = max(0., min(ya1, yb1) - max(ya0, yb0))
+    intersec = dx * dy
+
+    union = (xa1 - xa0)*(ya1 - ya0) + (xb1 - xb0)*(yb1 - yb0) - intersec
+
+    return intersec / union
+
+
+def benchmark_timings(data, path=''):
+
+    fig = plt.figure(figsize=(10,15))
+    ax = plt.gca()
+    df = pd.DataFrame(data)
+    df.plot(ax=ax, kind='area', subplots=True)
+    plt.savefig(path + 'timings.png', format='png')
+    plt.close()
+
+    df2 = df.apply(lambda x: x/df['total'], axis=0)[['decode', 'prediction', 'prepro']]
+
+    fig = plt.figure(figsize=(20,13))
+    ax = fig.add_subplot(111)
+    df2.plot(ax=ax)
+    vals = ax.get_yticks()
+    ax.set_yticklabels(['{:,.1%}'.format(x) for x in vals])
+    plt.savefig(path + 'timings_combined.png', format='png')
+    plt.close()
+
+
+
+
+def space_to_depth_x2(x):
+    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
+    # Import currently required to make Lambda work.
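+    # (Keras deserializes a Lambda without its module context, so tf must be
+    # imported inside the function body for saved models to load.)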
+ # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 + + # the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K) + + + # tf.space_to_depth: + # Input: [batch, height, width, depth] + # Output: [batch, height/block_size, width/block_size, depth*block_size*block_size] + # Example: [1,4,4,1] -> [1,2,2,4] or in this case [?,38,38,64] -> [?,19,19,256] + # This operation is useful for resizing the activations between convolutions (but keeping all data), + # e.g. instead of pooling. It is also useful for training purely convolutional models. + + # space_to_depth_x2 is just tf.space_to_depth wrapped with block_size=2 + + + # Example + """ + input shape = (4,4,1) + + [ + [[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]] + ] + + is divided into the following chunks (block_size, block_size, channels): + + [[[1], [2]], [[[3], [4]], + [[5], [6]]] [[7], [8]]] + + [[[9], [10],] [[[11], [12]], + [[13], [14]]] [[15], [16]]] + + flatten each chunk to a single array: + + [[1, 2, 5, 6]], [[3, 4, 7, 8]] + [[9, 10, 13, 14]], [[11, 12, 15, 16]] + + + spatially rearrange chunks according to their initial position: + + [ + [[1, 2, 5, 6]], [[3, 4, 7, 8]], + [[9 10, 13, 14]], [[11, 12, 15, 16]] + ] + + output shape = (2,2,4) + """ + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + +def draw_boxes(image, info): + image_h, image_w, _ = image.shape + + boxes, scores, labels = info + color_mod = 255 + + for i in range(len(boxes)): + xmin = int(boxes[i][0]*image_w) + ymin = int(boxes[i][1]*image_h) + xmax = int(boxes[i][2]*image_w) + ymax = int(boxes[i][3]*image_h) + + if scores is None: + #text = "%s"%(labels[i]) + text = '' + color_mod = 0 + else: + text = "%s (%.1f%%)"%(labels[i], 100*scores[i]) + + cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (color_mod,255,0), 2) + + cv2.putText(image, + text, + (xmin, ymin - 15), + cv2.FONT_HERSHEY_COMPLEX, + 1e-3 * image_h, + (color_mod,255,0), 1) + return image + + +def parse_annotation(ann_dir, img_dir, labels=[]): + # from https://github.com/experiencor/keras-yolo2/blob/master/preprocessing.py + all_imgs = [] + seen_labels = {} + # go through annotations by sorted filename + for ann in sorted(os.listdir(ann_dir)): + img = {'object':[]} + tree = ET.parse(os.path.join(ann_dir, ann)) + + for elem in tree.iter(): + if 'filename' in elem.tag: + img['filename'] = os.path.join(img_dir, elem.text) + if 'width' in elem.tag: + img['width'] = int(elem.text) + if 'height' in elem.tag: + img['height'] = int(elem.text) + if 'object' in elem.tag or 'part' in elem.tag: + obj = {} + + for attr in list(elem): + if 'name' in attr.tag: + obj['name'] = attr.text + + if obj['name'] in seen_labels: + seen_labels[obj['name']] += 1 + else: + seen_labels[obj['name']] = 1 + + if len(labels) > 0 and obj['name'] not in labels: + break + else: + img['object'] += [obj] + + if 'bndbox' in attr.tag: + for dim in list(attr): + if 'xmin' in dim.tag: + obj['xmin'] = int(round(float(dim.text))) + if 'ymin' in dim.tag: + obj['ymin'] = int(round(float(dim.text))) + if 'xmax' in dim.tag: + obj['xmax'] = int(round(float(dim.text))) + if 'ymax' in dim.tag: + obj['ymax'] = int(round(float(dim.text))) + + if len(img['object']) > 0: + all_imgs += [img] + + # all_imgs: [img1, img2, img3, ..] 
+    #
+    """
+    img:
+      {'object' : [{'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': },   # object 1
+                   {'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': },   # object 2
+                   {'name': 'class2', 'xmin': , 'ymin': , 'xmax': , 'ymax': }],  # object 3
+       'filename' : ,
+       'width': ,
+       'height':
+      }
+    """
+    # seen_labels: {'classname': count}
+    return all_imgs, seen_labels
+
+
+
+def yolo_normalize(image):
+    return image / 255.
+
+
+
+# https://stackoverflow.com/questions/47877475/keras-tensorboard-plot-train-and-validation-scalars-in-a-same-figure?rq=1
+
+class TrainValTensorBoard(TensorBoard):
+    def __init__(self, log_dir='./logs', **kwargs):
+        # Make the original `TensorBoard` log to a subdirectory 'training'
+        training_log_dir = os.path.join(log_dir, 'training')
+        super(TrainValTensorBoard, self).__init__(training_log_dir, **kwargs)
+
+        # Log the validation metrics to a separate subdirectory
+        self.val_log_dir = os.path.join(log_dir, 'validation')
+
+    def set_model(self, model):
+        # Setup writer for validation metrics
+        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
+        super(TrainValTensorBoard, self).set_model(model)
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        val_logs = {k.replace('val_', ''): v for k, v in logs.items() if k.startswith('val_')}
+        for name, value in val_logs.items():
+            summary = tf.Summary()
+            summary_value = summary.value.add()
+            summary_value.simple_value = value.item()
+            summary_value.tag = name
+            self.val_writer.add_summary(summary, epoch)
+        self.val_writer.flush()
+
+        logs = {k: v for k, v in logs.items() if not k.startswith('val_')}
+        super(TrainValTensorBoard, self).on_epoch_end(epoch, logs)
+
+    def on_train_end(self, logs=None):
+        super(TrainValTensorBoard, self).on_train_end(logs)
+        self.val_writer.close()
+
+
+
+def setup_logging(logging_path='logs'):
+
+    log_path = os.path.join(os.getcwd(), logging_path)
+    mkdir_p(log_path)
+
+    get_ind = lambda x: int(x.split('_')[1])
+    #run_counter = max([get_ind(r) for r in os.listdir(log_path)]) + 1
+    run_counter = max(map(get_ind, os.listdir(log_path)), default=-1) + 1
+
+    run_path = os.path.join(log_path, 'run_%s'%run_counter)
+    mkdir_p(run_path)
+
+    print('Logging set up, to monitor training run:\n'
+        '\t\'tensorboard --logdir=%s\'\n'%run_path)
+
+    return run_path
+
+
+def handle_empty_indexing(arr, idx):
+    if idx.size > 0:
+        return arr[idx]
+    return []
+
+
+
+if __name__ == '__main__':
+
+    imgs, cnts = parse_annotation('/home/kiran/Downloads/VOCdevkit/VOC2012/Annotations/','/home/kiran/Downloads/VOCdevkit/VOC2012/JPEGImages/')
+    imgs, cnts = parse_annotation('/home/kiran/Downloads/VOCdevkit2007/VOC2007/Annotations/','/home/kiran/Downloads/VOCdevkit2007/VOC2007/JPEGImages/')
diff --git a/split_dataset.py b/split_dataset.py
new file mode 100644
index 0000000..400d67d
--- /dev/null
+++ b/split_dataset.py
@@ -0,0 +1,99 @@
+
+import os
+from shutil import copy2
+from random import shuffle
+
+import argparse
+
+from tqdm import tqdm
+from net.utils import mkdir_p
+
+
+argparser = argparse.ArgumentParser(
+    description='Process training / validation data.')
+
+argparser.add_argument(
+    '-p',
+    type=float,
+    help='fraction of dataset for training split.',
+    default=0.75)
+
+argparser.add_argument(
+    '--in_ann',
+    help='path to input annotations dir.')
+
+
+argparser.add_argument(
+    '--in_img',
+    help='path to input images dir.')
+
+
+argparser.add_argument(
+    '--output',
+    help='path to output dir.')
+
+
+#path = '/home/kiran/Downloads/VOCdevkit/VOC2012/JPEGImages/'
+
+
+def sample_from_dir(paths, train_p):
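+    # Copies a train_p fraction of image/annotation pairs into train/ and the
+    # rest into valid/, matching the folder layout the configs expect.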
+
+    img_path, ann_path, out_path = paths
+
+    imgs = os.listdir(img_path)
+
+    total_num = len(imgs)
+    train_num = int(len(imgs)*train_p)
+
+    img_fmt = '.' + imgs[0].split('.')[1]
+    fns = [im.split('.')[0] for im in imgs]
+
+    shuffle(fns)
+
+    fn_train = fns[:train_num]
+    fn_val = fns[train_num:]
+
+    out_paths = []
+
+    for subf, file_names in tqdm([('train', fn_train), ('valid', fn_val)], desc='Train/Val', leave=False):
+
+        out_img_path = os.path.join(out_path, subf, 'imgs')
+        out_ann_path = os.path.join(out_path, subf, 'anns')
+
+        out_paths.extend([out_img_path, out_ann_path])
+
+        mkdir_p(out_img_path)
+        mkdir_p(out_ann_path)
+
+        for f in tqdm(file_names, desc='Files', leave=False):
+
+            img_fnm = os.path.join(img_path, f + img_fmt)
+            ann_fnm = os.path.join(ann_path, f + '.xml')
+
+            if os.path.isfile(img_fnm) and os.path.isfile(ann_fnm):
+                copy2(ann_fnm, out_ann_path)
+                copy2(img_fnm, out_img_path)
+            else:
+                print(img_fnm)
+                print(ann_fnm)
+                raise ValueError('\nMissing image/annotation pair.\n')
+
+    print('------------------------------------')
+    print('Train / Validation Splits stored.\nPaths:')
+    print('\n\tTrain:')
+    print('\n\t\tImages: %s'%out_paths[0])
+    print('\n\t\tAnnotations: %s'%out_paths[1])
+
+    print('\n\t Validate:')
+    print('\n\t\tImages: %s'%out_paths[2])
+    print('\n\t\tAnnotations: %s'%out_paths[3])
+
+    print('------------------------------------')
+
+
+if __name__ == '__main__':
+    # python3 split_dataset.py -p 0.75 --in_ann ~/Downloads/VOCdevkit/VOC2012/Annotations/ --in_img ~/Downloads/VOCdevkit/VOC2012/JPEGImages/ --output ~/Documents/DATA/VOC
+
+    args = argparser.parse_args()
+
+    paths = (args.in_img, args.in_ann, args.output)
+    sample_from_dir(paths, args.p)
\ No newline at end of file
diff --git a/yolov2.py b/yolov2.py
new file mode 100644
index 0000000..1fba7cd
--- /dev/null
+++ b/yolov2.py
@@ -0,0 +1,299 @@
+
+import pickle, argparse, json, cv2, os
+import tensorflow as tf
+import numpy as np
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+
+from keras import backend as K
+from keras.layers import Lambda
+from keras.models import Model
+from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
+from keras.optimizers import SGD, Adam, RMSprop
+
+from net.utils import parse_annotation, yolo_normalize, mkdir_p, \
+setup_logging, TrainValTensorBoard, draw_boxes
+
+from net.netparams import YoloParams
+from net.netloss import YoloLoss
+from net.neteval import YoloDataGenerator, YoloEvaluate
+from net.netdecode import YoloOutProcess
+from net.netarch import YoloArchitecture
+
+
+class YoloInferenceModel(object):
+
+    def __init__(self, model):
+        self._yolo_out = YoloOutProcess()
+        self._inf_model = self._extend_processing(model)
+
+    def _extend_processing(self, model):
+        output = Lambda(self._yolo_out, name='lambda_2')(model.output)
+        return Model(model.input, output)
+
+
+    def _prepro_single_image(self, image):
+        image = cv2.resize(image,
+            (YoloParams.INPUT_SIZE, YoloParams.INPUT_SIZE))
+        # yolo normalize
+        image = yolo_normalize(image)
+        # cv2 loads channels as BGR; flip to RGB for the YOLO pass
+        image = image[:,:,::-1]
+        image = np.expand_dims(image, 0)
+
+        return image
+
+    def predict(self, image):
+
+        image = self._prepro_single_image(image)
+        output = self._inf_model.predict(image)[0]
+
+        if output.size == 0:
+            return [np.array([])]*4
+
+        boxes = output[:,:4]
+        scores = output[:,4]
+        label_idxs = output[:,5].astype(int)
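+        # Each output row from the NMS Lambda is [x1, y1, x2, y2, score, class].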
+
+        labels = [YoloParams.CLASS_LABELS[l] for l in label_idxs]
+
+        return boxes, scores, label_idxs, labels
+
+
+
+class YoloV2(object):
+
+
+    def __init__(self):
+
+        self.yolo_arch = YoloArchitecture()
+        self.yolo_loss = YoloLoss()
+
+        self.trained_model_name = YoloParams.OUT_MODEL_NAME
+        self.debug_timings = True
+
+
+    def run(self, **kwargs):
+
+        self.model = self.yolo_arch.get_model(self.yolo_loss)
+
+        if YoloParams.YOLO_MODE == 'train':
+            self.training()
+
+        else:
+            self.inf_model = YoloInferenceModel(self.model)
+
+            if YoloParams.YOLO_MODE == 'inference':
+                self.inference(YoloParams.PREDICT_IMAGE)
+
+            elif YoloParams.YOLO_MODE == 'validate':
+                self.validation()
+
+            elif YoloParams.YOLO_MODE == 'video':
+                self.video_inference(YoloParams.PREDICT_IMAGE)
+
+        # Sometimes bug: https://github.com/tensorflow/tensorflow/issues/3388
+        K.clear_session()
+
+
+    def inference(self, path):
+
+        flag = self.debug_timings
+
+        if os.path.isdir(path):
+            fnames = [os.path.join(path, f) for f in os.listdir(path)
+                if os.path.isfile(os.path.join(path, f))]
+
+            out_fname_mod = '.png'
+            out_path = os.path.join(path, 'out')
+            mkdir_p(out_path)
+
+        else:
+            fnames = [path]
+            out_fname_mod = '_pred.png'
+            out_path = os.path.dirname(path)
+            flag = False
+
+        for f in tqdm(fnames, desc='Processing Batch'):
+
+            image = cv2.imread(f)
+            plt.figure(figsize=(10,10))
+
+            boxes, scores, _, labels = self.inf_model.predict(image.copy())
+
+            image = draw_boxes(image, (boxes, scores, labels))
+            out_name = os.path.join(out_path, os.path.basename(f).split('.')[0] + out_fname_mod)
+            cv2.imwrite(out_name, image)
+
+
+    def video_inference(self, filename):
+
+        cap = cv2.VideoCapture(filename)
+        video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        size = (video_width, video_height)
+        fps = round(cap.get(cv2.CAP_PROP_FPS))
+
+        #fourcc = cv2.VideoWriter_fourcc(*"MJPG")
+        fourcc = cv2.VideoWriter_fourcc(*"XVID")
+
+        writer = cv2.VideoWriter(filename.split('.')[0]+"_pred.mp4", fourcc, fps, size)
+
+        for i in tqdm(range(video_len)):
+        #while(cap.isOpened()):
+
+            ret, frame = cap.read()
+            #if ret==True:
+
+            boxes, scores, _, labels = self.inf_model.predict(frame)
+
+            frame_pred = draw_boxes(frame, (boxes, scores, labels))
+
+            writer.write(frame_pred)
+
+            #cv2.imshow('Output',frame_pred)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+            #else:
+            #    break
+
+        cap.release()
+        writer.release()
+        cv2.destroyAllWindows()
+
+
+    def video_inference2(self, video_inp, video_out):
+        # MAKE ONE FOR WEBCAM, USE COMMENTED CODE FROM ABOVE
+        video_reader = cv2.VideoCapture(video_inp)
+
+        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
+        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
+
+        video_writer = cv2.VideoWriter(video_out,
+                        cv2.VideoWriter_fourcc(*'XVID'),
+                        50.0,
+                        (frame_w, frame_h))
+
+
+        for i in tqdm(range(nb_frames)):
+            ret, image = video_reader.read()
+
+            boxes, scores, _, labels = self.inf_model.predict(image)
+
+            image = draw_boxes(image, (boxes, scores, labels))
+
+            video_writer.write(np.uint8(image))
+
+        video_reader.release()
+        video_writer.release()
+
+
+
+    def validation(self):
+
+        valid_data, _ = parse_annotation(
+            YoloParams.VALIDATION_ANN_PATH, YoloParams.VALIDATION_IMG_PATH)
+
+        generator = YoloDataGenerator(valid_data, shuffle=True)
+
+        yolo_eval = YoloEvaluate(generator=generator, model=self.inf_model)
+        AP = yolo_eval()
+
+        mAP_values = []
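+        # The reported mAP is the mean of the per-class APs.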
+        for class_label, ap in AP.items():
+            print("AP( %s ): %.3f"%(class_label, ap))
+            mAP_values.append( ap )
+
+        print('-------------------------------')
+        print("mAP: %.3f"%(np.mean(mAP_values)))
+
+        return AP
+
+
+
+    def training(self):
+
+        train_data, _ = parse_annotation(
+            YoloParams.TRAIN_ANN_PATH, YoloParams.TRAIN_IMG_PATH)
+        valid_data, _ = parse_annotation(
+            YoloParams.VALIDATION_ANN_PATH, YoloParams.VALIDATION_IMG_PATH)
+
+        train_gen = YoloDataGenerator(train_data, shuffle=True)
+        valid_gen = YoloDataGenerator(valid_data, shuffle=True)
+
+
+        early_stop = EarlyStopping(monitor='val_loss',
+            min_delta=0.001,
+            patience=3,
+            mode='min',
+            verbose=1)
+
+
+        log_path = setup_logging()
+
+        checkpoint_path = os.path.join(log_path, self.trained_model_name)
+        checkpoint = ModelCheckpoint(
+            checkpoint_path,
+            monitor='val_loss',
+            verbose=1,
+            save_best_only=True,
+            mode='min',
+            period=1)
+
+        #tb_path = os.path.join(log_path, )
+        tensorboard = TrainValTensorBoard(
+            log_dir=log_path,
+            histogram_freq=0,
+            write_graph=True,
+            write_images=False)
+
+        optimizer = Adam(
+            lr=YoloParams.L_RATE,
+            beta_1=0.9,
+            beta_2=0.999,
+            epsilon=1e-08,
+            decay=0.0)
+
+        self.model.compile(loss=self.yolo_loss, optimizer=optimizer) #, metrics=['accuracy'])
+
+        self.model.fit_generator(
+            generator=train_gen,
+            steps_per_epoch=len(train_gen),
+            verbose=YoloParams.TRAIN_VERBOSE,
+            validation_data=valid_gen,
+            validation_steps=len(valid_gen),
+            callbacks=[early_stop, checkpoint, tensorboard],
+            epochs=YoloParams.NUM_EPOCHS,
+            max_queue_size=20)
+
+
+
+if __name__ == '__main__':
+    # Example: python3 yolov2.py data/birds.png -m models/coco/yolo_model_coco.h5 -c confs/config_coco.json -t 0.35
+
+    var = YoloV2()
+    var.run()
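For quick reference, a minimal sketch of driving inference programmatically rather than through dourflow.py (image and model paths are placeholders; note that net/netparams.py parses sys.argv at import time, so the process must be launched with the same style of arguments dourflow.py expects):

```python
import cv2

# Launch as e.g.: python3 sketch.py data/birds.png -m coco_model.h5 -c confs/config_coco.json
from net.netarch import YoloArchitecture
from net.netloss import YoloLoss
from yolov2 import YoloInferenceModel

model = YoloArchitecture().get_model(YoloLoss())   # loads the full keras model
inf_model = YoloInferenceModel(model)              # appends the NMS Lambda

image = cv2.imread('data/birds.png')               # any BGR image read by cv2
boxes, scores, _, labels = inf_model.predict(image)

for box, score, label in zip(boxes, scores, labels):
    print('%s (%.2f): %s' % (label, score, box))
```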