non-layer related commits

brodyh · Dec 1, 2014 · 97687c1 · 97687c1
1 parent d771232
commit 97687c1
Show file tree

Hide file tree

Showing 8 changed files with 950 additions and 12 deletions.
diff --git a/examples/filter_visualization_driving.ipynb b/examples/filter_visualization_driving.ipynb
diff --git a/models/brody/solver.prototxt b/models/brody/solver.prototxt
@@ -1,6 +1,6 @@
 net: "models/brody/train_val_brody.prototxt"
-test_iter: 1000
-test_interval: 1000
+test_iter: 20
+test_interval: 5000
 base_lr: 0.0000001
 lr_policy: "step"
 gamma: 0.1

diff --git a/models/brody/train_val_brody.prototxt b/models/brody/train_val_brody.prototxt
@@ -58,7 +58,7 @@ layers {
   include: { phase: TEST }
 }
 
-# Splite label layer into pixel and bounding box label.
+# Split label layer into pixel and bounding box label.
 layers {
   name: "slice-label"
   type: SLICE
@@ -510,7 +510,7 @@ layers {
     operation: PROD
   }
   top: "bb-loss"
-  loss_weight: 0.001
+  loss_weight: 0.1
 }
 
 #layers {

diff --git a/python/convert_mean.py b/python/convert_mean.py
@@ -0,0 +1,37 @@
+"""
+Convert mean binary proto to npy so that it can be used for visualizing
+network activity.
+"""
+import os
+import numpy as np
+from google.protobuf import text_format
+
+import caffe
+from caffe.proto import caffe_pb2
+from PIL import Image
+
+def main(argv):
+  if len(argv) != 2:
+    print 'Usage: %s mean_binary' % os.path.basename(sys.argv[0])
+    return
+
+  mean_data = caffe_pb2.BlobProto()
+  mean_data.ParseFromString(open(sys.argv[1]).read())
+
+  mean_data = np.array(mean_data.data)
+  mean_img = mean_data.reshape([3, 480, 640])
+  mean_img = mean_img[(2, 1, 0), :, :]
+  np.save(open('mean_r.npy', 'wb'), mean_img)
+
+  mean_img = np.transpose(mean_img, (1, 2, 0))
+  Image.fromarray(mean_img.astype('uint8')).save('mean.png')
+
+  real_img = caffe.io.load_image( \
+      '/deep/group/driving_data/andriluka/IMAGES/driving_data_q50_data/all_extracted/4-2-14-monterey-split_0_280S_a2/4-2-14-monterey-split_0_280S_a2_000341.jpeg')
+  real_img = caffe.io.resize_image(real_img * 255, (480, 640, 3))
+  Image.fromarray(real_img.astype('uint8')).save('original.png')
+  Image.fromarray(np.clip(real_img - mean_img, 0, 255).astype('uint8')).save('sub.png')
+
+if __name__ == '__main__':
+  import sys
+  main(sys.argv)
diff --git a/python/driving_mean.npy b/python/driving_mean.npy
diff --git a/python/driving_utils.py b/python/driving_utils.py
@@ -0,0 +1,113 @@
+import numpy as np
+import scipy
+import cv2
+
+class Rect():
+  def __init__(self, xmin, ymin, xmax, ymax):
+    if xmax < xmin:
+      xmax = xmin
+    if ymax < ymin:
+      ymax = ymin
+    self.xmin = int(xmin)
+    self.ymin = int(ymin)
+    self.xmax = int(xmax)
+    self.ymax = int(ymax)
+    self.w = self.xmax - self.xmin
+    self.h = self.ymax - self.ymin
+
+  def area(self):
+    return (self.xmax - self.xmin + 1) * (self.ymax - self.ymin + 1)
+
+  def jaccard(self, other):
+    xmin = max(self.xmin, other.xmin)
+    xmax = min(self.xmax, other.xmax)
+    ymin = max(self.ymin, other.ymin)
+    ymax = min(self.ymax, other.ymax)
+    if ymax >= ymin and xmax >= xmin:
+      intersect = (xmax - xmin + 1) * (ymax - ymin + 1)
+    else:
+      return 0
+    return float(intersect) / (self.area() + other.area() - intersect)
+
+  def __repr__(self):
+    return '(%d,%d,%d,%d)' % (self.xmin, self.ymin, self.xmax, self.ymax)
+
+  def __str__(self):
+    return '(%d,%d,%d,%d)' % (self.xmin, self.ymin, self.xmax, self.ymax)
+
+def get_gt_bbs(bbs):
+  assert len(bbs) % 4 == 0
+  rbbs = []
+  for i in range(0, len(bbs), 4):
+    rbbs.append(Rect(*bbs[i:i+4]))
+  return rbbs
+
+def draw_rects(image, rects):
+  for r in rects:
+    image[r.ymin:r.ymax+1, r.xmin:r.xmin+2, 1] = 1
+    image[r.ymin:r.ymax+1, r.xmax:r.xmax+2, 1] = 1
+    image[r.ymin:r.ymin+2, r.xmin:r.xmax+1, 1] = 1
+    image[r.ymax:r.ymax+2, r.xmin:r.xmax+1, 1] = 1
+  return image
+
+def get_mask(feat):
+  mask = np.empty((60, 80))
+  for y in range(15):
+    for x in range(20):
+      mask[y*4:(y+1)*4, x*4:(x+1)*4] = feat[:, y, x].reshape((4, 4))
+  return mask
+
+def dump_image(net, mask, rects, path):
+  image = net.deprocess('data', net.blobs['data'].data[4])
+  zoomed_mask = np.empty((480, 640))
+  zoomed_mask = scipy.ndimage.zoom(mask, 8, order=0)
+  masked_image = image.transpose((2, 0, 1))
+  masked_image[0, :, :] += zoomed_mask
+  masked_image = np.clip(masked_image, 0, 1)
+  masked_image = masked_image.transpose((1, 2, 0))
+  boxed_image = np.copy(masked_image)
+  if len(rects) > 0:
+    boxed_image = draw_rects(boxed_image, rects)
+  Image.fromarray(
+      (boxed_image * 255).astype('uint8')).save(path)
+
+
+def get_rects(feat, mask):
+  hard_mask = np.round(mask + 0.3)
+  bb = np.empty((4, 60, 80))
+  for y in range(15):
+    for x in range(20):
+      for c in range(4):
+        bb[c, y*4:(y+1)*4, x*4:(x+1)*4] = feat[c*16:(c+1)*16, y, x].reshape((4, 4))
+
+  for c in range(4):
+    bb[c, :, :] *= hard_mask
+
+  y_offset = np.array([np.arange(16, 480, 32)]).T
+  y_offset = np.tile(y_offset, (1, 20))
+  x_offset = np.arange(16, 640, 32)
+  x_offset = np.tile(x_offset, (15, 1))
+  y_offset = scipy.ndimage.zoom(y_offset, 4, order=0)
+  x_offset = scipy.ndimage.zoom(x_offset, 4, order=0)
+  bb[0, :, :] += x_offset
+  bb[2, :, :] += x_offset
+  bb[1, :, :] += y_offset
+  bb[3, :, :] += y_offset
+
+  selected_rects = hard_mask > 0
+  num_rects = np.sum(selected_rects)
+  rects = np.empty((num_rects, 4))
+  for i in range(4):
+    rects[:, i] = bb[i, selected_rects]
+  rects = rects[np.logical_and((rects[:, 2] - rects[:, 0]) > 0, (rects[:, 3] - rects[:, 1]) > 0), :]
+  rects[:, (2, 3)] -= rects[:, (0, 1)]
+  rects = np.clip(rects, 0, 640)
+  rects = [rects[i, :] for i in range(rects.shape[0])]
+  rects, scores = cv2.groupRectangles(rects, 4, 0.4)
+
+  rectangles = []
+  if len(rects) == 0:
+    return rectangles
+  for i in range(rects.shape[0]):
+    rectangles.append(Rect(rects[i, 0], rects[i, 1], rects[i, 0] + rects[i, 2], rects[i, 1] + rects[i, 3]))
+  return rectangles
diff --git a/python/evaluate_result.py b/python/evaluate_result.py
@@ -0,0 +1,82 @@
+import numpy as np
+import scipy
+import matplotlib.pyplot as plt
+import caffe
+import sys
+import Image
+import time
+import cv2
+import argparse
+
+from driving_utils import *
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--gt_label', required=True)
+  parser.add_argument('--dump_images', action='store_true')
+  parser.add_argument('--output_path')
+  args = parser.parse_args()
+
+  if args.dump_images:
+    assert args.output_path is not None
+
+  net = caffe.Classifier('/deep/u/willsong/caffe/models/brody/deploy.prototxt',
+                         '/deep/u/willsong/caffe/models/brody/caffe_brody_train_iter_200000.caffemodel')
+  net.set_phase_test()
+  net.set_mode_gpu()
+  net.set_mean('data', np.load('/deep/u/willsong/caffe/python/driving_mean.npy'))  # mean loaded from driving_mean.npy (not the ImageNet mean)
+  net.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
+  net.set_channel_swap('data', (2, 1, 0))  # the reference model has channels in BGR order instead of RGB
+
+  tp = 0
+  fp = 0
+  fn = 0
+  for line in open(args.gt_label).readlines():
+    tokens = line.split()
+    fname = tokens[0]
+    bbs = tokens[2:]
+    gt_bbs = get_gt_bbs(bbs)
+
+    img_name = fname.split('/')[-1]
+    # print img_name, '...',
+    start = time.time()
+    scores = net.ff([caffe.io.load_image(fname)])
+    # print 'done ff, took %f seconds' % (time.time() - start)
+
+    mask = get_mask(net.blobs['pixel-prob'].data[4])
+    rects = get_rects(net.blobs['bb-output'].data[4], mask)
+
+    if args.dump_images:
+      assert output_path != ''
+      image = net.deprocess('data', net.blobs['data'].data[4])
+      zoomed_mask = np.empty((480, 640))
+      zoomed_mask = scipy.ndimage.zoom(mask, 8, order=0)
+      masked_image = image.transpose((2, 0, 1))
+      masked_image[0, :, :] += zoomed_mask
+      masked_image = np.clip(masked_image, 0, 1)
+      masked_image = masked_image.transpose((1, 2, 0))
+      boxed_image = np.copy(masked_image)
+      if len(rects) > 0:
+        boxed_image = draw_rects(boxed_image, rects)
+      Image.fromarray(
+          (boxed_image * 255).astype('uint8')).save(args.output_path + '/' + img_name)
+
+    used_rect = set()
+    for bb in gt_bbs:
+      matched = False
+      for i, rect in enumerate(rects):
+        if i in used_rect:
+          continue
+        if bb.jaccard(rect) > 0.5:
+          tp += 1
+          used_rect.add(i)
+          matched = True
+          break
+      if not matched:
+        fn += 1
+    fp += len(rects) - len(used_rect)
+    if tp + fp > 0 and tp + fn > 0:
+      print 'Precision: %f,  Recall: %f' % (float(tp) / (tp + fp), float(tp) / (tp + fn))
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/convert_detection_label.cpp b/tools/convert_detection_label.cpp
@@ -113,14 +113,13 @@ bool ReadBoundingBoxLabelToDatum(
     }
   }
 
-  if (total_num_pixels == 0) {
-    total_num_pixels = 1;
-  }
-  float reweight_value = 1.0 / total_num_pixels;
-  for (int y = 0; y < img_height; ++y) {
-    for (int x = 0; x < img_width; ++x) {
-      if (labels[num_total_labels - 1]->at<float>(x, y) == 1.0) {
-        labels[num_total_labels - 1]->at<float>(x, y) = reweight_value;
+  if (total_num_pixels != 0) {
+    float reweight_value = 1.0 / total_num_pixels;
+    for (int y = 0; y < img_height; ++y) {
+      for (int x = 0; x < img_width; ++x) {
+        if (labels[num_total_labels - 1]->at<float>(y, x) == 1.0) {
+          labels[num_total_labels - 1]->at<float>(y, x) = reweight_value;
+        }
       }
     }
   }