Added option to load images in RGB channel order

brodyh · Jan 27, 2015 · 95db5ba · 95db5ba
1 parent 2a01554
commit 95db5ba
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 121 deletions.
diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
@@ -89,11 +89,9 @@ inline void WriteProtoToBinaryFile(
   WriteProtoToBinaryFile(proto, filename.c_str());
 }
 
-bool ReadBBLabelToDatum(const vector<int>& bbs, const int width, const int height,
-    const int grid_dim, float scaling, Datum* datum);
-
 bool ReadImageToDatum(const string& filename, const int label,
-    const int height, const int width, const bool is_color, Datum* datum);
+    const int height, const int width, const bool is_color, Datum* datum,
+    const bool use_rgb = false);
 
 inline bool ReadImageToDatum(const string& filename, const int label,
     const int height, const int width, Datum* datum) {

diff --git a/models/brody/solver_driving_softmax.prototxt b/models/brody/solver_driving_softmax.prototxt
@@ -2,14 +2,14 @@ net: "models/brody/train_val_driving_softmax_norm.prototxt"
 test_iter: 20
 test_interval: 5000
 test_compute_loss: true
-base_lr: 0.001
+base_lr: 0.002
 lr_policy: "step"
 gamma: 0.1
 stepsize: 100000
 display: 20
 max_iter: 1450000
 momentum: 0.9
-weight_decay: 0.00005
+weight_decay: 0.0005
 snapshot: 1000
 snapshot_prefix: "models/brody/driving_softmax_8x8_norm"
 solver_mode: GPU
diff --git a/models/brody/train_val_driving_normalization.prototxt b/models/brody/train_val_driving_normalization.prototxt
@@ -7,12 +7,12 @@ layers {
   top: "data"
   top: "label"
   data_param {
-    source: "new_driving_train"
+    source: "driving_train_rgb"
     backend: LMDB
     batch_size: 10
   }
   transform_param {
-    mean_file: "driving_mean.binaryproto"
+    mean_file: "driving_mean_rgb.binaryproto"
   }
   include: { phase: TRAIN }
 }
@@ -24,12 +24,12 @@ layers {
   top: "data"
   top: "label"
   data_param {
-    source: "new_driving_test"
+    source: "driving_test_rgb"
     backend: LMDB
     batch_size: 10
   }
   transform_param {
-    mean_file: "driving_mean.binaryproto"
+    mean_file: "driving_mean_rgb.binaryproto"
   }
   include: { phase: TEST }
 }
@@ -509,55 +509,3 @@ layers {
   top: "bb-loss"
   loss_weight: 10.0
 }
-
-# L1 error loss
-#layers {
-#  name: "bb-diff"
-#  type: ELTWISE
-#  bottom: "bb-masked-output"
-#  bottom: "bb-label"
-#  eltwise_param {
-#    operation: SUM
-#    coeff: 1.0
-#    coeff: -1.0
-#  }
-#  top: "bb-diff"
-#}
-
-#layers {
-#  name: "bb-loss"
-#  type: ABSVAL
-#  bottom: "bb-diff"
-#  top: "bb-loss"
-#  # 1 / (20 * 15 * 64)
-#  loss_weight: 0.00000000001
-#}
-
-#layers {
-#  name: "bb-loss-pow2"
-#  type: POWER
-#  bottom: "bb-diff"
-#  top: "bb-loss-pow2"
-#  # 1 / (20 * 15 * 64)
-#  power_param {
-#    power: 2
-#  }
-#}
-
-#layers {
-#  name: "bb-loss-height-normalize"
-#  type: ELTWISE
-#  bottom: "bb-loss-pow2"
-#  bottom: "height-block"
-#  eltwise_param {
-#    operation: PROD
-#  }
-#  top: "bb-loss"
-#  loss_weight: 0.1
-#}
-
-#layers {
-#  name: "bb-loss-silence"
-#  type: SILENCE
-#  bottom: "bb-loss"
-#}
diff --git a/python/convert_mean.py b/python/convert_mean.py
@@ -21,11 +21,11 @@ def main(argv):
   mean_data = np.array(mean_data.data)
   print mean_data.shape
   mean_img = mean_data.reshape([3, 480, 640])
-#  mean_img = mean_img[(2, 1, 0), :, :]
-#  np.save(open('new_driving_mean.npy', 'wb'), mean_img)
+  mean_img = mean_img[(2, 1, 0), :, :]
+  np.save(open('driving_mean_640x480_rgb.npy', 'wb'), mean_img)
 
   mean_img = np.transpose(mean_img, (1, 2, 0))
-  Image.fromarray(mean_img.astype('uint8')).save('test_mean.png')
+  Image.fromarray(mean_img.astype('uint8')).save('driving_mean_640x480_rgb.png')
 
   """
   real_img = caffe.io.load_image( \

diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
@@ -66,63 +66,10 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
   CHECK(proto.SerializeToOstream(&output));
 }
 
-bool ReadBBLabelToDatum(const vector<int>& bbs, const int width, const int height,
-    const int grid_dim, const float scaling, Datum* datum) {
-  // 1 pixel label, 4 bounding box coordinates.
-  vector<cv::Mat *> labels;
-  for (int i = 0; i < 5; ++i) {
-    labels.push_back(new cv::Mat(height * grid_dim, width * grid_dim,
-        CV_32F, cv::Scalar(0.0)));
-  }
-
-  for (int i = 0; i < bbs.size(); i += 4) {
-    float xmin = bbs[i];
-    float ymin = bbs[i + 1];
-    float xmax = bbs[i + 2];
-    float ymax = bbs[i + 3];
-    float width = xmax - xmin;
-    float height = ymax - ymin;
-
-    int gxmin = cvRound((xmin + width / 4) * scaling);
-    int gxmax = cvRound((xmax - width / 4) * scaling);
-    int gymin = cvRound((ymin + height / 4) * scaling);
-    int gymax = cvRound((ymax - height / 4) * scaling);
-
-    cv::Rect r(gxmin, gymin, gxmax, gymax);
-    float flabels[5] = {1.0, xmin, ymin, xmax, ymax};
-    for (int j = 0; j < 5; ++j) {
-      cv::Mat roi(*labels[j], r);
-      roi = cv::Scalar(flabels[j]);
-    }
-  }
-
-  datum->set_channels(5 * grid_dim * grid_dim);
-  datum->set_height(height);
-  datum->set_width(width);
-  datum->clear_data();
-  datum->clear_float_data();
-
-  for (int m = 0; m < 5; ++m) {
-    for (int dy = 0; dy < grid_dim; ++dy) {
-      for (int dx = 0; dx < grid_dim; ++dx) {
-        for (int y = 0; y < height; y += grid_dim) {
-          for (int x = 0; x < width; x += grid_dim) {
-            datum->add_float_data(labels[m]->at<float>(y + dy, x + dx));
-          }
-        }
-      }
-    }
-  }
-
-  for (int i = 0; i < 5; ++i) {
-    delete labels[i];
-  }
-
-  return true;
-}
 
 bool ReadImageToDatum(const string& filename, const int label,
-    const int height, const int width, const bool is_color, Datum* datum) {
+    const int height, const int width, const bool is_color, Datum* datum,
+    const bool use_rgb) {
   cv::Mat cv_img;
   int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
     CV_LOAD_IMAGE_GRAYSCALE);
@@ -148,10 +95,11 @@ bool ReadImageToDatum(const string& filename, const int label,
   string* datum_string = datum->mutable_data();
   if (is_color) {
     for (int c = 0; c < num_channels; ++c) {
+      int channel = use_rgb ? 2 - c : c;
       for (int h = 0; h < cv_img.rows; ++h) {
         for (int w = 0; w < cv_img.cols; ++w) {
           datum_string->push_back(
-            static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
+            static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[channel]));
         }
       }
     }

diff --git a/tools/convert_driving_data.cpp b/tools/convert_driving_data.cpp
@@ -44,6 +44,7 @@ DEFINE_bool(gray, false,
     "When this option is on, treat images as grayscale ones");
 DEFINE_bool(shuffle, true,
     "Randomly shuffle the order of images and their labels");
+DEFINE_bool(use_rgb, false, "use RGB channels");
 DEFINE_int32(width, 20, "Number of grids horizontally.");
 DEFINE_int32(height, 15, "Number of grids vertically.");
 DEFINE_int32(grid_dim, 8, "grid_dim x grid_dim number of pixels per each grid.");