diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index a27f2528a0f..da47cfb6851 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -89,11 +89,9 @@ inline void WriteProtoToBinaryFile( WriteProtoToBinaryFile(proto, filename.c_str()); } -bool ReadBBLabelToDatum(const vector& bbs, const int width, const int height, - const int grid_dim, float scaling, Datum* datum); - bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, Datum* datum); + const int height, const int width, const bool is_color, Datum* datum, + const bool use_rgb = false); inline bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, Datum* datum) { diff --git a/models/brody/solver_driving_softmax.prototxt b/models/brody/solver_driving_softmax.prototxt index 089cf2868f1..1036a393acd 100644 --- a/models/brody/solver_driving_softmax.prototxt +++ b/models/brody/solver_driving_softmax.prototxt @@ -2,14 +2,14 @@ net: "models/brody/train_val_driving_softmax_norm.prototxt" test_iter: 20 test_interval: 5000 test_compute_loss: true -base_lr: 0.001 +base_lr: 0.002 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 20 max_iter: 1450000 momentum: 0.9 -weight_decay: 0.00005 +weight_decay: 0.0005 snapshot: 1000 snapshot_prefix: "models/brody/driving_softmax_8x8_norm" solver_mode: GPU diff --git a/models/brody/train_val_driving_normalization.prototxt b/models/brody/train_val_driving_normalization.prototxt index d4a3e82fb6e..1c766c8f50a 100644 --- a/models/brody/train_val_driving_normalization.prototxt +++ b/models/brody/train_val_driving_normalization.prototxt @@ -7,12 +7,12 @@ layers { top: "data" top: "label" data_param { - source: "new_driving_train" + source: "driving_train_rgb" backend: LMDB batch_size: 10 } transform_param { - mean_file: "driving_mean.binaryproto" + mean_file: "driving_mean_rgb.binaryproto" } include: { phase: TRAIN } } @@ -24,12 +24,12 @@ layers { top: "data" top: "label" data_param { - source: "new_driving_test" + source: "driving_test_rgb" backend: LMDB batch_size: 10 } transform_param { - mean_file: "driving_mean.binaryproto" + mean_file: "driving_mean_rgb.binaryproto" } include: { phase: TEST } } @@ -509,55 +509,3 @@ layers { top: "bb-loss" loss_weight: 10.0 } - -# L1 error loss -#layers { -# name: "bb-diff" -# type: ELTWISE -# bottom: "bb-masked-output" -# bottom: "bb-label" -# eltwise_param { -# operation: SUM -# coeff: 1.0 -# coeff: -1.0 -# } -# top: "bb-diff" -#} - -#layers { -# name: "bb-loss" -# type: ABSVAL -# bottom: "bb-diff" -# top: "bb-loss" -# # 1 / (20 * 15 * 64) -# loss_weight: 0.00000000001 -#} - -#layers { -# name: "bb-loss-pow2" -# type: POWER -# bottom: "bb-diff" -# top: "bb-loss-pow2" -# # 1 / (20 * 15 * 64) -# power_param { -# power: 2 -# } -#} - -#layers { -# name: "bb-loss-height-normalize" -# type: ELTWISE -# bottom: "bb-loss-pow2" -# bottom: "height-block" -# eltwise_param { -# operation: PROD -# } -# top: "bb-loss" -# loss_weight: 0.1 -#} - -#layers { -# name: "bb-loss-silence" -# type: SILENCE -# bottom: "bb-loss" -#} diff --git a/python/convert_mean.py b/python/convert_mean.py index cd7df3cdcc8..db781cae622 100755 --- a/python/convert_mean.py +++ b/python/convert_mean.py @@ -21,11 +21,11 @@ def main(argv): mean_data = np.array(mean_data.data) print mean_data.shape mean_img = mean_data.reshape([3, 480, 640]) -# mean_img = mean_img[(2, 1, 0), :, :] -# np.save(open('new_driving_mean.npy', 'wb'), mean_img) + mean_img = mean_img[(2, 1, 0), :, :] + np.save(open('driving_mean_640x480_rgb.npy', 'wb'), mean_img) mean_img = np.transpose(mean_img, (1, 2, 0)) - Image.fromarray(mean_img.astype('uint8')).save('test_mean.png') + Image.fromarray(mean_img.astype('uint8')).save('driving_mean_640x480_rgb.png') """ real_img = caffe.io.load_image( \ diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index 991ef91491d..23f669ab01a 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -66,63 +66,10 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) { CHECK(proto.SerializeToOstream(&output)); } -bool ReadBBLabelToDatum(const vector& bbs, const int width, const int height, - const int grid_dim, const float scaling, Datum* datum) { - // 1 pixel label, 4 bounding box coordinates. - vector labels; - for (int i = 0; i < 5; ++i) { - labels.push_back(new cv::Mat(height * grid_dim, width * grid_dim, - CV_32F, cv::Scalar(0.0))); - } - - for (int i = 0; i < bbs.size(); i += 4) { - float xmin = bbs[i]; - float ymin = bbs[i + 1]; - float xmax = bbs[i + 2]; - float ymax = bbs[i + 3]; - float width = xmax - xmin; - float height = ymax - ymin; - - int gxmin = cvRound((xmin + width / 4) * scaling); - int gxmax = cvRound((xmax - width / 4) * scaling); - int gymin = cvRound((ymin + height / 4) * scaling); - int gymax = cvRound((ymax - height / 4) * scaling); - - cv::Rect r(gxmin, gymin, gxmax, gymax); - float flabels[5] = {1.0, xmin, ymin, xmax, ymax}; - for (int j = 0; j < 5; ++j) { - cv::Mat roi(*labels[j], r); - roi = cv::Scalar(flabels[j]); - } - } - - datum->set_channels(5 * grid_dim * grid_dim); - datum->set_height(height); - datum->set_width(width); - datum->clear_data(); - datum->clear_float_data(); - - for (int m = 0; m < 5; ++m) { - for (int dy = 0; dy < grid_dim; ++dy) { - for (int dx = 0; dx < grid_dim; ++dx) { - for (int y = 0; y < height; y += grid_dim) { - for (int x = 0; x < width; x += grid_dim) { - datum->add_float_data(labels[m]->at(y + dy, x + dx)); - } - } - } - } - } - - for (int i = 0; i < 5; ++i) { - delete labels[i]; - } - - return true; -} bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, Datum* datum) { + const int height, const int width, const bool is_color, Datum* datum, + const bool use_rgb) { cv::Mat cv_img; int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); @@ -148,10 +95,11 @@ bool ReadImageToDatum(const string& filename, const int label, string* datum_string = datum->mutable_data(); if (is_color) { for (int c = 0; c < num_channels; ++c) { + int channel = use_rgb ? 2 - c : c; for (int h = 0; h < cv_img.rows; ++h) { for (int w = 0; w < cv_img.cols; ++w) { datum_string->push_back( - static_cast(cv_img.at(h, w)[c])); + static_cast(cv_img.at(h, w)[channel])); } } } diff --git a/tools/convert_driving_data.cpp b/tools/convert_driving_data.cpp index 33e402d7e81..6c720607aa7 100644 --- a/tools/convert_driving_data.cpp +++ b/tools/convert_driving_data.cpp @@ -44,6 +44,7 @@ DEFINE_bool(gray, false, "When this option is on, treat images as grayscale ones"); DEFINE_bool(shuffle, true, "Randomly shuffle the order of images and their labels"); +DEFINE_bool(use_rgb, false, "use RGB channels"); DEFINE_int32(width, 20, "Number of grids horizontally."); DEFINE_int32(height, 15, "Number of grids vertically."); DEFINE_int32(grid_dim, 8, "grid_dim x grid_dim number of pixels per each grid.");