Skip to content

Commit

Permalink
added option to load images in RGB
Browse files Browse the repository at this point in the history
  • Loading branch information
cheeyos committed Jan 27, 2015
1 parent 2a01554 commit 95db5ba
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 121 deletions.
6 changes: 2 additions & 4 deletions include/caffe/util/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,9 @@ inline void WriteProtoToBinaryFile(
WriteProtoToBinaryFile(proto, filename.c_str());
}

bool ReadBBLabelToDatum(const vector<int>& bbs, const int width, const int height,
const int grid_dim, float scaling, Datum* datum);

bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, const bool is_color, Datum* datum);
const int height, const int width, const bool is_color, Datum* datum,
const bool use_rgb = false);

inline bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, Datum* datum) {
Expand Down
4 changes: 2 additions & 2 deletions models/brody/solver_driving_softmax.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ net: "models/brody/train_val_driving_softmax_norm.prototxt"
test_iter: 20
test_interval: 5000
test_compute_loss: true
base_lr: 0.001
base_lr: 0.002
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 1450000
momentum: 0.9
weight_decay: 0.00005
weight_decay: 0.0005
snapshot: 1000
snapshot_prefix: "models/brody/driving_softmax_8x8_norm"
solver_mode: GPU
60 changes: 4 additions & 56 deletions models/brody/train_val_driving_normalization.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ layers {
top: "data"
top: "label"
data_param {
source: "new_driving_train"
source: "driving_train_rgb"
backend: LMDB
batch_size: 10
}
transform_param {
mean_file: "driving_mean.binaryproto"
mean_file: "driving_mean_rgb.binaryproto"
}
include: { phase: TRAIN }
}
Expand All @@ -24,12 +24,12 @@ layers {
top: "data"
top: "label"
data_param {
source: "new_driving_test"
source: "driving_test_rgb"
backend: LMDB
batch_size: 10
}
transform_param {
mean_file: "driving_mean.binaryproto"
mean_file: "driving_mean_rgb.binaryproto"
}
include: { phase: TEST }
}
Expand Down Expand Up @@ -509,55 +509,3 @@ layers {
top: "bb-loss"
loss_weight: 10.0
}

# L1 error loss
#layers {
# name: "bb-diff"
# type: ELTWISE
# bottom: "bb-masked-output"
# bottom: "bb-label"
# eltwise_param {
# operation: SUM
# coeff: 1.0
# coeff: -1.0
# }
# top: "bb-diff"
#}

#layers {
# name: "bb-loss"
# type: ABSVAL
# bottom: "bb-diff"
# top: "bb-loss"
# # 1 / (20 * 15 * 64)
# loss_weight: 0.00000000001
#}

#layers {
# name: "bb-loss-pow2"
# type: POWER
# bottom: "bb-diff"
# top: "bb-loss-pow2"
# # 1 / (20 * 15 * 64)
# power_param {
# power: 2
# }
#}

#layers {
# name: "bb-loss-height-normalize"
# type: ELTWISE
# bottom: "bb-loss-pow2"
# bottom: "height-block"
# eltwise_param {
# operation: PROD
# }
# top: "bb-loss"
# loss_weight: 0.1
#}

#layers {
# name: "bb-loss-silence"
# type: SILENCE
# bottom: "bb-loss"
#}
6 changes: 3 additions & 3 deletions python/convert_mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ def main(argv):
mean_data = np.array(mean_data.data)
print mean_data.shape
mean_img = mean_data.reshape([3, 480, 640])
# mean_img = mean_img[(2, 1, 0), :, :]
# np.save(open('new_driving_mean.npy', 'wb'), mean_img)
mean_img = mean_img[(2, 1, 0), :, :]
np.save(open('driving_mean_640x480_rgb.npy', 'wb'), mean_img)

mean_img = np.transpose(mean_img, (1, 2, 0))
Image.fromarray(mean_img.astype('uint8')).save('test_mean.png')
Image.fromarray(mean_img.astype('uint8')).save('driving_mean_640x480_rgb.png')

"""
real_img = caffe.io.load_image( \
Expand Down
60 changes: 4 additions & 56 deletions src/caffe/util/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,63 +66,10 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
CHECK(proto.SerializeToOstream(&output));
}

bool ReadBBLabelToDatum(const vector<int>& bbs, const int width, const int height,
const int grid_dim, const float scaling, Datum* datum) {
// 1 pixel label, 4 bounding box coordinates.
vector<cv::Mat *> labels;
for (int i = 0; i < 5; ++i) {
labels.push_back(new cv::Mat(height * grid_dim, width * grid_dim,
CV_32F, cv::Scalar(0.0)));
}

for (int i = 0; i < bbs.size(); i += 4) {
float xmin = bbs[i];
float ymin = bbs[i + 1];
float xmax = bbs[i + 2];
float ymax = bbs[i + 3];
float width = xmax - xmin;
float height = ymax - ymin;

int gxmin = cvRound((xmin + width / 4) * scaling);
int gxmax = cvRound((xmax - width / 4) * scaling);
int gymin = cvRound((ymin + height / 4) * scaling);
int gymax = cvRound((ymax - height / 4) * scaling);

cv::Rect r(gxmin, gymin, gxmax, gymax);
float flabels[5] = {1.0, xmin, ymin, xmax, ymax};
for (int j = 0; j < 5; ++j) {
cv::Mat roi(*labels[j], r);
roi = cv::Scalar(flabels[j]);
}
}

datum->set_channels(5 * grid_dim * grid_dim);
datum->set_height(height);
datum->set_width(width);
datum->clear_data();
datum->clear_float_data();

for (int m = 0; m < 5; ++m) {
for (int dy = 0; dy < grid_dim; ++dy) {
for (int dx = 0; dx < grid_dim; ++dx) {
for (int y = 0; y < height; y += grid_dim) {
for (int x = 0; x < width; x += grid_dim) {
datum->add_float_data(labels[m]->at<float>(y + dy, x + dx));
}
}
}
}
}

for (int i = 0; i < 5; ++i) {
delete labels[i];
}

return true;
}

bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, const bool is_color, Datum* datum) {
const int height, const int width, const bool is_color, Datum* datum,
const bool use_rgb) {
cv::Mat cv_img;
int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
CV_LOAD_IMAGE_GRAYSCALE);
Expand All @@ -148,10 +95,11 @@ bool ReadImageToDatum(const string& filename, const int label,
string* datum_string = datum->mutable_data();
if (is_color) {
for (int c = 0; c < num_channels; ++c) {
int channel = use_rgb ? 2 - c : c;
for (int h = 0; h < cv_img.rows; ++h) {
for (int w = 0; w < cv_img.cols; ++w) {
datum_string->push_back(
static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[channel]));
}
}
}
Expand Down
1 change: 1 addition & 0 deletions tools/convert_driving_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, true,
"Randomly shuffle the order of images and their labels");
DEFINE_bool(use_rgb, false, "use RGB channels");
DEFINE_int32(width, 20, "Number of grids horizontally.");
DEFINE_int32(height, 15, "Number of grids vertically.");
DEFINE_int32(grid_dim, 8, "grid_dim x grid_dim number of pixels per each grid.");
Expand Down

0 comments on commit 95db5ba

Please sign in to comment.