more changes and bug fixes for driving network
cheeyos committed Jan 26, 2015
1 parent 3258f77 commit a149449
Showing 15 changed files with 207 additions and 122 deletions.
209 changes: 127 additions & 82 deletions examples/filter_visualization_car_deeppy.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions include/caffe/blob.hpp
@@ -100,6 +100,7 @@ class Blob {
   Dtype* mutable_gpu_diff();
   void Update();
   void FromProto(const BlobProto& proto);
+  void FromProtoDataOnly(const BlobProto& proto);
   void FromProtoReplicate(const BlobProto& proto, const int num_replicates);
   void ToProto(BlobProto* proto, bool write_diff = false) const;
 
2 changes: 1 addition & 1 deletion models/brody/solver_driving.prototxt
@@ -2,7 +2,7 @@ net: "models/brody/train_val_driving.prototxt"
 test_iter: 20
 test_interval: 5000
 test_compute_loss: true
-base_lr: 0.0000001
+base_lr: 0.001
 lr_policy: "step"
 gamma: 0.1
 stepsize: 100000
6 changes: 3 additions & 3 deletions models/brody/solver_driving_softmax.prototxt
@@ -2,14 +2,14 @@ net: "models/brody/train_val_driving_softmax.prototxt"
 test_iter: 20
 test_interval: 5000
 test_compute_loss: true
-base_lr: 0.0000001
+base_lr: 0.001
 lr_policy: "step"
 gamma: 0.1
 stepsize: 100000
 display: 20
 max_iter: 1450000
 momentum: 0.9
 weight_decay: 0.00005
-snapshot: 10000
-snapshot_prefix: "models/brody/caffe_driving_softmax"
+snapshot: 1000
+snapshot_prefix: "models/brody/driving_softmax_8x8"
 solver_mode: GPU
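
Note: both driving solvers use Caffe's "step" policy, under which the effective rate is lr = base_lr * gamma^floor(iter / stepsize): 1e-3 for the first 100k iterations, 1e-4 for the next 100k, and so on. The old base_lr of 1e-7 (presumably a debugging leftover) would have made the updates negligible from the start, and the more frequent snapshots plus the new _8x8 prefix suggest a fresh run at the higher label resolution.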
18 changes: 9 additions & 9 deletions models/brody/train_val_driving_softmax.prototxt
@@ -7,7 +7,7 @@ layers {
   top: "data"
   top: "label"
   data_param {
-    source: "driving_train"
+    source: "new_driving_train"
     backend: LMDB
     batch_size: 5
   }
@@ -24,7 +24,7 @@ layers {
   top: "data"
   top: "label"
   data_param {
-    source: "driving_test"
+    source: "new_driving_test"
     backend: LMDB
     batch_size: 5
   }
@@ -363,12 +363,12 @@ layers {
   type: CONVOLUTION
   bottom: "fc7-conv"
   top: "bb-output"
-  blobs_lr: 100
-  blobs_lr: 200
-  weight_decay: 0.00001
+  blobs_lr: 10
+  blobs_lr: 20
+  weight_decay: 0.1
   weight_decay: 0
   convolution_param {
-    num_output: 64
+    num_output: 256
     kernel_size: 1
     weight_filler {
       type: "gaussian"
@@ -391,7 +391,7 @@ layers {
   weight_decay: 1
   weight_decay: 0
   convolution_param {
-    num_output: 32
+    num_output: 128
     kernel_size: 1
     weight_filler {
       type: "gaussian"
@@ -410,7 +410,7 @@ layers {
   bottom: "pixel-conv"
   top: "pixel-conv-tiled"
   tiling_param {
-    tile_dim: 4
+    tile_dim: 8
   }
 }

@@ -420,7 +420,7 @@ layers {
   bottom: "bb-output"
   top: "bb-output-tiled"
   tiling_param {
-    tile_dim: 4
+    tile_dim: 8
   }
 }
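
Note: these constants are coupled. Assuming the TILING layers rearrange each group of tile_dim^2 channels into one tile_dim x tile_dim spatial block, pixel-conv's 128 outputs are 2 softmax classes x 8 x 8, bb-output's 256 are 4 box coordinates x 8 x 8 (the old 32 and 64 matched tile_dim 4), and grid_dim in tools/convert_driving_data.cpp changes to 8 below to keep the labels in step. A minimal shape-bookkeeping sketch of that assumption; the exact channel-to-pixel interleaving is hypothetical:

    // 128 channels @ 20x15 -> 2 channels @ 160x120 (element counts match: 38400).
    #include <cstdio>
    #include <vector>

    int main() {
      const int tile = 8, classes = 2, H = 15, W = 20;
      std::vector<float> in(classes * tile * tile * H * W);
      std::vector<float> out(classes * H * tile * W * tile);
      for (int k = 0; k < classes; ++k)
        for (int ty = 0; ty < tile; ++ty)
          for (int tx = 0; tx < tile; ++tx)
            for (int y = 0; y < H; ++y)
              for (int x = 0; x < W; ++x) {
                const int c = (k * tile + ty) * tile + tx;         // input channel
                const int oy = y * tile + ty, ox = x * tile + tx;  // output pixel
                out[(k * H * tile + oy) * W * tile + ox] = in[(c * H + y) * W + x];
              }
      printf("%d ch @ %dx%d -> %d ch @ %dx%d\n", classes * tile * tile, W, H,
             classes, W * tile, H * tile);
      return 0;
    }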

9 changes: 5 additions & 4 deletions python/convert_mean.py
@@ -21,20 +21,21 @@ def main(argv):
     mean_data = np.array(mean_data.data)
     print mean_data.shape
     mean_img = mean_data.reshape([3, 480, 640])
-    mean_img = mean_img[(2, 1, 0), :, :]
-    np.save(open('new_driving_mean.npy', 'wb'), mean_img)
+    # mean_img = mean_img[(2, 1, 0), :, :]
+    # np.save(open('new_driving_mean.npy', 'wb'), mean_img)
+
+    """
     mean_img = np.transpose(mean_img, (1, 2, 0))
-    Image.fromarray(mean_img.astype('uint8')).save('mean.png')
+    Image.fromarray(mean_img.astype('uint8')).save('test_mean.png')
 
-    """
     real_img = caffe.io.load_image( \
         '/deep/group/driving_data/andriluka/IMAGES/driving_data_q50_data/all_extracted/4-2-14-monterey-split_0_280S_a2/4-2-14-monterey-split_0_280S_a2_000341.jpeg')
     real_img = caffe.io.resize_image(real_img * 255, (480, 640, 3))
     Image.fromarray(real_img.astype('uint8')).save('original.png')
     Image.fromarray(np.clip(real_img - mean_img, 0, 255).astype('uint8')).save('sub.png')
     """
 
 
 if __name__ == '__main__':
     import sys
     main(sys.argv)
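
Note: the mean blob is stored channel-first (3 x 480 x 640), and a mean computed from an OpenCV-backed LMDB is typically in BGR channel order; the now-disabled (2, 1, 0) indexing reversed the channel axis to RGB before writing new_driving_mean.npy. Commenting out the np.save as well suggests the .npy had already been generated and this run was only for inspection.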
8 changes: 8 additions & 0 deletions src/caffe/blob.cpp
@@ -265,6 +265,14 @@ void Blob<Dtype>::FromProtoReplicate(const BlobProto& proto, const int num_repli
   }
 }
 
+template <typename Dtype>
+void Blob<Dtype>::FromProtoDataOnly(const BlobProto& proto) {
+  // copy data
+  Dtype* data_vec = mutable_cpu_data();
+  for (int i = 0; i < count_; ++i) {
+    data_vec[i] = proto.data(i);
+  }
+}
 
 template <typename Dtype>
 void Blob<Dtype>::ToProto(BlobProto* proto, bool write_diff) const {
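
Note: unlike FromProto, FromProtoDataOnly copies only the data array (it neither reshapes the blob nor copies the diff), so the proto's values are reinterpreted under the target blob's existing shape. It also assumes proto.data_size() matches count_; a CHECK_EQ(count_, proto.data_size()) at the top would make that explicit, since the caller in net.cpp below only verifies that total element counts agree.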
30 changes: 22 additions & 8 deletions src/caffe/layers/driving_data_layer.cpp
@@ -283,7 +283,6 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatumLegacy(
         new cv::Mat(full_label_height, full_label_width, CV_32F, cv::Scalar(0.0)));
   }
 
-  int total_num_pixels = 0;
   for (int i = 0; i < data.car_boxes_size(); ++i) {
     int xmin = data.car_boxes(i).xmin();
     int ymin = data.car_boxes(i).ymin();
@@ -319,7 +318,6 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatumLegacy(
         gxmax - gxmin + (gxmax == gxmin && gxmax < full_label_width ? 1 : 0),
         gymax - gymin + (gymax == gymin && gymax < full_label_height ? 1 : 0));
 
-    total_num_pixels += r.area();
     int normalization_height = ymax - ymin == 0 ? 1 : ymax - ymin;
     CHECK_GT(normalization_height, 0);
     int normalization_width = xmax - xmin == 0 ? 1 : xmax - xmin;
@@ -339,6 +337,15 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatumLegacy(
     }
   }
 
+
+  int total_num_pixels = 0;
+  for (int y = 0; y < full_label_height; ++y) {
+    for (int x = 0; x < full_label_width; ++x) {
+      if (labels[num_total_labels - 1]->at<float>(y, x) == 1.0) {
+        total_num_pixels++;
+      }
+    }
+  }
   if (total_num_pixels != 0) {
     float reweight_value = 1.0 / total_num_pixels;
     for (int y = 0; y < full_label_height; ++y) {
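
Note: this recount is the bug fix. The removed code summed r.area() over all boxes, so pixels covered by overlapping boxes were counted more than once even though each mask pixel is only ever set to 1, which made reweight_value = 1.0 / total_num_pixels too small. Counting the pixels actually set in the last label channel yields the correct normalizer. A standalone OpenCV illustration (box coordinates made up):

    #include <opencv2/core/core.hpp>
    #include <cstdio>

    int main() {
      cv::Mat mask(60, 80, CV_32F, cv::Scalar(0.0));
      cv::Rect a(0, 0, 10, 10), b(5, 5, 10, 10);  // boxes overlap on a 5x5 patch
      mask(a) = cv::Scalar(1.0);
      mask(b) = cv::Scalar(1.0);
      printf("old normalizer: %d, new normalizer: %d\n",
             a.area() + b.area(),       // 200: the overlap is counted twice
             cv::countNonZero(mask));   // 175: distinct pixels actually set
      return 0;
    }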
@@ -399,6 +406,7 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatum(
   const int height = data.car_label_height();
   const int full_label_width = width * grid_dim;
   const int full_label_height = height * grid_dim;
+  const float half_shrink_factor = data.car_shrink_factor() / 2;
   const float scaling = static_cast<float>(full_label_width) / data.car_cropped_width();
 
   // 1 pixel label, 4 bounding box coordinates, 2 normalization labels.
@@ -412,7 +420,6 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatum(
         new cv::Mat(full_label_height, full_label_width, CV_32F, cv::Scalar(0.0)));
   }
 
-  int total_num_pixels = 0;
   for (int i = 0; i < data.car_boxes_size(); ++i) {
     int xmin = data.car_boxes(i).xmin();
     int ymin = data.car_boxes(i).ymin();
@@ -425,10 +432,10 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatum(
     float w = xmax - xmin;
     float h = ymax - ymin;
     // shrink bboxes
-    int gxmin = cvRound((xmin + w / 4) * scaling);
-    int gxmax = cvRound((xmax - w / 4) * scaling);
-    int gymin = cvRound((ymin + h / 4) * scaling);
-    int gymax = cvRound((ymax - h / 4) * scaling);
+    int gxmin = cvRound((xmin + w * half_shrink_factor) * scaling);
+    int gxmax = cvRound((xmax - w * half_shrink_factor) * scaling);
+    int gymin = cvRound((ymin + h * half_shrink_factor) * scaling);
+    int gymax = cvRound((ymax - h * half_shrink_factor) * scaling);
 
     CHECK_LE(gxmin, gxmax);
     CHECK_LE(gymin, gymax);
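
Note: with the proto default car_shrink_factor = 0.5, half_shrink_factor is 0.25 and these lines reproduce the old hard-coded w / 4 and h / 4 shrink exactly. The converter below passes 0.75 instead, trimming 0.375 of the width and height from each side so the mask keeps only the central 1 - 2 * 0.375 = 0.25 of each box dimension.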
@@ -448,7 +455,6 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatum(
         gxmax - gxmin + (gxmax == gxmin && gxmax < full_label_width ? 1 : 0),
         gymax - gymin + (gymax == gymin && gymax < full_label_height ? 1 : 0));
 
-    total_num_pixels += r.area();
     int normalization_height = ymax - ymin == 0 ? 1 : ymax - ymin;
     CHECK_GT(normalization_height, 0);
     int normalization_width = xmax - xmin == 0 ? 1 : xmax - xmin;
@@ -468,6 +474,14 @@ bool DrivingDataLayer<Dtype>::ReadBoundingBoxLabelToDatum(
     }
   }
 
+  int total_num_pixels = 0;
+  for (int y = 0; y < full_label_height; ++y) {
+    for (int x = 0; x < full_label_width; ++x) {
+      if (labels[num_total_labels - 1]->at<float>(y, x) == 1.0) {
+        total_num_pixels++;
+      }
+    }
+  }
   if (total_num_pixels != 0) {
     float reweight_value = 1.0 / total_num_pixels;
     for (int y = 0; y < full_label_height; ++y) {
2 changes: 2 additions & 0 deletions src/caffe/layers/l1_loss_layer.cu
@@ -20,6 +20,7 @@ void L1LossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   caffe_gpu_asum(count, diff_.gpu_data(), &abs_sum);
   caffe_gpu_sign(count, diff_.gpu_data(), sign_.mutable_gpu_data());
   Dtype loss = abs_sum / bottom[0]->num();
+  // Dtype loss = abs_sum / bottom[0]->count();
   (*top)[0]->mutable_cpu_data()[0] = loss;
 }
 
@@ -30,6 +31,7 @@ void L1LossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     if (propagate_down[i]) {
       const Dtype sign = (i == 0) ? 1 : -1;
       const Dtype alpha = sign * top[0]->cpu_diff()[0] / (*bottom)[i]->num();
+      // const Dtype alpha = sign * top[0]->cpu_diff()[0] / (*bottom)[i]->count();
       caffe_gpu_axpby(
           (*bottom)[i]->count(),              // count
          alpha,                              // alpha
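
Note: the commented-out variants would normalize by count() (num x channels x height x width) rather than num() (batch size alone), i.e. average the L1 loss per element instead of per image. Whichever normalizer is chosen, Forward_gpu and Backward_gpu must use the same one (hence the paired comments), and per-element averaging would keep the gradient scale constant as the tiled label map grows.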
12 changes: 6 additions & 6 deletions src/caffe/layers/lrn_fixed_layer.cpp
@@ -110,12 +110,11 @@ void LRNFixedLayer<Dtype>::CrossChannelForward_cpu(
   Dtype* scale_data = scale_.mutable_cpu_data();
   // start with the constant value
   for (int i = 0; i < scale_.count(); ++i) {
-    scale_data[i] = 1.;
+    scale_data[i] = 2.;
   }
   Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);
   Dtype* padded_square_data = padded_square.mutable_cpu_data();
   caffe_set(padded_square.count(), Dtype(0), padded_square_data);
-  Dtype alpha_over_size = alpha_ / size_;
   // go through the images
   for (int n = 0; n < num_; ++n) {
     // compute the padded square
@@ -124,7 +123,7 @@ void LRNFixedLayer<Dtype>::CrossChannelForward_cpu(
         padded_square_data + padded_square.offset(0, pre_pad_));
     // Create the first channel scale
     for (int c = 0; c < size_; ++c) {
-      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
+      caffe_axpy<Dtype>(height_ * width_, alpha_,
          padded_square_data + padded_square.offset(0, c),
          scale_data + scale_.offset(n, 0));
    }
@@ -134,11 +133,11 @@ void LRNFixedLayer<Dtype>::CrossChannelForward_cpu(
           scale_data + scale_.offset(n, c - 1),
           scale_data + scale_.offset(n, c));
       // add head
-      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
+      caffe_axpy<Dtype>(height_ * width_, alpha_,
           padded_square_data + padded_square.offset(0, c + size_ - 1),
           scale_data + scale_.offset(n, c));
       // subtract tail
-      caffe_axpy<Dtype>(height_ * width_, -alpha_over_size,
+      caffe_axpy<Dtype>(height_ * width_, -alpha_,
           padded_square_data + padded_square.offset(0, c - 1),
           scale_data + scale_.offset(n, c));
     }
@@ -190,7 +189,8 @@ void LRNFixedLayer<Dtype>::CrossChannelBackward_cpu(
   // We hack a little bit by using the diff() to store an additional result
   Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff();
   caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data);
-  Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
+  // Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
+  Dtype cache_ratio_value = 2. * alpha_ * beta_;
 
   caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
   caffe_mul<Dtype>(scale_.count(), top_diff, bottom_diff, bottom_diff);
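
Note: together with the 1. -> 2. constant above, dropping / size_ switches this layer from stock Caffe's normalization, scale_i = 1 + (alpha/n) * sum_j x_j^2, to the AlexNet-paper parameterization, scale_i = 2 + alpha * sum_j x_j^2, with output x_i * scale_i^(-beta). The backward constant 2 * alpha * beta (updated here and in the CUDA kernel below) is the derivative bookkeeping for that same expression, keeping forward and backward consistent.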
2 changes: 1 addition & 1 deletion src/caffe/layers/lrn_fixed_layer.cu
@@ -183,7 +183,7 @@ void LRNFixedLayer<Dtype>::CrossChannelBackward_gpu(
   LRNFixedComputeDiff<<<CAFFE_GET_BLOCKS(n_threads), CAFFE_CUDA_NUM_THREADS>>>(
       n_threads, (*bottom)[0]->gpu_data(), top[0]->gpu_data(),
       scale_.gpu_data(), top[0]->gpu_diff(), num_, channels_, height_, width_,
-      size_, -beta_, Dtype(2. * alpha_ * beta_ / size_),
+      size_, -beta_, Dtype(2. * alpha_ * beta_),
       (*bottom)[0]->mutable_gpu_diff());
 }
 
4 changes: 2 additions & 2 deletions src/caffe/layers/softmax_loss_layer.cpp
@@ -48,7 +48,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
                        Dtype(FLT_MIN)));
     }
   }
-  (*top)[0]->mutable_cpu_data()[0] = loss / num;
+  (*top)[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
   if (top->size() == 2) {
     (*top)[1]->ShareData(prob_);
   }
@@ -78,7 +78,7 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
   }
   // Scale gradient
   const Dtype loss_weight = top[0]->cpu_diff()[0];
-  caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
+  caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
 }
 }
 
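
Note: in this fully convolutional setup the softmax loss is summed over all spatial_dim = height x width positions of the label map, so dividing by num alone makes the loss (and, via Backward_cpu, the gradient) scale with the output resolution. Dividing by num * spatial_dim averages per pixel instead, which keeps base_lr meaningful when the label map grows; the move from 4x4 to 8x8 tiles quadruples spatial_dim.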
20 changes: 15 additions & 5 deletions src/caffe/net.cpp
@@ -720,18 +720,28 @@ void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
           << source_layer.blobs(j).channels() << "x"
           << source_layer.blobs(j).height() << "x"
           << source_layer.blobs(j).width();
-      CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
-      CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
-      CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
       //CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
-      if (target_blobs[j]->width() == source_layer.blobs(j).width()) {
+      if (target_blobs[j]->width() == source_layer.blobs(j).width() &&
+          target_blobs[j]->height() == source_layer.blobs(j).height() &&
+          target_blobs[j]->channels() == source_layer.blobs(j).channels() &&
+          target_blobs[j]->num() == source_layer.blobs(j).num()) {
         target_blobs[j]->FromProto(source_layer.blobs(j));
-      } else if (target_blobs[j]->width() > source_layer.blobs(j).width()) {
+      } else if (target_blobs[j]->width() > source_layer.blobs(j).width() &&
+          target_blobs[j]->height() == source_layer.blobs(j).height() &&
+          target_blobs[j]->channels() == source_layer.blobs(j).channels() &&
+          target_blobs[j]->num() == source_layer.blobs(j).num()) {
         LOG(INFO) << "### WARNING: source target dimension is less than target";
         const int num_replicates = target_blobs[j]->width()
             / source_layer.blobs(j).width();
         CHECK_EQ(target_blobs[j]->width() % source_layer.blobs(j).width(), 0);
         target_blobs[j]->FromProtoReplicate(source_layer.blobs(j), num_replicates);
+      } else if (
+          target_blobs[j]->width() * target_blobs[j]->height() *
+          target_blobs[j]->channels() * target_blobs[j]->num() ==
+          source_layer.blobs(j).width() * source_layer.blobs(j).height() *
+          source_layer.blobs(j).channels() * source_layer.blobs(j).num()) {
+        LOG(INFO) << "### WARNING: source target dimension only match by total";
+        target_blobs[j]->FromProtoDataOnly(source_layer.blobs(j));
       } else {
         CHECK(false) << "dimension mismatched";
       }
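
Note: weight loading now falls through three cases: an exact shape match uses FromProto; a target that is an integer multiple wider (other dimensions equal) uses FromProtoReplicate; and shapes that agree only in total element count use the new FromProtoDataOnly, effectively a flat copy reinterpreted under the target shape. The last case is what allows net-surgery-style reuse of pretrained weights, e.g. recasting fully connected weights as convolution kernels with the same parameter count.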
2 changes: 2 additions & 0 deletions src/caffe/proto/caffe.proto
@@ -65,6 +65,8 @@ message DrivingData {
   optional int32 car_label_height = 10 [default = 15];
   // Tiling dimensions.
   optional int32 car_label_resolution = 11 [default = 4];
+  // Shrink factor for the car prediction mask.
+  optional float car_shrink_factor = 12 [default = 0.5];
 }
 
 message FillerParameter {
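
Note: because field 12 is optional with a default, DrivingData records serialized before this commit still parse, and car_shrink_factor() returns 0.5 for them, which reproduces the previous w / 4 shrink behavior in driving_data_layer.cpp.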
4 changes: 3 additions & 1 deletion tools/convert_driving_data.cpp
@@ -46,7 +46,7 @@ DEFINE_bool(shuffle, true,
     "Randomly shuffle the order of images and their labels");
 DEFINE_int32(width, 20, "Number of grids horizontally.");
 DEFINE_int32(height, 15, "Number of grids vertically.");
-DEFINE_int32(grid_dim, 4, "grid_dim x grid_dim number of pixels per each grid.");
+DEFINE_int32(grid_dim, 8, "grid_dim x grid_dim number of pixels per each grid.");
 DEFINE_int32(num_info_per_box, 4, "number of fields per box.");
 DEFINE_int32(resize_width, 640 + 32, "Width images are resized to");
 DEFINE_int32(resize_height, 480 + 32, "Height images are resized to");
@@ -132,6 +132,8 @@ int main(int argc, char** argv) {
   LOG(ERROR) << "Total to be processed: " << lines.size() << ".\n";
   for (int line_id = 0; line_id < lines.size(); ++line_id) {
     DrivingData data;
+    data.set_car_label_resolution(FLAGS_grid_dim);
+    data.set_car_shrink_factor(0.75);
     const string image_path = root_folder + lines[line_id].first;
     data.set_car_img_source(image_path);
     const vector<int>& bbs = lines[line_id].second;
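
Note: with width 20, height 15, and grid_dim 8, the generated label mask is 160 x 120 (up from 80 x 60), matching tile_dim: 8 in train_val_driving_softmax.prototxt. The shrink factor is hard-coded to 0.75 here rather than exposed as a flag, so changing it means regenerating the LMDB.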
