L1 layer and tests, some python eval stuff
cheeyos committed Dec 2, 2014
1 parent daf7b80 commit bbbfb2e
Showing 11 changed files with 491 additions and 46 deletions.
103 changes: 100 additions & 3 deletions include/caffe/loss_layers.hpp
@@ -174,8 +174,8 @@ class ContrastiveLossLayer : public LossLayer<Dtype> {

/**
* @brief Computes the Contrastive error gradient w.r.t. the inputs.
*
* Computes the gradients with respect to the two input vectors (bottom[0] and
*
* Computes the gradients with respect to the two input vectors (bottom[0] and
* bottom[1]), but not the similarity label (bottom[2]).
*
* @param top output Blob vector (length 1), providing the error gradient with
@@ -194,7 +194,7 @@ class ContrastiveLossLayer : public LossLayer<Dtype> {
* the features @f$a@f$; Backward fills their diff with
* gradients if propagate_down[0]
* -# @f$ (N \times C \times 1 \times 1) @f$
* the features @f$b@f$; Backward fills their diff with gradients if
* the features @f$b@f$; Backward fills their diff with gradients if
* propagate_down[1]
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
@@ -763,6 +763,103 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
vector<Blob<Dtype>*> softmax_top_vec_;
};

/**
* @brief Computes the L1 loss @f$
* E = \frac{1}{N} \sum\limits_{n=1}^N \left| \hat{y}_n - y_n
* \right| @f$ for real-valued regression tasks.
*
* @param bottom input Blob vector (length 2)
* -# @f$ (N \times C \times H \times W) @f$
* the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$
* -# @f$ (N \times C \times H \times W) @f$
* the targets @f$ y \in [-\infty, +\infty]@f$
* @param top output Blob vector (length 1)
* -# @f$ (1 \times 1 \times 1 \times 1) @f$
* the computed L1 loss: @f$ E =
* \frac{1}{N} \sum\limits_{n=1}^N \left| \hat{y}_n - y_n
* \right| @f$
*
* This can be used for robust real-valued regression tasks. An InnerProductLayer
* input to an L1LossLayer formulates a least absolute deviations (LAD)
* regression problem, which penalizes residuals linearly and is therefore
* less sensitive to outliers than the least-squares (Euclidean) loss.
*/
template <typename Dtype>
class L1LossLayer : public LossLayer<Dtype> {
public:
explicit L1LossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_EUCLIDEAN_LOSS;
}

/**
* Unlike most loss layers, in the L1LossLayer we can backpropagate
* to both inputs -- override to return true and always allow force_backward.
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}

protected:
/// @copydoc L1LossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

/**
* @brief Computes the L1 error gradient w.r.t. the inputs.
*
* Unlike other children of LossLayer, L1LossLayer \b can compute
* gradients with respect to the label inputs bottom[1] (but it will do so
* only if propagate_down[1] is set, e.g. because the labels are themselves
* produced by learnable parameters, or if force_backward is set). In fact,
* this layer is "commutative" -- the result is the same regardless of the
* order of the two bottoms.
*
* @param top output Blob vector (length 1), providing the error gradient with
* respect to the outputs
* -# @f$ (1 \times 1 \times 1 \times 1) @f$
* This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
* as @f$ \lambda @f$ is the coefficient of this layer's output
* @f$\ell_i@f$ in the overall Net loss
* @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
* @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
* (*Assuming that this top Blob is not used as a bottom (input) by any
* other layer of the Net.)
* @param propagate_down see Layer::Backward.
* @param bottom input Blob vector (length 2)
* -# @f$ (N \times C \times H \times W) @f$
* the predictions @f$\hat{y}@f$; Backward fills their diff with
* gradients @f$
* \frac{\partial E}{\partial \hat{y}_n} =
* \frac{1}{N} \, \mathrm{sign}(\hat{y}_n - y_n)
* @f$ if propagate_down[0]
* -# @f$ (N \times C \times H \times W) @f$
* the targets @f$y@f$; Backward fills their diff with gradients
* @f$ \frac{\partial E}{\partial y_n} =
* \frac{1}{N} \, \mathrm{sign}(y_n - \hat{y}_n)
* @f$ if propagate_down[1]
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

Blob<Dtype> diff_;
Blob<Dtype> sign_;
};



} // namespace caffe

#endif // CAFFE_LOSS_LAYERS_HPP_
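The layer's .cpp/.cu implementation is not part of the hunks shown above. As a rough sketch only (not the code committed here), the declared diff_ and sign_ members and the documented loss suggest a CPU forward/backward along the following lines, assuming the standard Caffe math helpers caffe_sub, caffe_cpu_asum, caffe_cpu_sign, and caffe_cpu_axpby are available in this tree:

#include <vector>

#include "caffe/loss_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Sketch under the assumptions stated above; Reshape is presumed to have
// sized diff_ and sign_ like bottom[0] and the top blob to a single scalar.
template <typename Dtype>
void L1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const int count = bottom[0]->count();
  // diff_ = y_hat - y
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
            diff_.mutable_cpu_data());
  // E = (1/N) * sum_i |diff_i|, with N the batch size
  const Dtype loss = caffe_cpu_asum(count, diff_.cpu_data()) / bottom[0]->num();
  (*top)[0]->mutable_cpu_data()[0] = loss;
}

template <typename Dtype>
void L1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
  const int count = diff_.count();
  // sign_ = sign(y_hat - y), the elementwise (sub)gradient of |.|
  caffe_cpu_sign(count, diff_.cpu_data(), sign_.mutable_cpu_data());
  for (int i = 0; i < 2; ++i) {
    if (!propagate_down[i]) { continue; }
    const Dtype s = (i == 0) ? Dtype(1) : Dtype(-1);  // d|a-b|/db = -d|a-b|/da
    const Dtype alpha = s * top[0]->cpu_diff()[0] / (*bottom)[i]->num();
    // bottom[i].diff = alpha * sign_
    caffe_cpu_axpby(count, alpha, sign_.cpu_data(), Dtype(0),
                    (*bottom)[i]->mutable_cpu_diff());
  }
}

}  // namespace caffe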
10 changes: 5 additions & 5 deletions models/brody/solver.prototxt
@@ -1,14 +1,14 @@
 net: "models/brody/train_val_brody.prototxt"
-test_iter: 1000
-test_interval: 1000
-base_lr: 0.00000001
+test_iter: 20
+test_interval: 5000
+base_lr: 0.0000001
 lr_policy: "step"
 gamma: 0.1
 stepsize: 100000
 display: 20
 max_iter: 1450000
-momentum: 0.2
+momentum: 0.9
 weight_decay: 0.00005
-snapshot: 2000
+snapshot: 10000
 snapshot_prefix: "models/brody/caffe_brody_train"
 solver_mode: GPU
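For context on the new values: with lr_policy: "step", Caffe sets the effective learning rate at iteration t to
@f$ \mathrm{lr}(t) = \mathrm{base\_lr} \cdot \gamma^{\lfloor t/\mathrm{stepsize} \rfloor}
                   = 10^{-7} \cdot 0.1^{\lfloor t/100000 \rfloor} @f$,
so training now starts at 1e-7 (up from 1e-8) and still decays tenfold every 100000 iterations, with momentum raised to 0.9 and snapshots taken every 10000 iterations.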