added video data loader. Now need to add lane label reader
Tao Wang committed Nov 15, 2014
1 parent 66c77d5 commit ff97eab
Showing 10 changed files with 397 additions and 41 deletions.
43 changes: 42 additions & 1 deletion include/caffe/data_layers.hpp
@@ -4,7 +4,6 @@
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "hdf5.h"
#include "leveldb/db.h"
@@ -17,6 +16,10 @@
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/imgproc/imgproc.hpp>

namespace caffe {

@@ -272,6 +275,44 @@ class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
int lines_id_;
};


/**
* @brief Provides data to the Net from video files.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class VideoDataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
explicit VideoDataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param) {}
virtual ~VideoDataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_VIDEO_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
shared_ptr<Caffe::RNG> prefetch_rng_;
virtual void ShuffleBatches();
virtual void InternalThreadEntry();
bool ReadVideoFrameToDatum(const string& filename, size_t id,
size_t persp, const int height, const int width, Datum* datum);

//inline bool ReadVideoBatchToDatum(const string& filename, std::vector<size_t> frameIds,
// std::vector<size_t>trans, Datum* datum) {
// return ReadVideoBatchToDatum(filename, frameIds, trans, 0, 0, datum);
//}

vector<std::pair<std::string, std::pair<std::vector<size_t>, std::vector<size_t> > > > lines_;
int lines_id_;
cv::VideoCapture* cap;
};

/**
* @brief Provides data to the Net from memory.
*
3 changes: 3 additions & 0 deletions include/caffe/util/io.hpp
@@ -105,6 +105,9 @@ inline bool ReadImageToDatum(const string& filename, const int label,
return ReadImageToDatum(filename, label, 0, 0, datum);
}




leveldb::Options GetLevelDBOptions();

template <typename Dtype>
15 changes: 7 additions & 8 deletions models/brody/train_val_brody.prototxt
@@ -3,12 +3,11 @@ name: "BrodyNet"
# Training input.
layers {
name: "data"
type: DATA
type: VIDEO_DATA
top: "data"
data_param {
source: "/deep/group/driving_data/twangcat/lmdb/driving_img_train"
backend: LMDB
batch_size: 5
video_data_param {
source: "/scail/group/deeplearning/driving_data/twangcat/schedules/q50_multilane_planar_train_schedule1_batch20_2cam.txt"
batch_size: 20
}
transform_param {
mean_file: "driving_img_mean.binaryproto"
@@ -24,7 +23,7 @@ layers {
data_param {
source: "/deep/group/driving_data/twangcat/lmdb/driving_label_train"
backend: LMDB
batch_size: 5
batch_size: 20
}
include: { phase: TRAIN }
}
@@ -37,7 +36,7 @@ layers {
data_param {
source: "/deep/group/driving_data/twangcat/lmdb/driving_img_test"
backend: LMDB
batch_size: 5
batch_size: 20
}
transform_param {
mean_file: "driving_img_mean.binaryproto"
@@ -53,7 +52,7 @@ layers {
data_param {
source: "/deep/group/driving_data/twangcat/lmdb/driving_label_test"
backend: LMDB
batch_size: 5
batch_size: 20
}
include: { phase: TEST }
}
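For reference, the schedule file named in video_data_param above is parsed by the new VideoDataLayer (video_data_layer.cpp below): each line carries three comma-separated fields, a video filename, space-separated frame ids, and space-separated perspective-transform ids, and the number of ids on a line must match batch_size. A hypothetical line (illustrative path and ids only, shortened to four frames) would look like:

    /path/to/driving_run_split_2.avi,100 110 120 130,0 1 2 3

Note that ReadVideoFrameToDatum takes the camera number from the character five positions before the end of the filename, so video names are expected to end in the camera index followed by a four-character extension such as ".avi".
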
Binary file modified src/caffe/.__afs6319
2 changes: 1 addition & 1 deletion src/caffe/data_transformer.cpp
@@ -108,4 +108,4 @@ unsigned int DataTransformer<Dtype>::Rand() {

INSTANTIATE_CLASS(DataTransformer);

} // namespace caffe
} // namespace caffe
3 changes: 2 additions & 1 deletion src/caffe/layer_factory.cpp
@@ -1,5 +1,4 @@
#include <string>

#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/vision_layers.hpp"
@@ -249,6 +248,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
return new SplitLayer<Dtype>(param);
case LayerParameter_LayerType_TANH:
return GetTanHLayer<Dtype>(name, param);
case LayerParameter_LayerType_VIDEO_DATA:
return new VideoDataLayer<Dtype>(param);
case LayerParameter_LayerType_WINDOW_DATA:
return new WindowDataLayer<Dtype>(param);
case LayerParameter_LayerType_NONE:
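The new factory case means a net whose prototxt declares type: VIDEO_DATA (as in train_val_brody.prototxt above) is handed a VideoDataLayer when the net is constructed. A minimal sketch of that dispatch, assuming the generated proto setters and the GetLayer declaration are in scope (this is not code from the commit):

    caffe::LayerParameter param;
    param.set_name("data");
    param.set_type(caffe::LayerParameter_LayerType_VIDEO_DATA);
    // GetLayer returns a newly allocated VideoDataLayer<float> for this type.
    caffe::Layer<float>* layer = caffe::GetLayer<float>(param);
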
232 changes: 232 additions & 0 deletions src/caffe/layers/video_data_layer.cpp
@@ -0,0 +1,232 @@
#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include <stdlib.h>
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
#include <boost/algorithm/string.hpp>
namespace caffe {

template <typename Dtype>
VideoDataLayer<Dtype>::~VideoDataLayer<Dtype>() {
this->JoinPrefetchThread();
}

template <typename Dtype>
bool VideoDataLayer<Dtype>::ReadVideoFrameToDatum(const string& filename, size_t id, size_t persp,
const int height, const int width, Datum* datum) {
// The camera index is encoded as the character five positions before the end of the filename.
int cam_num = (int)(filename.at(filename.length()-5) - '0');
cam_num = cam_num > 2 ? 2 : cam_num; // The 3rd cam is for testing only, so use cam2 distortions as a dummy.
//int numPersp = mTransforms.size()/2;
cv::Mat cv_img, cv_img_origin;
bool set_ok = this->cap->set(CV_CAP_PROP_POS_FRAMES, id );
if(!set_ok) {
LOG(ERROR)<<"Failed to set video frame";
return false;
}
bool read_ok = this->cap->read(cv_img_origin);
if(!read_ok) {
LOG(ERROR)<<"Failed to read video frame";
return false;
}
// resize image if necessary
if (height > 0 && width > 0) {
cv::resize(cv_img_origin, cv_img, cv::Size(width, height));
} else {
cv_img = cv_img_origin;
}
// apply perspective transform
//cv::Mat warpMatrix = mTransforms[persp+(cam_num-1)*numPersp];
//cv::warpPerspective(cv_img, cv_img, warpMatrix, frame.size(), cv::INTER_LINEAR, cv::BORDER_REPLICATE);
// copy data to datum
int num_channels = 3;
datum->set_channels(num_channels);
datum->set_height(cv_img.rows);
datum->set_width(cv_img.cols);
datum->set_label(0); // dummy label for now.
datum->clear_data();
datum->clear_float_data();
string* datum_string = datum->mutable_data();
for (int c = 0; c < num_channels; ++c) {
for (int h = 0; h < cv_img.rows; ++h) {
for (int w = 0; w < cv_img.cols; ++w) {
datum_string->push_back(
static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
}
}
}
return true;
}

template <typename Dtype>
void VideoDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const int new_height = this->layer_param_.video_data_param().new_height();
const int new_width = this->layer_param_.video_data_param().new_width();
CHECK((new_height == 0 && new_width == 0) ||
(new_height > 0 && new_width > 0)) << "Current implementation requires "
"new_height and new_width to be set at the same time.";
// Read the file with filenames and labels
const string& source = this->layer_param_.video_data_param().source();
LOG(INFO) << "Opening schedule file " << source;
std::ifstream infile(source.c_str());

string batch_string;

string filename;
//while (infile >> batch_string) {
while (getline (infile, batch_string)) {
if(!infile)
{
if(infile.eof())
{
LOG(INFO) << "Reached EOF of schedule file.";
break;
}
else
LOG(FATAL)<< "Error while reading schedule file. Possibly corrupted.";
}
std::vector<string> batch_fields;
// first split a line into fields with delimiter ",". Fields should be [filename, frame_ids, transform_ids]
boost::split(batch_fields, batch_string, boost::is_any_of(","),
boost::token_compress_on);
if (batch_fields.size() != 3)
LOG(FATAL) << "Each line must have 3 comma-separated fields, but "
<< batch_fields.size() << " were found.";
// store filename
filename = batch_fields[0];
// store frame ids
std::vector<string> frame_ids_str;
std::vector<size_t> frame_ids;
boost::split(frame_ids_str, batch_fields[1], boost::is_any_of(" "),
boost::token_compress_on);
for (int f=0; f<frame_ids_str.size(); ++f) {
frame_ids.push_back((size_t)atoi(frame_ids_str[f].c_str()));
}

// store persp transform ids
std::vector<string> trans_ids_str;
std::vector<size_t> trans_ids;
boost::split(trans_ids_str, batch_fields[2], boost::is_any_of(" "),
boost::token_compress_on);
for (int f=0; f<trans_ids_str.size(); ++f) {
trans_ids.push_back((size_t)atoi(trans_ids_str[f].c_str()));
}

lines_.push_back(std::make_pair(filename, std::make_pair(frame_ids, trans_ids)));
}

if (this->layer_param_.video_data_param().shuffle()) {
// randomly shuffle data
LOG(INFO) << "Shuffling batches";
const unsigned int prefetch_rng_seed = caffe_rng_rand();
prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
ShuffleBatches();
}
LOG(INFO) << "A total of " << lines_.size() << " batches.";

lines_id_ = 0;
// Check if we would need to randomly skip a few data points
if (this->layer_param_.video_data_param().rand_skip()) {
unsigned int skip = caffe_rng_rand() %
this->layer_param_.video_data_param().rand_skip();
LOG(INFO) << "Skipping first " << skip << " data points.";
CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
lines_id_ = skip;
}
// Read a data batch, and use it to initialize the top blob.
this->cap = new cv::VideoCapture(lines_[lines_id_].first);
Datum datum;
CHECK(ReadVideoFrameToDatum(lines_[lines_id_].first, lines_[lines_id_].second.first[0],
lines_[lines_id_].second.second[0], new_height, new_width, &datum));
this->cap->release();
// image
const int crop_size = this->layer_param_.transform_param().crop_size();
const int batch_size = this->layer_param_.video_data_param().batch_size();
if (crop_size > 0) {
(*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
this->prefetch_data_.Reshape(batch_size, datum.channels(), crop_size,
crop_size);
} else {
(*top)[0]->Reshape(batch_size, datum.channels(), datum.height(),
datum.width());
this->prefetch_data_.Reshape(batch_size, datum.channels(), datum.height(),
datum.width());
}
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
//(*top)[1]->Reshape(batch_size, 1, 1, 1);
this->prefetch_label_.Reshape(batch_size, 1, 1, 1);
// datum size
this->datum_channels_ = datum.channels();
this->datum_height_ = datum.height();
this->datum_width_ = datum.width();
this->datum_size_ = datum.channels() * datum.height() * datum.width();
}

template <typename Dtype>
void VideoDataLayer<Dtype>::ShuffleBatches() {
caffe::rng_t* prefetch_rng =
static_cast<caffe::rng_t*>(prefetch_rng_->generator());
shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}



// This function is used to create a thread that prefetches the data.
template <typename Dtype>
void VideoDataLayer<Dtype>::InternalThreadEntry() {
Datum datum;
CHECK(this->prefetch_data_.count());
Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
//Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
VideoDataParameter video_data_param = this->layer_param_.video_data_param();
const int batch_size = video_data_param.batch_size();
const int new_height = video_data_param.new_height();
const int new_width = video_data_param.new_width();

// datum scales
const int lines_size = lines_.size();
string filename = lines_[lines_id_].first;
std::vector<size_t> frameIds = lines_[lines_id_].second.first;
std::vector<size_t> trans = lines_[lines_id_].second.second;
if (batch_size!=frameIds.size() || batch_size!=trans.size())
LOG(ERROR)<<"Frame count mismatch!";
LOG(INFO)<<"reading video file "<<filename;
this->cap = new cv::VideoCapture(filename);
for (int item_id = 0; item_id < batch_size; ++item_id) {
// get a blob
//CHECK_GT(lines_size, lines_id_);
if (!ReadVideoFrameToDatum(filename, frameIds[item_id], trans[item_id],
new_height, new_width, &datum)) {
LOG(ERROR)<< "Error reading frame from video!";
continue;
}

// Apply transformations (mirror, crop...) to the data
this->data_transformer_.Transform(item_id, datum, this->mean_, top_data);

// go to the next iter
lines_id_++;
if (lines_id_ >= lines_size) {
// We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start.";
lines_id_ = 0;
if (this->layer_param_.video_data_param().shuffle()) {
ShuffleBatches();
}
}
}
this->cap->release();
}

INSTANTIATE_CLASS(VideoDataLayer);

} // namespace caffe
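
One detail worth calling out in ReadVideoFrameToDatum above: the triple loop copies the frame channel-major (all of channel 0, then channel 1, then channel 2), which is the single-image C x H x W layout Caffe blobs use, and since cv::Mat stores pixels as BGR, channel 0 of the datum is blue. A minimal sketch of the resulting byte offset (the helper name here is ours, not part of the commit):

    // Offset of pixel (c, h, w) in the datum string written by the
    // channel-major loop in ReadVideoFrameToDatum.
    inline size_t datum_offset(int c, int h, int w, int height, int width) {
      return (static_cast<size_t>(c) * height + h) * width + w;
    }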
