From ff97eab18af8e59f5e7bebe8e4794b1ff9f0cc3f Mon Sep 17 00:00:00 2001
From: Tao Wang <twangcat@robo.stanford.edu>
Date: Fri, 14 Nov 2014 21:03:54 -0800
Subject: [PATCH] added video data loader. Now need to add lane label reader

---
 include/caffe/data_layers.hpp         |  43 ++++-
 include/caffe/util/io.hpp             |   3 +
 models/brody/train_val_brody.prototxt |  15 +-
 src/caffe/.__afs6319                  | Bin 53248 -> 53248 bytes
 src/caffe/data_transformer.cpp        |   2 +-
 src/caffe/layer_factory.cpp           |   3 +-
 src/caffe/layers/video_data_layer.cpp | 232 ++++++++++++++++++++++++++
 src/caffe/proto/caffe.proto           |  35 +++-
 src/caffe/solver.cpp                  |  95 +++++++----
 src/caffe/util/io.cpp                 |  10 ++
 10 files changed, 397 insertions(+), 41 deletions(-)
 create mode 100644 src/caffe/layers/video_data_layer.cpp
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index 8e2637b0658..7ccb7a6e805 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -4,7 +4,6 @@
 #include <string>
 #include <utility>
 #include <vector>
-
 #include "boost/scoped_ptr.hpp"
 #include "hdf5.h"
 #include "leveldb/db.h"
@@ -17,6 +16,10 @@
 #include "caffe/internal_thread.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/highgui/highgui_c.h>
+#include <opencv2/imgproc/imgproc.hpp>
 
 namespace caffe {
 
@@ -272,6 +275,44 @@ class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
   int lines_id_;
 };
 
+
+/**
+ * @brief Provides data to the Net from video files.
+ *
+ * TODO(dox): thorough documentation for Forward and proto params.
+ */
+template <typename Dtype>
+class VideoDataLayer : public BasePrefetchingDataLayer<Dtype> {
+ public:
+  explicit VideoDataLayer(const LayerParameter& param)
+      : BasePrefetchingDataLayer<Dtype>(param), lines_id_(0), cap(NULL) {}
+  virtual ~VideoDataLayer();
+  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual inline LayerParameter_LayerType type() const {
+    return LayerParameter_LayerType_VIDEO_DATA;
+  }
+  virtual inline int ExactNumBottomBlobs() const { return 0; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  shared_ptr<Caffe::RNG> prefetch_rng_;
+  virtual void ShuffleBatches();
+  virtual void InternalThreadEntry();
+  // Reads frame `id` from the capture held in `cap` into `datum`, resizing to
+  // width x height when both are positive. Returns false on failure.
+  bool ReadVideoFrameToDatum(const string& filename, size_t id,
+    size_t persp, const int height, const int width, Datum* datum);
+
+  // One entry per schedule line: (video filename, (frame ids, transform ids)).
+  vector<std::pair<std::string, std::pair<std::vector<size_t>, std::vector<size_t> > > > lines_;
+  // Index into lines_ of the batch to load next.
+  int lines_id_;
+  // Capture for the video being read; initialised to NULL until one is opened.
+  cv::VideoCapture* cap;
+};
+
 /**
  * @brief Provides data to the Net from memory.
  *
diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
index 7edd1a4149c..a27f2528a0f 100644
--- a/include/caffe/util/io.hpp
+++ b/include/caffe/util/io.hpp
@@ -105,6 +105,9 @@ inline bool ReadImageToDatum(const string& filename, const int label,
   return ReadImageToDatum(filename, label, 0, 0, datum);
 }
 
+
+
+
 leveldb::Options GetLevelDBOptions();
 
 template <typename Dtype>
diff --git a/models/brody/train_val_brody.prototxt b/models/brody/train_val_brody.prototxt
index 23c08d1206f..b70ea376e0e 100644
--- a/models/brody/train_val_brody.prototxt
+++ b/models/brody/train_val_brody.prototxt
@@ -3,12 +3,11 @@ name: "BrodyNet"
 # Training input.
 layers {
   name: "data"
-  type: DATA
+  type: VIDEO_DATA
   top: "data"
-  data_param {
-    source: "/deep/group/driving_data/twangcat/lmdb/driving_img_train"
-    backend: LMDB
-    batch_size: 5
+  video_data_param {
+    source: "/scail/group/deeplearning/driving_data/twangcat/schedules/q50_multilane_planar_train_schedule1_batch20_2cam.txt"
+    batch_size: 20
   }
   transform_param {
     mean_file: "driving_img_mean.binaryproto"
@@ -24,7 +23,7 @@ layers {
   data_param {
     source: "/deep/group/driving_data/twangcat/lmdb/driving_label_train"
     backend: LMDB
-    batch_size: 5
+    batch_size: 20
   }
   include: { phase: TRAIN }
 }
@@ -37,7 +36,7 @@ layers {
   data_param {
     source: "/deep/group/driving_data/twangcat/lmdb/driving_img_test"
     backend: LMDB
-    batch_size: 5
+    batch_size: 20
   }
   transform_param {
     mean_file: "driving_img_mean.binaryproto"
@@ -53,7 +52,7 @@ layers {
   data_param {
     source: "/deep/group/driving_data/twangcat/lmdb/driving_label_test"
     backend: LMDB
-    batch_size: 5
+    batch_size: 20
   }
   include: { phase: TEST }
 }
diff --git a/src/caffe/.__afs6319 b/src/caffe/.__afs6319
index 2365e5d4e5d3c6f9ae73658818832e7cb8ec7f3f..65eafff2724f3ae981df768dd0cb16a417bb590e 100644
GIT binary patch
delta 39
tcmZozz}&EaSv1KY%+puFT+fIB2m}}ye3fECCT$eG#mpGG`2(}S834#s3e5lj

delta 39
tcmZozz}&EaSv1KY%+puFT+fIB2m}}ynw4Tg+BS;bVrFF6{DE2E3;@X{3Zei2

diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp
index 7150fd99c18..452e4c4a31a 100644
--- a/src/caffe/data_transformer.cpp
+++ b/src/caffe/data_transformer.cpp
@@ -108,4 +108,4 @@ unsigned int DataTransformer<Dtype>::Rand() {
 
 INSTANTIATE_CLASS(DataTransformer);
 
-}  // namespace caffe
+}  // namespace caffe
\ No newline at end of file
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index b78167f21eb..6002bd07f0e 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -1,5 +1,4 @@
 #include <string>
-
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/vision_layers.hpp"
@@ -249,6 +248,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new SplitLayer<Dtype>(param);
   case LayerParameter_LayerType_TANH:
     return GetTanHLayer<Dtype>(name, param);
+  case LayerParameter_LayerType_VIDEO_DATA:
+    return new VideoDataLayer<Dtype>(param);
   case LayerParameter_LayerType_WINDOW_DATA:
     return new WindowDataLayer<Dtype>(param);
   case LayerParameter_LayerType_NONE:
diff --git a/src/caffe/layers/video_data_layer.cpp b/src/caffe/layers/video_data_layer.cpp
new file mode 100644
index 00000000000..ac814fbbe0f
--- /dev/null
+++ b/src/caffe/layers/video_data_layer.cpp
@@ -0,0 +1,232 @@
+#include <fstream>  // NOLINT(readability/streams)
+#include <iostream>  // NOLINT(readability/streams)
+#include <string>
+#include <utility>
+#include <vector>
+#include <stdlib.h>
+#include "caffe/data_layers.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/util/rng.hpp"
+#include <boost/algorithm/string.hpp>
+namespace caffe {
+
+template <typename Dtype>
+VideoDataLayer<Dtype>::~VideoDataLayer() {
+  this->JoinPrefetchThread();  // join the prefetch thread before teardown
+}
+
+// Seeks this->cap to frame `id`, resizes the frame to width x height when both
+// are positive, and packs its bytes into `datum` channel by channel with a
+// dummy label of 0. Returns false if seeking or decoding fails. `filename` and
+// `persp` are unused until the per-camera perspective warp is implemented.
+template <typename Dtype>
+bool VideoDataLayer<Dtype>::ReadVideoFrameToDatum(const string& filename,
+    size_t id, size_t persp, const int height, const int width, Datum* datum) {
+  if (!this->cap->set(CV_CAP_PROP_POS_FRAMES, id)) {
+    LOG(ERROR)<<"Failed to set video frame";
+    return false;
+  }
+  cv::Mat cv_img, cv_img_origin;
+  if (!this->cap->read(cv_img_origin)) {
+    LOG(ERROR)<<"Failed to read video frame";
+    return false;
+  }
+  // The copy loop below reads cv::Vec3b pixels, so require 8-bit, 3 channels.
+  if (cv_img_origin.type() != CV_8UC3) {
+    LOG(ERROR)<<"Expected an 8-bit 3-channel video frame";
+    return false;
+  }
+  // resize image if necessary
+  if (height > 0 && width > 0) {
+    cv::resize(cv_img_origin, cv_img, cv::Size(width, height));
+  } else {
+    cv_img = cv_img_origin;
+  }
+  const int num_channels = 3;
+  datum->set_channels(num_channels);
+  datum->set_height(cv_img.rows);
+  datum->set_width(cv_img.cols);
+  datum->set_label(0); // dummy label for now.
+  datum->clear_data();
+  datum->clear_float_data();
+  string* datum_string = datum->mutable_data();
+  datum_string->reserve(num_channels * cv_img.rows * cv_img.cols);
+  for (int c = 0; c < num_channels; ++c) {
+    for (int h = 0; h < cv_img.rows; ++h) {
+      for (int w = 0; w < cv_img.cols; ++w) {
+        datum_string->push_back(static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
+      }
+    }
+  }
+  return true;
+}
+
+// Parses the batch schedule named by video_data_param.source, optionally
+// shuffles / skips batches, then decodes one frame to size the top blob.
+// Each schedule line is "filename,frame ids,transform ids" with both id lists
+// space separated; every line is stored as one entry of lines_.
+template <typename Dtype>
+void VideoDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const int new_height = this->layer_param_.video_data_param().new_height();
+  const int new_width  = this->layer_param_.video_data_param().new_width();
+  CHECK((new_height == 0 && new_width == 0) ||
+      (new_height > 0 && new_width > 0)) << "Current implementation requires "
+      "new_height and new_width to be set at the same time.";
+  const string& source = this->layer_param_.video_data_param().source();
+  LOG(INFO) << "Opening schedule file " << source;
+  std::ifstream infile(source.c_str());
+  CHECK(infile.is_open()) << "Could not open schedule file " << source;
+
+  string batch_string;
+  while (getline (infile, batch_string)) {
+    std::vector<string> batch_fields;
+    // first split a line into fields with delimiter ",". Fields should be [filename, frame_ids, transform_ids]
+    boost::split(batch_fields, batch_string, boost::is_any_of(","),
+                 boost::token_compress_on);
+    if(batch_fields.size()!=3)
+      LOG(FATAL) << "Each line must have 3 fields separated by comma, "
+                 <<batch_fields.size()<<" found instead";
+    const string& filename = batch_fields[0];
+    // store frame ids; empty tokens (e.g. from leading spaces) are not ids
+    std::vector<string> frame_ids_str;
+    std::vector<size_t> frame_ids;
+    boost::split(frame_ids_str, batch_fields[1], boost::is_any_of(" "),
+                 boost::token_compress_on);
+    for (size_t f = 0; f < frame_ids_str.size(); ++f) {
+      if (frame_ids_str[f].empty()) continue;
+      frame_ids.push_back(static_cast<size_t>(atoi(frame_ids_str[f].c_str())));
+    }
+    // store persp transform ids, parsed the same way
+    std::vector<string> trans_ids_str;
+    std::vector<size_t> trans_ids;
+    boost::split(trans_ids_str, batch_fields[2], boost::is_any_of(" "),
+                 boost::token_compress_on);
+    for (size_t f = 0; f < trans_ids_str.size(); ++f) {
+      if (trans_ids_str[f].empty()) continue;
+      trans_ids.push_back(static_cast<size_t>(atoi(trans_ids_str[f].c_str())));
+    }
+    lines_.push_back(std::make_pair(filename, std::make_pair(frame_ids, trans_ids)));
+  }
+  // getline stops on EOF or on a stream error; only the latter is fatal.
+  CHECK(!infile.bad()) << "Error while reading schedule file. Possibly corrupted.";
+  CHECK(!lines_.empty()) << "Schedule file " << source << " contains no batches";
+
+  if (this->layer_param_.video_data_param().shuffle()) {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling batches";
+    const unsigned int prefetch_rng_seed = caffe_rng_rand();
+    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
+    ShuffleBatches();
+  }
+  LOG(INFO) << "A total of " << lines_.size() << " batches.";
+
+  lines_id_ = 0;
+  // Check if we would need to randomly skip a few data points
+  if (this->layer_param_.video_data_param().rand_skip()) {
+    unsigned int skip = caffe_rng_rand() %
+        this->layer_param_.video_data_param().rand_skip();
+    LOG(INFO) << "Skipping first " << skip << " data points.";
+    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
+    lines_id_ = skip;
+  }
+  // Decode the first frame of the starting batch to learn the frame geometry.
+  const std::vector<size_t>& first_frames = lines_[lines_id_].second.first;
+  const std::vector<size_t>& first_trans = lines_[lines_id_].second.second;
+  CHECK(!first_frames.empty() && !first_trans.empty())
+      << "Batch " << lines_id_ << " has no frame or transform ids";
+  this->cap = new cv::VideoCapture(lines_[lines_id_].first);
+  CHECK(this->cap->isOpened()) << "Could not open " << lines_[lines_id_].first;
+  Datum datum;
+  CHECK(ReadVideoFrameToDatum(lines_[lines_id_].first, first_frames[0],
+                         first_trans[0], new_height, new_width, &datum));
+  // The prefetch thread opens its own capture, so free this one right away.
+  this->cap->release();
+  delete this->cap;
+  this->cap = NULL;
+  // image
+  const int crop_size = this->layer_param_.transform_param().crop_size();
+  const int batch_size = this->layer_param_.video_data_param().batch_size();
+  if (crop_size > 0) {
+    (*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
+    this->prefetch_data_.Reshape(batch_size, datum.channels(), crop_size,
+                                 crop_size);
+  } else {
+    (*top)[0]->Reshape(batch_size, datum.channels(), datum.height(),
+                       datum.width());
+    this->prefetch_data_.Reshape(batch_size, datum.channels(), datum.height(),
+        datum.width());
+  }
+  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
+      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
+      << (*top)[0]->width();
+  // label: this layer has a single top, so only the prefetch buffer is shaped
+  this->prefetch_label_.Reshape(batch_size, 1, 1, 1);
+  // datum size
+  this->datum_channels_ = datum.channels();
+  this->datum_height_ = datum.height();
+  this->datum_width_ = datum.width();
+  this->datum_size_ = datum.channels() * datum.height() * datum.width();
+}
+
+template <typename Dtype>
+void VideoDataLayer<Dtype>::ShuffleBatches() {
+  // Reorders the entries of lines_ in place using the layer's prefetch RNG.
+  shuffle(lines_.begin(), lines_.end(),
+      static_cast<caffe::rng_t*>(prefetch_rng_->generator()));
+}
+
+
+
+// Prefetch-thread entry point: decodes one schedule line (one batch) of video
+// frames into prefetch_data_, then advances lines_id_ to the next batch.
+template <typename Dtype>
+void VideoDataLayer<Dtype>::InternalThreadEntry() {
+  Datum datum;
+  CHECK(this->prefetch_data_.count());
+  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
+  VideoDataParameter video_data_param = this->layer_param_.video_data_param();
+  const int batch_size = video_data_param.batch_size();
+  const int new_height = video_data_param.new_height();
+  const int new_width = video_data_param.new_width();
+  // Each entry of lines_ is a whole batch: (video file, frame ids, transform ids).
+  const string& filename = lines_[lines_id_].first;
+  const std::vector<size_t>& frameIds = lines_[lines_id_].second.first;
+  const std::vector<size_t>& trans = lines_[lines_id_].second.second;
+  const size_t batch_count = static_cast<size_t>(batch_size);
+  if (batch_count != frameIds.size() || batch_count != trans.size())
+    LOG(ERROR)<<"Frame count mismatch!";
+  CHECK(frameIds.size() >= batch_count && trans.size() >= batch_count)
+      << "Schedule line has fewer ids than batch_size for " << filename;
+  LOG(INFO)<<"reading video file "<<filename;
+  this->cap = new cv::VideoCapture(filename);
+  for (int item_id = 0; item_id < batch_size; ++item_id) {
+    if (!ReadVideoFrameToDatum(filename, frameIds[item_id], trans[item_id],
+          new_height, new_width, &datum)) {
+      // Skip the transform for an unreadable frame and keep filling the batch.
+      LOG(ERROR)<< "Error reading frame from video!";
+      continue;
+    }
+    // Apply transformations (mirror, crop...) to the data
+    this->data_transformer_.Transform(item_id, datum, this->mean_, top_data);
+  }
+  // The capture is re-created for every batch, so free it here to avoid a leak.
+  this->cap->release();
+  delete this->cap;
+  this->cap = NULL;
+  // Move on to the next batch exactly once, wrapping (and reshuffling) at the end.
+  lines_id_++;
+  if (lines_id_ >= static_cast<int>(lines_.size())) {
+    DLOG(INFO) << "Restarting data prefetching from start.";
+    lines_id_ = 0;
+    if (video_data_param.shuffle()) {
+      ShuffleBatches();
+    }
+  }
+}
+
+INSTANTIATE_CLASS(VideoDataLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 9395c38f3e9..0e69ef54e77 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -219,7 +219,7 @@ message LayerParameter {
   // line above the enum. Update the next available ID when you add a new
   // LayerType.
   //
-  // LayerType next available ID: 38 (last added: CONTRASTIVE_LOSS)
+  // LayerType next available ID: 39 (last added: VIDEO_DATA)
   enum LayerType {
     // "NONE" layer type is 0th enum element so that we don't cause confusion
     // by defaulting to an existent LayerType (instead, should usually error if
@@ -260,8 +260,9 @@ message LayerParameter {
     SPLIT = 22;
     SLICE = 33;
     TANH = 23;
-    WINDOW_DATA = 24;
     THRESHOLD = 31;
+    VIDEO_DATA=38;
+    WINDOW_DATA = 24;
   }
   optional LayerType type = 5; // the layer type from the enum above
 
@@ -316,6 +317,7 @@ message LayerParameter {
   optional SliceParameter slice_param = 31;
   optional TanHParameter tanh_param = 37;
   optional ThresholdParameter threshold_param = 25;
+  optional VideoDataParameter video_data_param = 41;
   optional WindowDataParameter window_data_param = 20;
 
   // Parameters for data pre-processing.
@@ -524,6 +526,35 @@ message ImageDataParameter {
   optional bool mirror = 6 [default = false];
 }
 
+// Message that stores parameters used by VideoDataLayer
+message VideoDataParameter {
+  // Path to the schedule file: one "filename,frame ids,transform ids" line per batch.
+  optional string source = 1;
+  // Specify the batch size.
+  optional uint32 batch_size = 4;
+  // The rand_skip variable is for the data layer to skip a few batches
+  // to avoid all asynchronous sgd clients to start at the same point. The skip
+  // point is drawn as rand() % rand_skip, so rand_skip should not be larger
+  // than the number of batches (lines) in the schedule file.
+  optional uint32 rand_skip = 7 [default = 0];
+  // Whether or not VideoDataLayer should shuffle the batches read from the schedule.
+  optional bool shuffle = 8 [default = false];
+  // Frames are resized when new_height and new_width are both non-zero (set both or neither).
+  optional uint32 new_height = 9 [default = 0];
+  optional uint32 new_width = 10 [default = 0];
+  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
+  // simple scaling and subtracting the data mean, if provided. Note that the
+  // mean subtraction is always carried out before scaling.
+  optional float scale = 2 [default = 1];
+  optional string mean_file = 3;
+  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
+  // crop an image.
+  optional uint32 crop_size = 5 [default = 0];
+  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
+  // data.
+  optional bool mirror = 6 [default = false];
+}
+
 // Message that stores parameters InfogainLossLayer
 message InfogainLossParameter {
   // Specify the infogain matrix source.
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 26bab64b2ca..aa1dd9e137e 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -207,52 +207,89 @@ void Solver<Dtype>::Solve(const char* resume_file) {
       string str2("label");
       string str3("pixel-label");
       string str4("bb-label");
+      string save_dir("/scr/twangcat/caffenet_results/train/");
+      vector<cv::Mat> save_imgs;
+      int quad_height;
+      int quad_width;
+      int batch_size;
+      const Dtype* pix_start;
+      const Dtype* bb_start;
       for (int j = 0; j < blobs.size(); ++j) {
-        if(blob_names[j].compare(str1)==0)
+        if(blob_names[j].compare(str3)==0) //pixel label
+        {
+          LOG(INFO) << "pixel-label " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
+          pix_start = blobs[j]->cpu_data();
+          quad_height = blobs[j]->height();
+          quad_width = blobs[j]->width();
+          batch_size = blobs[j]->num();
+        }
+        if(blob_names[j].compare(str4)==0) // bb label
+        {
+          LOG(INFO) << "bb-label " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
+          bb_start = blobs[j]->cpu_data();
+        }
+        if(blob_names[j].compare(str1)==0) // actual image
         {
           LOG(INFO) << "data " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
+          const Dtype* data_start = blobs[j]->cpu_data();
           for(int n=0; n<blobs[j]->num(); ++n)
           {
-            int image_id = this->iter_*5+n;
-            const Dtype* foo;
-            foo = blobs[j]->cpu_data()+n*blobs[j]->channels()*blobs[j]->height()*blobs[j]->width();
             cv::Mat curr_img = cv::Mat(blobs[j]->height(), blobs[j]->width(), CV_32FC3, cv::Scalar(0,0,255));
-            double minVal, maxVal;
-            cv::minMaxLoc(curr_img, &minVal, &maxVal); //find minimum and maximum intensities
-            //std::copy ( foo, foo+blobs[j]->channels()*blobs[j]->height()*blobs[j]->width(), curr_img.data );
             for(int kk=0; kk<blobs[j]->channels();++kk)
             {
               for(int yy=0; yy<blobs[j]->height();++yy)
               {
                 for(int xx=0; xx<blobs[j]->width();++xx)
                 {
-                  //std::cout<<*(foo+(((n*blobs[j]->channels() + kk) * blobs[j]->height() + yy) * blobs[j]->width() + xx))<<" ";
-                  //*(curr_img.data+((yy * blobs[j]->width() + xx) * 3 + kk))=*(foo+(((n*blobs[j]->channels() + kk) * blobs[j]->height() + yy) * blobs[j]->width() + xx));
-                  curr_img.at<cv::Vec3f>(yy,xx)[kk]=*(foo+(((n*blobs[j]->channels() + kk) * blobs[j]->height() + yy) * blobs[j]->width() + xx));
+                  curr_img.at<cv::Vec3f>(yy,xx)[kk]=*(data_start+(((n*blobs[j]->channels() + kk) * blobs[j]->height() + yy) * blobs[j]->width() + xx));
                 }
               }
             }
-            //std::cout<<std::endl;
-            cv::Mat save_img;
-            //curr_img.convertTo(save_img, CV_8UC3);
-            std::ostringstream stringStream;
-            stringStream << "img"<<image_id<<".png";
-            std::string save_name = stringStream.str();
-            cv::imwrite(save_name, curr_img);
+            save_imgs.push_back(curr_img); 
           }
         }
-        /*if(blob_names[j].compare(str2)==0)
-        {
-          LOG(INFO) << "label " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
-        }
-        if(blob_names[j].compare(str3)==0)
-        {
-          LOG(INFO) << "pixel-label " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
+      }
+      int grid_dim=4;
+      int label_count = 0;
+      int label_height = quad_height*grid_dim;
+      int label_width = quad_width*grid_dim;
+      Dtype scaling = 1.0/8;
+      for(int n=0; n<batch_size; ++n){
+        int image_id = this->iter_*5+n;
+        cv::Mat save_img = save_imgs[n];
+        std::ostringstream stringStream;
+        stringStream <<save_dir<< "img"<<image_id<<".png";
+        std::string save_name = stringStream.str();
+        for (int z=0; z<16;++z){
+          for (int qy = 0; qy < quad_height; ++qy) {
+            for (int qx = 0; qx < quad_width; ++qx) {
+              int dx = z%grid_dim;
+              int dy = z/grid_dim;
+              int x = qx*grid_dim+dx;
+              int y = qy*grid_dim+dy;
+              if (*(pix_start+(((n*16+z)*quad_height+qy)*quad_width+qx)) <0.5) {
+                // do nothing
+              } else{
+
+                save_img.at<cv::Vec3f>(y/scaling,x/scaling) = cv::Vec3f(0,255,0);
+                save_img.at<cv::Vec3f>(y/scaling-1,x/scaling-1) = cv::Vec3f(0,255,0);
+                save_img.at<cv::Vec3f>(y/scaling+1,x/scaling-1) = cv::Vec3f(0,255,0);
+                save_img.at<cv::Vec3f>(y/scaling-1,x/scaling+1) = cv::Vec3f(0,255,0);
+                save_img.at<cv::Vec3f>(y/scaling+1,x/scaling+1) = cv::Vec3f(0,255,0);
+
+                float x_adj = (qx*grid_dim + grid_dim / 2) / scaling;
+                float y_adj = (qy*grid_dim + grid_dim / 2) / scaling;
+                int x_min = *(bb_start+(((n*64+z)*quad_height+qy)*quad_width+qx))+x_adj;
+                int y_min = *(bb_start+(((n*64+z+16)*quad_height+qy)*quad_width+qx))+y_adj;
+                int x_max = *(bb_start+(((n*64+z+32)*quad_height+qy)*quad_width+qx))+x_adj;
+                int y_max = *(bb_start+(((n*64+z+48)*quad_height+qy)*quad_width+qx))+y_adj;
+                cv::Rect bb(x_min, y_min, x_max-x_min+1, y_max-y_min+1); 
+                cv::rectangle(save_img, bb, cv::Scalar(100, 100, 200), 2);
+              }
+            }
+          }
+          cv::imwrite(save_name, save_img);
         }
-        if(blob_names[j].compare(str4)==0)
-        {
-          LOG(INFO) << "bb-label " << blobs[j]->num()<<" "<<blobs[j]->channels()<<" "<<blobs[j]->height()<<" "<<blobs[j]->width();
-        }*/
       }
       //end
       int score_index = 0;
@@ -321,6 +358,7 @@ void Solver<Dtype>::Test(const int test_net_id) {
   const shared_ptr<Net<Dtype> >& test_net = test_nets_[test_net_id];
   Dtype loss = 0;
   for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
+    LOG(INFO) << "i = " << i<<" of "<<param_.test_iter(test_net_id);
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
         test_net->Forward(bottom_vec, &iter_loss);
@@ -350,6 +388,7 @@ void Solver<Dtype>::Test(const int test_net_id) {
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
+    LOG(INFO) << "i2 = " << i<<" of "<<test_score.size();
     const int output_blob_index =
         test_net->output_blob_indices()[test_score_output_id[i]];
     const string& output_name = test_net->blob_names()[output_blob_index];
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index cf4ba8cffa4..991ef91491d 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -166,6 +166,16 @@ bool ReadImageToDatum(const string& filename, const int label,
   return true;
 }
 
+//added by Tao
+
+
+
+
+
+//end
+
+
+
 leveldb::Options GetLevelDBOptions() {
   // In default, we will return the leveldb option and set the max open files
   // in order to avoid using up the operating system's limit.