Skip to content

Commit

Permalink
Merge pull request spmallick#137 from sunitanyk/master
Browse files Browse the repository at this point in the history
Mask R-CNN
  • Loading branch information
spmallick authored Oct 1, 2018
2 parents 5b81810 + 3405d0d commit 23b0d14
Show file tree
Hide file tree
Showing 10 changed files with 8,140 additions and 2 deletions.
25 changes: 25 additions & 0 deletions Mask-RCNN/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
**Python**

`wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz`
`tar zxvf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz`

Download and extract the needed model files.

**Usage Examples :**

**Python**

`python3 mask_rcnn.py --image=cars.jpg`
`python3 mask_rcnn.py --video=cars.mp4`

It starts the webcam - if no argument provided.

**C++**

Compile using:

```g++ -ggdb `pkg-config --cflags --libs /Users/snayak/opencv/build/unix-install/opencv.pc` mask_rcnn.cpp -o mask_rcnn.out```

Run using:
`./mask_rcnn.out --image=cars.jpg`
`./mask_rcnn.out --video=cars.mp4`
Binary file added Mask-RCNN/cars.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Mask-RCNN/cars.mp4
Binary file not shown.
11 changes: 11 additions & 0 deletions Mask-RCNN/colors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
0 255 0
0 0 255
255 0 0
0 255 255
255 255 0
255 0 255
80 70 180
250 80 190
245 145 50
70 150 250
50 190 190
253 changes: 253 additions & 0 deletions Mask-RCNN/mask_rcnn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
// This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html

// Usage example: ./mask_rcnn.out --video=run.mp4
// ./mask_rcnn.out --image=bird.jpg
#include <fstream>
#include <sstream>
#include <iostream>
#include <string.h>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

const char* keys =
"{help h usage ? | | Usage examples: \n\t\t./mask-rcnn.out --image=traffic.jpg \n\t\t./mask-rcnn.out --video=sample.mp4}"
"{image i |<none>| input image }"
"{video v |<none>| input video }"
;
using namespace cv;
using namespace dnn;
using namespace std;

// Initialize the parameters
float confThreshold = 0.5; // Confidence threshold
float maskThreshold = 0.3; // Mask threshold

vector<string> classes;
vector<Scalar> colors;

// Draw the predicted bounding box
void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask);

// Postprocess the neural network's output for each frame
void postprocess(Mat& frame, const vector<Mat>& outs);

int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run object detection using YOLO3 in OpenCV.");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
// Load names of classes
string classesFile = "mscoco_labels.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) classes.push_back(line);

// Load the colors
string colorsFile = "colors.txt";
ifstream colorFptr(colorsFile.c_str());
while (getline(colorFptr, line)) {
char* pEnd;
double r, g, b;
r = strtod (line.c_str(), &pEnd);
g = strtod (pEnd, NULL);
b = strtod (pEnd, NULL);
Scalar color = Scalar(r, g, b, 255.0);
colors.push_back(Scalar(r, g, b, 255.0));
}

// Give the configuration and weight files for the model
String textGraph = "./mask_rcnn_inception_v2_coco_2018_01_28.pbtxt";
String modelWeights = "./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb";

// Load the network
Net net = readNetFromTensorflow(modelWeights, textGraph);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);

// Open a video file or an image file or a camera stream.
string str, outputFile;
VideoCapture cap;
VideoWriter video;
Mat frame, blob;

try {

outputFile = "mask_rcnn_out_cpp.avi";
if (parser.has("image"))
{
// Open the image file
str = parser.get<String>("image");
//cout << "Image file input : " << str << endl;
ifstream ifile(str);
if (!ifile) throw("error");
cap.open(str);
str.replace(str.end()-4, str.end(), "_mask_rcnn_out.jpg");
outputFile = str;
}
else if (parser.has("video"))
{
// Open the video file
str = parser.get<String>("video");
ifstream ifile(str);
if (!ifile) throw("error");
cap.open(str);
str.replace(str.end()-4, str.end(), "_mask_rcnn_out.avi");
outputFile = str;
}
// Open the webcam
else cap.open(parser.get<int>("device"));

}
catch(...) {
cout << "Could not open the input image/video stream" << endl;
return 0;
}

// Get the video writer initialized to save the output video
if (!parser.has("image")) {
video.open(outputFile, VideoWriter::fourcc('M','J','P','G'), 28, Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)));
}

// Create a window
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);

// Process frames.
while (waitKey(1) < 0)
{
// get frame from the video
cap >> frame;

// Stop the program if reached end of video
if (frame.empty()) {
cout << "Done processing !!!" << endl;
cout << "Output file is stored as " << outputFile << endl;
waitKey(3000);
break;
}
// Create a 4D blob from a frame.
blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false);
//blobFromImage(frame, blob);

//Sets the input to the network
net.setInput(blob);

// Runs the forward pass to get output from the output layers
std::vector<String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
vector<Mat> outs;
net.forward(outs, outNames);

// Extract the bounding box and mask for each of the detected objects
postprocess(frame, outs);

// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
string label = format("Mask-RCNN on 2.5 GHz Intel Core i7 CPU, Inference time for a frame : %0.0f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));

// Write the frame with the detection boxes
Mat detectedFrame;
frame.convertTo(detectedFrame, CV_8U);
if (parser.has("image")) imwrite(outputFile, detectedFrame);
else video.write(detectedFrame);

imshow(kWinName, frame);

}

cap.release();
if (!parser.has("image")) video.release();

return 0;
}

// For each frame, extract the bounding box and mask for each detected object
void postprocess(Mat& frame, const vector<Mat>& outs)
{
Mat outDetections = outs[0];
Mat outMasks = outs[1];

// Output size of masks is NxCxHxW where
// N - number of detected boxes
// C - number of classes (excluding background)
// HxW - segmentation shape
const int numDetections = outDetections.size[2];
const int numClasses = outMasks.size[1];

outDetections = outDetections.reshape(1, outDetections.total() / 7);
for (int i = 0; i < numDetections; ++i)
{
float score = outDetections.at<float>(i, 2);
if (score > confThreshold)
{
// Extract the bounding box
int classId = static_cast<int>(outDetections.at<float>(i, 1));
int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3));
int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4));
int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5));
int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6));

left = max(0, min(left, frame.cols - 1));
top = max(0, min(top, frame.rows - 1));
right = max(0, min(right, frame.cols - 1));
bottom = max(0, min(bottom, frame.rows - 1));
Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

// Extract the mask for the object
Mat objectMask(outMasks.size[2], outMasks.size[3],CV_32F, outMasks.ptr<float>(i,classId));

// Draw bounding box, colorize and show the mask on the image
drawBox(frame, classId, score, box, objectMask);

}
}
}

// Draw the predicted bounding box, colorize and show the mask on the image
void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask)
{
//Draw a rectangle displaying the bounding box
rectangle(frame, Point(box.x, box.y), Point(box.x+box.width, box.y+box.height), Scalar(255, 178, 50), 3);

//Get the label for the class name and its confidence
string label = format("%.2f", conf);
if (!classes.empty())
{
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ":" + label;
}

//Display the label at the top of the bounding box
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
box.y = max(box.y, labelSize.height);
rectangle(frame, Point(box.x, box.y - round(1.5*labelSize.height)), Point(box.x + round(1.5*labelSize.width), box.y + baseLine), Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(box.x, box.y), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,0),1);

Scalar color = colors[classId%colors.size()];

// Resize the mask, threshold, color and apply it on the image
resize(objectMask, objectMask, Size(box.width, box.height));
Mat mask = (objectMask > maskThreshold);
Mat coloredRoi = (0.3 * color + 0.7 * frame(box));
coloredRoi.convertTo(coloredRoi, CV_8UC3);

// Draw the contours on the image
vector<Mat> contours;
Mat hierarchy;
mask.convertTo(mask, CV_8U);
findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);
drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);
coloredRoi.copyTo(frame(box), mask);

}
Binary file added Mask-RCNN/mask_rcnn.out
Binary file not shown.
Loading

0 comments on commit 23b0d14

Please sign in to comment.