forked from spmallick/learnopencv
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request spmallick#137 from sunitanyk/master
Mask R-CNN
- Loading branch information
Showing
10 changed files
with
8,140 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
**Python** | ||
|
||
`wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz` | ||
`tar zxvf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz` | ||
|
||
Download and extract the needed model files. | ||
|
||
**Usage Examples :** | ||
|
||
**Python** | ||
|
||
`python3 mask_rcnn.py --image=cars.jpg` | ||
`python3 mask_rcnn.py --video=cars.mp4` | ||
|
||
It starts the webcam - if no argument provided. | ||
|
||
**C++** | ||
|
||
Compile using: | ||
|
||
```g++ -ggdb `pkg-config --cflags --libs /Users/snayak/opencv/build/unix-install/opencv.pc` mask_rcnn.cpp -o mask_rcnn.out``` | ||
|
||
Run using: | ||
`./mask_rcnn.out --image=cars.jpg` | ||
`./mask_rcnn.out --video=cars.mp4` |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
0 255 0 | ||
0 0 255 | ||
255 0 0 | ||
0 255 255 | ||
255 255 0 | ||
255 0 255 | ||
80 70 180 | ||
250 80 190 | ||
245 145 50 | ||
70 150 250 | ||
50 190 190 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
// This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html | ||
|
||
// Usage example: ./mask_rcnn.out --video=run.mp4 | ||
// ./mask_rcnn.out --image=bird.jpg | ||
#include <fstream> | ||
#include <sstream> | ||
#include <iostream> | ||
#include <string.h> | ||
|
||
#include <opencv2/dnn.hpp> | ||
#include <opencv2/imgproc.hpp> | ||
#include <opencv2/highgui.hpp> | ||
|
||
const char* keys = | ||
"{help h usage ? | | Usage examples: \n\t\t./mask-rcnn.out --image=traffic.jpg \n\t\t./mask-rcnn.out --video=sample.mp4}" | ||
"{image i |<none>| input image }" | ||
"{video v |<none>| input video }" | ||
; | ||
using namespace cv; | ||
using namespace dnn; | ||
using namespace std; | ||
|
||
// Initialize the parameters | ||
float confThreshold = 0.5; // Confidence threshold | ||
float maskThreshold = 0.3; // Mask threshold | ||
|
||
vector<string> classes; | ||
vector<Scalar> colors; | ||
|
||
// Draw the predicted bounding box | ||
void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask); | ||
|
||
// Postprocess the neural network's output for each frame | ||
void postprocess(Mat& frame, const vector<Mat>& outs); | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
CommandLineParser parser(argc, argv, keys); | ||
parser.about("Use this script to run object detection using YOLO3 in OpenCV."); | ||
if (parser.has("help")) | ||
{ | ||
parser.printMessage(); | ||
return 0; | ||
} | ||
// Load names of classes | ||
string classesFile = "mscoco_labels.names"; | ||
ifstream ifs(classesFile.c_str()); | ||
string line; | ||
while (getline(ifs, line)) classes.push_back(line); | ||
|
||
// Load the colors | ||
string colorsFile = "colors.txt"; | ||
ifstream colorFptr(colorsFile.c_str()); | ||
while (getline(colorFptr, line)) { | ||
char* pEnd; | ||
double r, g, b; | ||
r = strtod (line.c_str(), &pEnd); | ||
g = strtod (pEnd, NULL); | ||
b = strtod (pEnd, NULL); | ||
Scalar color = Scalar(r, g, b, 255.0); | ||
colors.push_back(Scalar(r, g, b, 255.0)); | ||
} | ||
|
||
// Give the configuration and weight files for the model | ||
String textGraph = "./mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"; | ||
String modelWeights = "./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"; | ||
|
||
// Load the network | ||
Net net = readNetFromTensorflow(modelWeights, textGraph); | ||
net.setPreferableBackend(DNN_BACKEND_OPENCV); | ||
net.setPreferableTarget(DNN_TARGET_CPU); | ||
|
||
// Open a video file or an image file or a camera stream. | ||
string str, outputFile; | ||
VideoCapture cap; | ||
VideoWriter video; | ||
Mat frame, blob; | ||
|
||
try { | ||
|
||
outputFile = "mask_rcnn_out_cpp.avi"; | ||
if (parser.has("image")) | ||
{ | ||
// Open the image file | ||
str = parser.get<String>("image"); | ||
//cout << "Image file input : " << str << endl; | ||
ifstream ifile(str); | ||
if (!ifile) throw("error"); | ||
cap.open(str); | ||
str.replace(str.end()-4, str.end(), "_mask_rcnn_out.jpg"); | ||
outputFile = str; | ||
} | ||
else if (parser.has("video")) | ||
{ | ||
// Open the video file | ||
str = parser.get<String>("video"); | ||
ifstream ifile(str); | ||
if (!ifile) throw("error"); | ||
cap.open(str); | ||
str.replace(str.end()-4, str.end(), "_mask_rcnn_out.avi"); | ||
outputFile = str; | ||
} | ||
// Open the webcam | ||
else cap.open(parser.get<int>("device")); | ||
|
||
} | ||
catch(...) { | ||
cout << "Could not open the input image/video stream" << endl; | ||
return 0; | ||
} | ||
|
||
// Get the video writer initialized to save the output video | ||
if (!parser.has("image")) { | ||
video.open(outputFile, VideoWriter::fourcc('M','J','P','G'), 28, Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT))); | ||
} | ||
|
||
// Create a window | ||
static const string kWinName = "Deep learning object detection in OpenCV"; | ||
namedWindow(kWinName, WINDOW_NORMAL); | ||
|
||
// Process frames. | ||
while (waitKey(1) < 0) | ||
{ | ||
// get frame from the video | ||
cap >> frame; | ||
|
||
// Stop the program if reached end of video | ||
if (frame.empty()) { | ||
cout << "Done processing !!!" << endl; | ||
cout << "Output file is stored as " << outputFile << endl; | ||
waitKey(3000); | ||
break; | ||
} | ||
// Create a 4D blob from a frame. | ||
blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false); | ||
//blobFromImage(frame, blob); | ||
|
||
//Sets the input to the network | ||
net.setInput(blob); | ||
|
||
// Runs the forward pass to get output from the output layers | ||
std::vector<String> outNames(2); | ||
outNames[0] = "detection_out_final"; | ||
outNames[1] = "detection_masks"; | ||
vector<Mat> outs; | ||
net.forward(outs, outNames); | ||
|
||
// Extract the bounding box and mask for each of the detected objects | ||
postprocess(frame, outs); | ||
|
||
// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes) | ||
vector<double> layersTimes; | ||
double freq = getTickFrequency() / 1000; | ||
double t = net.getPerfProfile(layersTimes) / freq; | ||
string label = format("Mask-RCNN on 2.5 GHz Intel Core i7 CPU, Inference time for a frame : %0.0f ms", t); | ||
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0)); | ||
|
||
// Write the frame with the detection boxes | ||
Mat detectedFrame; | ||
frame.convertTo(detectedFrame, CV_8U); | ||
if (parser.has("image")) imwrite(outputFile, detectedFrame); | ||
else video.write(detectedFrame); | ||
|
||
imshow(kWinName, frame); | ||
|
||
} | ||
|
||
cap.release(); | ||
if (!parser.has("image")) video.release(); | ||
|
||
return 0; | ||
} | ||
|
||
// For each frame, extract the bounding box and mask for each detected object | ||
void postprocess(Mat& frame, const vector<Mat>& outs) | ||
{ | ||
Mat outDetections = outs[0]; | ||
Mat outMasks = outs[1]; | ||
|
||
// Output size of masks is NxCxHxW where | ||
// N - number of detected boxes | ||
// C - number of classes (excluding background) | ||
// HxW - segmentation shape | ||
const int numDetections = outDetections.size[2]; | ||
const int numClasses = outMasks.size[1]; | ||
|
||
outDetections = outDetections.reshape(1, outDetections.total() / 7); | ||
for (int i = 0; i < numDetections; ++i) | ||
{ | ||
float score = outDetections.at<float>(i, 2); | ||
if (score > confThreshold) | ||
{ | ||
// Extract the bounding box | ||
int classId = static_cast<int>(outDetections.at<float>(i, 1)); | ||
int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3)); | ||
int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4)); | ||
int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5)); | ||
int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6)); | ||
|
||
left = max(0, min(left, frame.cols - 1)); | ||
top = max(0, min(top, frame.rows - 1)); | ||
right = max(0, min(right, frame.cols - 1)); | ||
bottom = max(0, min(bottom, frame.rows - 1)); | ||
Rect box = Rect(left, top, right - left + 1, bottom - top + 1); | ||
|
||
// Extract the mask for the object | ||
Mat objectMask(outMasks.size[2], outMasks.size[3],CV_32F, outMasks.ptr<float>(i,classId)); | ||
|
||
// Draw bounding box, colorize and show the mask on the image | ||
drawBox(frame, classId, score, box, objectMask); | ||
|
||
} | ||
} | ||
} | ||
|
||
// Draw the predicted bounding box, colorize and show the mask on the image | ||
void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask) | ||
{ | ||
//Draw a rectangle displaying the bounding box | ||
rectangle(frame, Point(box.x, box.y), Point(box.x+box.width, box.y+box.height), Scalar(255, 178, 50), 3); | ||
|
||
//Get the label for the class name and its confidence | ||
string label = format("%.2f", conf); | ||
if (!classes.empty()) | ||
{ | ||
CV_Assert(classId < (int)classes.size()); | ||
label = classes[classId] + ":" + label; | ||
} | ||
|
||
//Display the label at the top of the bounding box | ||
int baseLine; | ||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
box.y = max(box.y, labelSize.height); | ||
rectangle(frame, Point(box.x, box.y - round(1.5*labelSize.height)), Point(box.x + round(1.5*labelSize.width), box.y + baseLine), Scalar(255, 255, 255), FILLED); | ||
putText(frame, label, Point(box.x, box.y), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,0),1); | ||
|
||
Scalar color = colors[classId%colors.size()]; | ||
|
||
// Resize the mask, threshold, color and apply it on the image | ||
resize(objectMask, objectMask, Size(box.width, box.height)); | ||
Mat mask = (objectMask > maskThreshold); | ||
Mat coloredRoi = (0.3 * color + 0.7 * frame(box)); | ||
coloredRoi.convertTo(coloredRoi, CV_8UC3); | ||
|
||
// Draw the contours on the image | ||
vector<Mat> contours; | ||
Mat hierarchy; | ||
mask.convertTo(mask, CV_8U); | ||
findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE); | ||
drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100); | ||
coloredRoi.copyTo(frame(box), mask); | ||
|
||
} |
Binary file not shown.
Oops, something went wrong.