Add confidence level based on entropy
Also clean up resultant JSON, expose regions and tweak threshold.
automata committed Jun 16, 2015
1 parent 0e7cedb commit 7586c53
Showing 12 changed files with 21,342 additions and 145 deletions.
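The confidence value added here is the inverse normalized entropy of the saliency map's gray-level histogram (see the changes to saliency.cpp below). With p_i the fraction of pixels falling into bin i of a 256-bin histogram, the computation corresponds to

H = -\sum_{i=0}^{255} p_i \ln p_i, \qquad \text{confidence} = 1 - \frac{H}{\ln 256}

so a flat (high-entropy) saliency histogram yields a confidence near 0, while a strongly peaked histogram yields a confidence near 1.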
Gruntfile.coffee (6 additions & 4 deletions)
@@ -3,7 +3,7 @@ execSync = require('exec-sync')

test_path = './test_set/'
#test_sets = ['Text', 'NonText']
test_sets = ['Grid_full', 'Grid_rescaled']
test_sets = ['good', 'bad']

module.exports = ->
grunt = @
@@ -96,13 +96,15 @@ module.exports = ->

imgs = fs.readdirSync dir
for img in imgs
unless /filtered|threshold|contours|saliency|DS_Store/.test img
unless /filtered|threshold|contours|saliency|histogram_saliency|DS_Store/.test img
img = img.replace /\s/g, '\\ '
abspath = dir + '/' + img
console.log ' ' + abspath
execSync './build/Release/saliency ' + abspath
output = execSync './build/Release/saliency ' + abspath
console.log ' ' + abspath + ' finished.'
data[set].push abspath
data[set].push
image: abspath
measurement: JSON.parse output

grunt.file.write './test_set_app/data.js', 'window.DATA = {sets:' + JSON.stringify(data, 1, 1) + '};'

saliency.cpp (91 additions & 42 deletions)
@@ -1,5 +1,5 @@
/* saliency.cpp - Saliency heuristics
* (c) 2014 The Grid
* (c) 2014-2015 The Grid
*
*/

@@ -17,12 +17,30 @@ using namespace std;

RNG rng(12345);

Mat DrawHistogram(Mat gray) {
int histSize = 256; // bin size
float range[] = { 0, 255 };
const float *ranges[] = { range };
MatND hist;

calcHist( &gray, 1, 0, Mat(), hist, 1, &histSize, ranges, true, false );

int hist_w = 512; int hist_h = 400;
int bin_w = cvRound( (double) hist_w/histSize );

Mat histImage( hist_h, hist_w, CV_8UC1, Scalar( 0,0,0) );
normalize(hist, hist, 0, histImage.rows, NORM_MINMAX, -1, Mat() );

for( int i = 1; i < histSize; i++){
line( histImage, Point( bin_w*(i-1), hist_h - cvRound(hist.at<float>(i-1)) ) ,
Point( bin_w*(i), hist_h - cvRound(hist.at<float>(i)) ),
Scalar( 255, 0, 0), 2, 8, 0 );
}
return histImage;
}

static void display_help(string program_name) {
cerr << "Usage: " << program_name << " <original image>"
//<< "Options:\n"
//<< "\t-h,--help\t\tShow this help message\n"
//<< "\t-s,--saliencymap\tWrite the saliency map to an image file\n"
<< endl;
cerr << "Usage: " << program_name << " <original image>" << endl;
}

int main(int argc, char *argv[]) {
@@ -51,25 +69,40 @@ int main(int argc, char *argv[]) {
Mat saliency_gray = saliency_map * 255;
Mat most_salient;

// Static threshold:
// Mat fg;
// int threshold_value = 254;
// fg = saliency_gray >= threshold_value;
// #ifdef DEBUG
// sprintf(file_path, "%s_fg.png", original_image_path);
// imwrite(file_path, fg);
// #endif
GaussianBlur(saliency_gray, saliency_gray, Size(1,1), 0, 0);
#ifdef DEBUG
Mat histin = DrawHistogram(saliency_gray);
sprintf(file_path, "%s_histogram_saliency.png", original_image_path);
imwrite(file_path, histin);
#endif

// Calculate confidence based on homogeneity of saliency map's histogram
Mat hist;
int histSize = 256;
float range[] = { 0, 256 } ;
const float* histRange = { range };
calcHist(&saliency_gray, 1, 0, Mat(), hist, 1, &histSize, &histRange, true, false);
hist /= original_image.size().height*original_image.size().width;
Mat logP;
cv::log(hist,logP);
// Inverse normalized entropy
float entropy = -1*sum(hist.mul(logP)).val[0];
entropy = entropy/log(256); // normalize
entropy = 1.0 - entropy; // inverse

// Blur and binary threshold saliency map based on OTSU
saliency_gray.convertTo(saliency_gray, CV_8U); // threshold needs an int Mat
//adaptiveThreshold(saliency_gray, most_salient, 255, ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 3, 0);
threshold(saliency_gray, most_salient, 0, 255, THRESH_BINARY | THRESH_OTSU);
Mat blur;
bilateralFilter(saliency_gray, blur, 12, 24, 6);
// GaussianBlur(saliency_gray, blur, Size(5,5), 0);
threshold(blur, most_salient, 0, 255, THRESH_BINARY + THRESH_OTSU);
#ifdef DEBUG
sprintf(file_path, "%s_threshold.png", original_image_path);
imwrite(file_path, most_salient);
#endif

// Eliminate small regions (Mat() == default 3x3 kernel)
Mat filtered;
//filtered = most_salient;
// Another option is to use dilate/erode/dilate:
// dilate(most_salient, filtered, Mat(), Point(-1, -1), 2, 1, 1);
// erode(filtered, filtered, Mat(), Point(-1, -1), 4, 1, 1);
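The entropy-based confidence introduced in the hunk above can be read in isolation as the sketch below. This is a minimal standalone version, not the committed code: the function name is illustrative, it assumes an 8-bit or 32-bit single-channel saliency image, and it adds a small epsilon so that empty histogram bins do not feed log(0) (the committed code relies on cv::log's handling of zero inputs instead).

#include <opencv2/opencv.hpp>
#include <cmath>

// Inverse normalized entropy of a grayscale image's histogram.
// Close to 0 for a flat histogram, close to 1 for a strongly peaked one.
static float HistogramConfidence(const cv::Mat &gray) {
  int histSize = 256;
  float range[] = { 0, 256 };
  const float *histRange = { range };
  cv::Mat hist;
  cv::calcHist(&gray, 1, 0, cv::Mat(), hist, 1, &histSize, &histRange, true, false);

  // Turn bin counts into probabilities.
  hist /= (float)(gray.rows * gray.cols);

  // Epsilon guard so that empty bins do not produce log(0).
  cv::Mat safe = hist + 1e-12f;
  cv::Mat logP;
  cv::log(safe, logP);

  cv::Mat weighted = hist.mul(logP);      // p_i * ln(p_i)
  float entropy = -1.0f * (float)cv::sum(weighted).val[0];
  entropy /= std::log(256.0f);            // normalize to [0, 1]
  return 1.0f - entropy;                  // invert: low entropy = high confidence
}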
@@ -117,16 +150,16 @@ int main(int argc, char *argv[]) {
}

// Find the biggest area of all contours
// int big_id = 0;
// double big_area = 0;
// for (size_t i = 0, max = contours.size(); i < max; ++i) {
// // Contour area
// double area = contourArea(contours[i]);
// if (area > big_area) {
// big_id = i;
// big_area = area;
// }
// }
int big_id = 0;
double big_area = 0;
for (size_t i = 0, max = contours.size(); i < max; ++i) {
// Contour area
double area = contourArea(contours[i]);
if (area > big_area) {
big_id = i;
big_area = area;
}
}

// Group all bounding rects into one, good for superimposition elimination
// Vector<Rect> allRect = boundRect;
@@ -152,14 +185,9 @@
xmax = xmaxB;
if (ymaxB > ymax)
ymax = ymaxB;
// cout << j << endl;
// cout << boundRect[j].tl() << endl;
// cout << boundRect[j].br() << endl;
}
// cout << xmin << "," << ymin << endl;
// cout << xmax << "," << ymax << endl;
Rect bigRect = Rect(xmin, ymin, xmax-xmin, ymax-ymin);
//int i = big_id;

#ifdef DEBUG
// Draw polygonal contour + bonding rects + circles
Mat drawing = Mat::zeros( filtered.size(), CV_8UC3 );
@@ -192,23 +220,44 @@
#endif

// Serialize as stringified JSON
// TODO: Use jsoncpp instead? Not using now to avoid one more dependency
cout << "{\"saliency\": ";
cout << "{\"outmost_rect\": [" << bigRect.tl() << ", " << bigRect.br() << "],";
float x = bigRect.tl().x;
float y = bigRect.tl().y;
float w = abs(x-bigRect.br().x);
float h = abs(y-bigRect.br().y);

cout << "{\"bounding_rect\": [" << bigRect.tl() << ", " << bigRect.br() << "],";
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "},";
cout << "\"confidence\": " << entropy << ",";

cout << "\"polygon\": [";
size_t maxPoly = contours_poly[big_id].size()-1;
for (size_t j = 0; j < maxPoly; ++j) {
cout << contours_poly[big_id][j] << ", ";
}
cout << contours_poly[big_id][maxPoly] << "], ";
cout << "\"center\": [" << (int)center[big_id].x << ", " << (int)center[big_id].y << "], ";
cout << "\"radius\": " << radius[big_id] << ", ";
// Regions
cout << "\"regions\": [";
for (size_t i=0, max=boundRect.size(); i<max; ++i) {
cout << "{\"polygon\": [";
size_t maxPoly = contours_poly[i].size()-1;
for (size_t j = 0; j < maxPoly; ++j) {
cout << contours_poly[i][j] << ", ";
cout << "{\"x\": " << contours_poly[i][j].x << ", \"y\": " << contours_poly[i][j].y << "}, ";
}
cout << contours_poly[i][maxPoly] << "], ";
cout << "\"center\": [" << (int)center[i].x << ", " << (int)center[i].y << "], ";
cout << "{\"x\": " << contours_poly[i][maxPoly].x << ", \"y\": " << contours_poly[i][maxPoly].y << "}], ";
cout << "\"center\": {\"x\": " << (int)center[i].x << ", \"y\": " << (int)center[i].y << "}, ";
cout << "\"radius\": " << radius[i] << ", ";
if (i == max-1)
cout << "\"bounding_rect\": [" << boundRect[i].tl() << ", " << boundRect[i].br() << "]}";
else
cout << "\"bounding_rect\": [" << boundRect[i].tl() << ", " << boundRect[i].br() << "]},";
float x = boundRect[i].tl().x;
float y = boundRect[i].tl().y;
float w = abs(x-boundRect[i].br().x);
float h = abs(y-boundRect[i].br().y);
if (i == max-1) {
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "}}";
} else {
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "}},";
}
}
cout << "]}}" << endl;

spec/GetSaliency.coffee (62 additions & 22 deletions)
@@ -9,10 +9,8 @@ else

validateWithThreshold = (chai, calculated, expected, threshold) ->
for i in [0...calculated.length]
diffX = Math.abs(calculated[i][0] - expected[i][0])
diffY = Math.abs(calculated[i][1] - expected[i][1])
chai.expect(diffX).to.be.at.most threshold
chai.expect(diffY).to.be.at.most threshold
chai.expect(calculated[i][0]).to.be.closeTo expected[i][0], threshold
chai.expect(calculated[i][1]).to.be.closeTo expected[i][1], threshold

describe 'GetSaliency component', ->

@@ -34,6 +32,7 @@ describe 'GetSaliency component', ->
chai.expect(c.outPorts.out).to.be.an 'object'

describe 'with file system image', ->
previous = null
it 'should extract a valid saliency profile', (done) ->
@timeout 10000
id = 1
@@ -45,15 +44,62 @@
out.once 'data', (res) ->
chai.expect(groups).to.eql [1]
chai.expect(res).to.be.an 'object'
saliency = res.saliency
outmost_rect = saliency.outmost_rect
regions = saliency.regions
{saliency} = res
previous = saliency
{bounding_rect, polygon, radius, center, bbox, confidence, regions} = saliency

expected = [[510,1],[456,59],[417,109],[396,111],[367,133],[352,138],[341,135],[331,125],[318,102],[294,75],[266,57],[231,45],[209,46],[196,52],[193,57],[217,65],[230,83],[226,103],[189,139],[189,165],[198,180],[197,191],[158,237],[135,244],[120,260],[108,261],[69,307],[63,324],[65,349],[60,358],[65,367],[66,466],[61,510],[172,510],[173,495],[183,483],[199,479],[215,484],[221,479],[224,468],[234,458],[242,428],[251,417],[302,414],[305,406],[317,397],[334,399],[351,415],[367,455],[380,410],[374,400],[375,358],[410,243],[447,158],[481,107],[484,82],[493,74],[510,72]]
validateWithThreshold chai, regions[0].polygon, expected, 15
# chai.expect(saliency.center).to.be.eql [285, 255]
# chai.expect(Math.round(saliency.radius)).to.be.equal 350
# chai.expect(saliency.bounding_rect).to.be.eql [[60, 1], [511, 511]]
# Check that every field exists and has the right type
chai.expect(bounding_rect).to.exist
chai.expect(bounding_rect).to.be.an 'array'
chai.expect(polygon).to.exist
chai.expect(polygon).to.be.an 'array'
chai.expect(radius).to.exist
chai.expect(radius).to.be.a 'number'
chai.expect(center).to.exist
chai.expect(center).to.be.an 'array'
chai.expect(bbox).to.exist
chai.expect(bbox).to.be.an 'object'
chai.expect(confidence).to.exist
chai.expect(confidence).to.be.a 'number'
chai.expect(regions).to.exist
chai.expect(regions).to.be.an 'array'
chai.expect(regions[0]).to.exist
chai.expect(regions[0]).to.be.an 'object'
chai.expect(regions[0].bbox).to.exist
chai.expect(regions[0].bbox).to.be.an 'object'
chai.expect(regions[0].bbox.x).to.be.a 'number'
chai.expect(regions[0].bbox.y).to.be.a 'number'
chai.expect(regions[0].bbox.width).to.be.a 'number'
chai.expect(regions[0].bbox.height).to.be.a 'number'
chai.expect(regions[0].center).to.exist
chai.expect(regions[0].center).to.be.an 'object'
chai.expect(regions[0].center.x).to.be.a 'number'
chai.expect(regions[0].center.y).to.be.a 'number'
chai.expect(regions[0].radius).to.exist
chai.expect(regions[0].radius).to.be.a 'number'
chai.expect(regions[0].polygon).to.exist
chai.expect(regions[0].polygon).to.be.an 'array'
chai.expect(regions[0].polygon[0]).to.exist
chai.expect(regions[0].polygon[0]).to.be.an 'object'
chai.expect(regions[0].polygon[0].x).to.be.a 'number'
chai.expect(regions[0].polygon[0].y).to.be.a 'number'

expected = [[60, 1], [511, 511]]
chai.expect(bounding_rect).to.be.deep.equal expected
chai.expect(polygon).to.be.an 'array'
chai.expect(polygon.length).to.be.gt 0
chai.expect(radius).to.be.closeTo 350, 2
expected = [285, 255]
chai.expect(center).to.be.deep.equal expected
expected =
x: 60
y: 1
width: 451
height: 510
chai.expect(bbox).to.be.deep.equal expected
chai.expect(confidence).to.be.lte 0.30
chai.expect(regions).to.be.an 'array'
chai.expect(regions.length).to.be.gt 0
done()

inSrc = 'lenna.png'
@@ -62,8 +108,8 @@
inImage.send image
inImage.endGroup()

it 'should extract saliency with two images in a row', (done) ->
@timeout 10000
it 'should extract a different saliency for a different image', (done) ->
@timeout 20000
id = 2
groups = []
out.once 'begingroup', (group) ->
@@ -73,14 +119,8 @@
out.once 'data', (res) ->
chai.expect(groups).to.eql [2]
chai.expect(res).to.be.an 'object'
saliency = res.saliency
outmost_rect = saliency.outmost_rect
regions = saliency.regions
expected = [[77,74],[83,92],[103,123],[100,139],[84,150],[77,166],[95,198],[103,198],[106,188],[124,172],[124,160],[107,132],[107,101],[112,92],[98,89]]
validateWithThreshold chai, regions[0].polygon, expected, 15
# chai.expect(saliency.center).to.be.eql [96, 136]
# chai.expect(Math.round(saliency.radius)).to.be.equal 67
# chai.expect(saliency.bounding_rect).to.be.eql [[77, 74], [125, 199]]
{saliency} = res
chai.expect(saliency).to.be.not.deep.equal previous
done()

inSrc = 'lenin.jpg'
(The diffs for the remaining 9 changed files are not shown.)
