Skip to content


Add confidence level based on entropy
Browse files Browse the repository at this point in the history
Also clean up resultant JSON, expose regions and tweak threshold.
  • Loading branch information
automata committed Jun 16, 2015
1 parent 0e7cedb commit 7586c53
Show file tree
Hide file tree
Showing 12 changed files with 21,342 additions and 145 deletions.
10 changes: 6 additions & 4 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ execSync = require('exec-sync')

test_path = './test_set/'
#test_sets = ['Text', 'NonText']
test_sets = ['Grid_full', 'Grid_rescaled']
test_sets = ['good', 'bad']

module.exports = ->
grunt = @
Expand Down Expand Up @@ -96,13 +96,15 @@ module.exports = ->

imgs = fs.readdirSync dir
for img in imgs
unless /filtered|threshold|contours|saliency|DS_Store/.test img
unless /filtered|threshold|contours|saliency|histogram_saliency|DS_Store/.test img
img = img.replace /\s/g, '\\ '
abspath = dir + '/' + img
console.log ' ' + abspath
execSync './build/Release/saliency ' + abspath
output = execSync './build/Release/saliency ' + abspath
console.log ' ' + abspath + ' finished.'
data[set].push abspath
image: abspath
measurement: JSON.parse output

grunt.file.write './test_set_app/data.js', 'window.DATA = {sets:' + JSON.stringify(data, 1, 1) + '};'

Expand Down
133 changes: 91 additions & 42 deletions saliency.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* saliency.cpp - Saliency heuristics
* (c) 2014 The Grid
* (c) 2014-2015 The Grid

Expand All @@ -17,12 +17,30 @@ using namespace std;

RNG rng(12345);

/**
 * Render the intensity histogram of a single-channel image as a line plot.
 *
 * @param gray Single-channel image whose values are histogrammed into 256
 *             bins covering [0, 256).
 * @return A 512x400 CV_8UC1 image with the histogram drawn as a white
 *         polyline on a black background.
 */
Mat DrawHistogram(Mat gray) {
  const int histSize = 256;  // number of bins
  // The upper bound passed to calcHist is exclusive, so it must be 256 for
  // pixels with value 255 to be counted in the last bin.
  float range[] = { 0, 256 };
  const float *ranges[] = { range };
  Mat hist;

  calcHist(&gray, 1, 0, Mat(), hist, 1, &histSize, ranges, true, false);

  const int hist_w = 512;
  const int hist_h = 400;
  const int bin_w = cvRound(static_cast<double>(hist_w) / histSize);

  Mat histImage(hist_h, hist_w, CV_8UC1, Scalar(0, 0, 0));
  // Scale the bin counts to the plot height so the tallest bin fills the image.
  normalize(hist, hist, 0, histImage.rows, NORM_MINMAX, -1, Mat());

  // Connect consecutive bins with line segments; the y axis is flipped because
  // image row 0 is at the top.
  for (int i = 1; i < histSize; i++) {
    line(histImage,
         Point(bin_w * (i - 1), hist_h - cvRound(hist.at<float>(i - 1))),
         Point(bin_w * i, hist_h - cvRound(hist.at<float>(i))),
         Scalar(255, 0, 0), 2, 8, 0);
  }

  return histImage;
}

static void display_help(string program_name) {
cerr << "Usage: " << program_name << " <original image>"
//<< "Options:\n"
//<< "\t-h,--help\t\tShow this help message\n"
//<< "\t-s,--saliencymap\tWrite the saliency map to an image file\n"
<< endl;
cerr << "Usage: " << program_name << " <original image>" << endl;

int main(int argc, char *argv[]) {
Expand Down Expand Up @@ -51,25 +69,40 @@ int main(int argc, char *argv[]) {
Mat saliency_gray = saliency_map * 255;
Mat most_salient;

// Static threshold:
// Mat fg;
// int threshold_value = 254;
// fg = saliency_gray >= threshold_value;
// #ifdef DEBUG
// sprintf(file_path, "%s_fg.png", original_image_path);
// imwrite(file_path, fg);
// #endif
GaussianBlur(saliency_gray, saliency_gray, Size(1,1), 0, 0);
#ifdef DEBUG
Mat histin = DrawHistogram(saliency_gray);
sprintf(file_path, "%s_histogram_saliency.png", original_image_path);
imwrite(file_path, histin);

// Calculate confidence based on homogeneity of saliency map's histogram
Mat hist;
int histSize = 256;
float range[] = { 0, 256 } ;
const float* histRange = { range };
calcHist(&saliency_gray, 1, 0, Mat(), hist, 1, &histSize, &histRange, true, false);
hist /= original_image.size().height*original_image.size().width;
Mat logP;
// Inverse normalized entropy
float entropy = -1*sum(hist.mul(logP)).val[0];
entropy = entropy/log(256); // normalize
entropy = 1.0 - entropy; // inverse

// Blur and binary threshold saliency map based on OTSU
saliency_gray.convertTo(saliency_gray, CV_8U); // threshold needs an int Mat
//adaptiveThreshold(saliency_gray, most_salient, 255, ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 3, 0);
threshold(saliency_gray, most_salient, 0, 255, THRESH_BINARY | THRESH_OTSU);
Mat blur;
bilateralFilter(saliency_gray, blur, 12, 24, 6);
// GaussianBlur(saliency_gray, blur, Size(5,5), 0);
threshold(blur, most_salient, 0, 255, THRESH_BINARY + THRESH_OTSU);
#ifdef DEBUG
sprintf(file_path, "%s_threshold.png", original_image_path);
imwrite(file_path, most_salient);

// Eliminate small regions (Mat() == default 3x3 kernel)
Mat filtered;
//filtered = most_salient;
// Another option is to use dilate/erode/dilate:
// dilate(most_salient, filtered, Mat(), Point(-1, -1), 2, 1, 1);
// erode(filtered, filtered, Mat(), Point(-1, -1), 4, 1, 1);
Expand Down Expand Up @@ -117,16 +150,16 @@ int main(int argc, char *argv[]) {

// Find the biggest area of all contours
// int big_id = 0;
// double big_area = 0;
// for (size_t i = 0, max = contours.size(); i < max; ++i) {
// // Contour area
// double area = contourArea(contours[i]);
// if (area > big_area) {
// big_id = i;
// big_area = area;
// }
// }
int big_id = 0;
double big_area = 0;
for (size_t i = 0, max = contours.size(); i < max; ++i) {
// Contour area
double area = contourArea(contours[i]);
if (area > big_area) {
big_id = i;
big_area = area;

// Group all bounding rects into one, good for superimposition elimination
// Vector<Rect> allRect = boundRect;
Expand All @@ -152,14 +185,9 @@ int main(int argc, char *argv[]) {
xmax = xmaxB;
if (ymaxB > ymax)
ymax = ymaxB;
// cout << j << endl;
// cout << boundRect[j].tl() << endl;
// cout << boundRect[j].br() << endl;
// cout << xmin << "," << ymin << endl;
// cout << xmax << "," << ymax << endl;
Rect bigRect = Rect(xmin, ymin, xmax-xmin, ymax-ymin);
//int i = big_id;

#ifdef DEBUG
// Draw polygonal contour + bounding rects + circles
Mat drawing = Mat::zeros( filtered.size(), CV_8UC3 );
Expand Down Expand Up @@ -192,23 +220,44 @@ int main(int argc, char *argv[]) {

// Serialize as stringified JSON
// TODO: Use jsoncpp instead? Not using now to avoid one more dependency
cout << "{\"saliency\": ";
cout << "{\"outmost_rect\": [" << << ", " << << "],";
float x =;
float y =;
float w = abs(;
float h = abs(;

cout << "{\"bounding_rect\": [" << << ", " << << "],";
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "},";
cout << "\"confidence\": " << entropy << ",";

cout << "\"polygon\": [";
size_t maxPoly = contours_poly[big_id].size()-1;
for (size_t j = 0; j < maxPoly; ++j) {
cout << contours_poly[big_id][j] << ", ";
cout << contours_poly[big_id][maxPoly] << "], ";
cout << "\"center\": [" << (int)center[big_id].x << ", " << (int)center[big_id].y << "], ";
cout << "\"radius\": " << radius[big_id] << ", ";
// Regions
cout << "\"regions\": [";
for (size_t i=0, max=boundRect.size(); i<max; ++i) {
cout << "{\"polygon\": [";
size_t maxPoly = contours_poly[i].size()-1;
for (size_t j = 0; j < maxPoly; ++j) {
cout << contours_poly[i][j] << ", ";
cout << "{\"x\": " << contours_poly[i][j].x << ", \"y\": " << contours_poly[i][j].y << "}, ";
cout << contours_poly[i][maxPoly] << "], ";
cout << "\"center\": [" << (int)center[i].x << ", " << (int)center[i].y << "], ";
cout << "{\"x\": " << contours_poly[i][maxPoly].x << ", \"y\": " << contours_poly[i][maxPoly].y << "}], ";
cout << "\"center\": {\"x\": " << (int)center[i].x << ", \"y\": " << (int)center[i].y << "}, ";
cout << "\"radius\": " << radius[i] << ", ";
if (i == max-1)
cout << "\"bounding_rect\": [" << boundRect[i].tl() << ", " << boundRect[i].br() << "]}";
cout << "\"bounding_rect\": [" << boundRect[i].tl() << ", " << boundRect[i].br() << "]},";
float x = boundRect[i].tl().x;
float y = boundRect[i].tl().y;
float w = abs(x-boundRect[i].br().x);
float h = abs(y-boundRect[i].br().y);
if (i == max-1) {
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "}}";
} else {
cout << "\"bbox\": {\"x\": " <<x<< ", \"y\": " <<y<< ", \"width\": " <<w<< ", \"height\": " <<h<< "}},";
cout << "]}}" << endl;

Expand Down
84 changes: 62 additions & 22 deletions spec/
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ else

validateWithThreshold = (chai, calculated, expected, threshold) ->
for i in [0...calculated.length]
diffX = Math.abs(calculated[i][0] - expected[i][0])
diffY = Math.abs(calculated[i][1] - expected[i][1])
chai.expect(diffX) threshold
chai.expect(diffY) threshold
chai.expect(calculated[i][0]) expected[i][0], threshold
chai.expect(calculated[i][1]) expected[i][1], threshold

describe 'GetSaliency component', ->

Expand All @@ -34,6 +32,7 @@ describe 'GetSaliency component', ->
chai.expect(c.outPorts.out) 'object'

describe 'with file system image', ->
previous = null
it 'should extract a valid saliency profile', (done) ->
@timeout 10000
id = 1
Expand All @@ -45,15 +44,62 @@ describe 'GetSaliency component', ->
out.once 'data', (res) ->
chai.expect(groups).to.eql [1]
chai.expect(res) 'object'
saliency = res.saliency
outmost_rect = saliency.outmost_rect
regions = saliency.regions
{saliency} = res
previous = saliency
{bounding_rect, polygon, radius, center, bbox, confidence, regions} = saliency

expected = [[510,1],[456,59],[417,109],[396,111],[367,133],[352,138],[341,135],[331,125],[318,102],[294,75],[266,57],[231,45],[209,46],[196,52],[193,57],[217,65],[230,83],[226,103],[189,139],[189,165],[198,180],[197,191],[158,237],[135,244],[120,260],[108,261],[69,307],[63,324],[65,349],[60,358],[65,367],[66,466],[61,510],[172,510],[173,495],[183,483],[199,479],[215,484],[221,479],[224,468],[234,458],[242,428],[251,417],[302,414],[305,406],[317,397],[334,399],[351,415],[367,455],[380,410],[374,400],[375,358],[410,243],[447,158],[481,107],[484,82],[493,74],[510,72]]
validateWithThreshold chai, regions[0].polygon, expected, 15
# chai.expect( [285, 255]
# chai.expect(Math.round(saliency.radius)) 350
# chai.expect(saliency.bounding_rect) [[60, 1], [511, 511]]
# Check that every field exists and has the right type
chai.expect(bounding_rect) 'array'
chai.expect(polygon) 'array'
chai.expect(radius) 'number'
chai.expect(center) 'array'
chai.expect(bbox) 'object'
chai.expect(confidence) 'number'
chai.expect(regions) 'array'
chai.expect(regions[0]) 'object'
chai.expect(regions[0].bbox) 'object'
chai.expect(regions[0].bbox.x) 'number'
chai.expect(regions[0].bbox.y) 'number'
chai.expect(regions[0].bbox.width) 'number'
chai.expect(regions[0].bbox.height) 'number'
chai.expect(regions[0].center) 'object'
chai.expect(regions[0].center.x) 'number'
chai.expect(regions[0].center.y) 'number'
chai.expect(regions[0].radius) 'number'
chai.expect(regions[0].polygon) 'array'
chai.expect(regions[0].polygon[0]) 'object'
chai.expect(regions[0].polygon[0].x) 'number'
chai.expect(regions[0].polygon[0].y) 'number'

expected = [[60, 1], [511, 511]]
chai.expect(bounding_rect) expected
chai.expect(polygon) 'array'
chai.expect(polygon.length) 0
chai.expect(radius) 350, 2
expected = [285, 255]
chai.expect(center) expected
expected =
x: 60
y: 1
width: 451
height: 510
chai.expect(bbox) expected
chai.expect(confidence) 0.30
chai.expect(regions) 'array'
chai.expect(regions.length) 0

inSrc = 'lenna.png'
Expand All @@ -62,8 +108,8 @@ describe 'GetSaliency component', ->
inImage.send image

it 'should extract saliency with two images in a row', (done) ->
@timeout 10000
it 'should extract a different saliency for a different image', (done) ->
@timeout 20000
id = 2
groups = []
out.once 'begingroup', (group) ->
Expand All @@ -73,14 +119,8 @@ describe 'GetSaliency component', ->
out.once 'data', (res) ->
chai.expect(groups).to.eql [2]
chai.expect(res) 'object'
saliency = res.saliency
outmost_rect = saliency.outmost_rect
regions = saliency.regions
expected = [[77,74],[83,92],[103,123],[100,139],[84,150],[77,166],[95,198],[103,198],[106,188],[124,172],[124,160],[107,132],[107,101],[112,92],[98,89]]
validateWithThreshold chai, regions[0].polygon, expected, 15
# chai.expect( [96, 136]
# chai.expect(Math.round(saliency.radius)) 67
# chai.expect(saliency.bounding_rect) [[77, 74], [125, 199]]
{saliency} = res
chai.expect(saliency) previous

inSrc = 'lenin.jpg'
Expand Down

0 comments on commit 7586c53

Please sign in to comment.