From f1557f2b7976862485e87ea1e15565b23869527e Mon Sep 17 00:00:00 2001 From: jingchaoluan Date: Wed, 15 Nov 2017 18:55:52 -0500 Subject: [PATCH 1/5] Changed method 'read_image_gray()' and 'read_image_binary()'. The old method can only read image from disk i.e. from a filename (string). The new method extend the ability to read image from both disk and memory. Read image from memory means that it can also accept file object or PIL.Image.Image object as the parameter of this method. --- ocrolib/common.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ocrolib/common.py b/ocrolib/common.py index 614add07..f9bacd81 100644 --- a/ocrolib/common.py +++ b/ocrolib/common.py @@ -152,7 +152,7 @@ def isintarray(a): def isintegerarray(a): return a.dtype in [dtype('int32'),dtype('int64'),dtype('uint32'),dtype('uint64')] -@checks(str,pageno=int,_=GRAYSCALE) +@checks({str,object},pageno=int,_=GRAYSCALE) def read_image_gray(fname,pageno=0): """Read an image and returns it as a floating point array. 
The optional page number allows images from files containing multiple @@ -160,7 +160,10 @@ def read_image_gray(fname,pageno=0): the range 0...1 (unsigned) or -1...1 (signed).""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 - pil = PIL.Image.open(fname) + if type(fname) == PIL.Image.Image: + pil = fname + else: + pil = PIL.Image.open(fname) a = pil2array(pil) if a.dtype==dtype('uint8'): a = a/255.0 @@ -191,13 +194,16 @@ def write_image_gray(fname,image,normalize=0,verbose=0): im = array2pil(image) im.save(fname) -@checks(str,_=ABINARY2) +@checks({str,object},_=ABINARY2) def read_image_binary(fname,dtype='i',pageno=0): """Read an image from disk and return it as a binary image of the given dtype.""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 - pil = PIL.Image.open(fname) + if type(fname) == PIL.Image.Image + pil = fname + else: + pil = PIL.Image.open(fname) a = pil2array(pil) if a.ndim==3: a = amax(a,axis=2) return array(a>0.5*(amin(a)+amax(a)),dtype) From c9fcdac8426b350404f369d04e0db197cb3e477e Mon Sep 17 00:00:00 2001 From: "Luan, Jingchao" Date: Mon, 20 Nov 2017 22:42:34 -0500 Subject: [PATCH 2/5] Update common.py --- ocrolib/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrolib/common.py b/ocrolib/common.py index f9bacd81..1b582476 100644 --- a/ocrolib/common.py +++ b/ocrolib/common.py @@ -200,7 +200,7 @@ def read_image_binary(fname,dtype='i',pageno=0): of the given dtype.""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 - if type(fname) == PIL.Image.Image + if type(fname) == PIL.Image.Image: pil = fname else: pil = PIL.Image.open(fname) From 122c3055a4ac231c0e9a3fe2162ad333628a0363 Mon Sep 17 00:00:00 2001 From: jingchaoluan Date: Fri, 15 Dec 2017 14:18:04 -0500 Subject: [PATCH 3/5] Change from accept only PIL.Image.Image instance to all of its subclass instances for methods 'read_image_gray()' and 'read_image_binary()' --- ocrolib/common.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/ocrolib/common.py b/ocrolib/common.py index f9bacd81..75e52c20 100644 --- a/ocrolib/common.py +++ b/ocrolib/common.py @@ -160,7 +160,7 @@ def read_image_gray(fname,pageno=0): the range 0...1 (unsigned) or -1...1 (signed).""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 - if type(fname) == PIL.Image.Image: + if issubclass(type(fname), PIL.Image.Image): pil = fname else: pil = PIL.Image.open(fname) @@ -200,7 +200,7 @@ def read_image_binary(fname,dtype='i',pageno=0): of the given dtype.""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 - if type(fname) == PIL.Image.Image + if issubclass(type(fname), PIL.Image.Image): pil = fname else: pil = PIL.Image.open(fname) From f2a4e83cc1227457c05dd2c68d6587d8d87a8f5b Mon Sep 17 00:00:00 2001 From: jingchaoluan Date: Fri, 15 Dec 2017 14:46:59 -0500 Subject: [PATCH 4/5] Change from accept only PIL.Image.Image instance to all of its subclass instances for methods 'read_image_gray()' and 'read_image_binary()' --- ocrolib/common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ocrolib/common.py b/ocrolib/common.py index 52262c11..75e52c20 100644 --- a/ocrolib/common.py +++ b/ocrolib/common.py @@ -200,11 +200,7 @@ def read_image_binary(fname,dtype='i',pageno=0): of the given dtype.""" if type(fname)==tuple: fname,pageno = fname assert pageno==0 -<<<<<<< HEAD if issubclass(type(fname), PIL.Image.Image): -======= - if type(fname) == PIL.Image.Image: ->>>>>>> c9fcdac8426b350404f369d04e0db197cb3e477e pil = fname else: pil = PIL.Image.open(fname) From d63437859ece8d49cc46c14c9caa019106837ea5 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 19 Feb 2018 18:25:33 +0100 Subject: [PATCH 5/5] add @jingchaoluan's unittest for #260, run-unit as shell script to run python test scripts --- tests/run-unit | 58 ++++----------------------------------- tests/test-levenshtein.py | 54 ++++++++++++++++++++++++++++++++++++ tests/test_read_image.py | 35 +++++++++++++++++++++++ 3 
files changed, 94 insertions(+), 53 deletions(-) create mode 100755 tests/test-levenshtein.py create mode 100644 tests/test_read_image.py diff --git a/tests/run-unit b/tests/run-unit index 9511e6df..a689d148 100755 --- a/tests/run-unit +++ b/tests/run-unit @@ -1,54 +1,6 @@ -#!/usr/bin/env python +#/bin/bash -import sys - -from ocrolib import edist, utils - -# Test the levenshtein function and returns 0 if the computed value -# equals the one it should be, otherwise returns 1 for failed tests. -def testLevenshtein(a, b, should): - if edist.levenshtein(a, b) == should: - print 'ok - levenshtein(%s, %s) == %s' % (a,b,should) - return 0 - else: - print 'not ok - levenshtein(%s, %s) == %s' % (a,b,should) - return 1 - - -def testXLevenshtein(a, b, context, should): - #print(edist.xlevenshtein(a, b, context)) - if edist.xlevenshtein(a, b, context) == should: - print 'ok - xlevenshtein(%s, %s, %s) == %s' % (a,b,context,should) - return 0 - else: - print 'not ok - xlevenshtein(%s, %s, %s) == %s' % (a,b,context,should) - return 1 - - -failed_tests = 0 - -print('# 1 Test function "levenshtein" in edist.py') -failed_tests += testLevenshtein('a', 'a', 0) -failed_tests += testLevenshtein('', '', 0) -failed_tests += testLevenshtein('a', '', 1) -failed_tests += testLevenshtein('', 'a', 1) -failed_tests += testLevenshtein('aa', 'aaaaaa', 4) -failed_tests += testLevenshtein('aba', 'bab', 2) - -print('\n# 2 Test function "xlevenshtein" in edist.py') -failed_tests += testXLevenshtein('exccpt', 'except', 1, should=(1.0, [('ccp', 'cep')])) -failed_tests += testXLevenshtein('exccpt', 'except', 2, should=(1.0, [('xccpt', 'xcept')])) -failed_tests += testXLevenshtein('exccpt', 'except', 3, should=(1.0, [('exccpt ', 'except ')])) -failed_tests += testXLevenshtein('exccpt', 'except', 4, should=(1.0, [(' exccpt ', ' except ')])) -failed_tests += testXLevenshtein('', 'test', 1, should=(4.0, [])) -failed_tests += testXLevenshtein('aaaaaaaaaaa', 'a', 1, should=(10.0, [('aaaaaaaaaaa ', 
'a__________ ')])) -failed_tests += testXLevenshtein('123 111 456', '132 111 444', 1, should=(4.0, [('123_ ', '1_32 '), ('456 ', '444 ')])) - -print('\n# 3 utils.sumouter / utils.sumprod') -from pylab import randn -utils.sumouter(randn(10,3),randn(10,4),out=randn(3,4)) -print('ok - dimensions of sumouter') -utils.sumprod(randn(11,7),randn(11,7),out=randn(7)) -print('ok - dimensions of sumprod') - -sys.exit(failed_tests) +BASE=$(dirname $0)/.. +export PYTHONPATH="$BASE":"$PYTHONPATH" +python2 "$BASE"/tests/test_read_image.py +python2 "$BASE"/tests/test-levenshtein.py diff --git a/tests/test-levenshtein.py b/tests/test-levenshtein.py new file mode 100755 index 00000000..9511e6df --- /dev/null +++ b/tests/test-levenshtein.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import sys + +from ocrolib import edist, utils + +# Test the levenshtein function and returns 0 if the computed value +# equals the one it should be, otherwise returns 1 for failed tests. +def testLevenshtein(a, b, should): + if edist.levenshtein(a, b) == should: + print 'ok - levenshtein(%s, %s) == %s' % (a,b,should) + return 0 + else: + print 'not ok - levenshtein(%s, %s) == %s' % (a,b,should) + return 1 + + +def testXLevenshtein(a, b, context, should): + #print(edist.xlevenshtein(a, b, context)) + if edist.xlevenshtein(a, b, context) == should: + print 'ok - xlevenshtein(%s, %s, %s) == %s' % (a,b,context,should) + return 0 + else: + print 'not ok - xlevenshtein(%s, %s, %s) == %s' % (a,b,context,should) + return 1 + + +failed_tests = 0 + +print('# 1 Test function "levenshtein" in edist.py') +failed_tests += testLevenshtein('a', 'a', 0) +failed_tests += testLevenshtein('', '', 0) +failed_tests += testLevenshtein('a', '', 1) +failed_tests += testLevenshtein('', 'a', 1) +failed_tests += testLevenshtein('aa', 'aaaaaa', 4) +failed_tests += testLevenshtein('aba', 'bab', 2) + +print('\n# 2 Test function "xlevenshtein" in edist.py') +failed_tests += testXLevenshtein('exccpt', 'except', 1, should=(1.0, [('ccp', 
'cep')])) +failed_tests += testXLevenshtein('exccpt', 'except', 2, should=(1.0, [('xccpt', 'xcept')])) +failed_tests += testXLevenshtein('exccpt', 'except', 3, should=(1.0, [('exccpt ', 'except ')])) +failed_tests += testXLevenshtein('exccpt', 'except', 4, should=(1.0, [(' exccpt ', ' except ')])) +failed_tests += testXLevenshtein('', 'test', 1, should=(4.0, [])) +failed_tests += testXLevenshtein('aaaaaaaaaaa', 'a', 1, should=(10.0, [('aaaaaaaaaaa ', 'a__________ ')])) +failed_tests += testXLevenshtein('123 111 456', '132 111 444', 1, should=(4.0, [('123_ ', '1_32 '), ('456 ', '444 ')])) + +print('\n# 3 utils.sumouter / utils.sumprod') +from pylab import randn +utils.sumouter(randn(10,3),randn(10,4),out=randn(3,4)) +print('ok - dimensions of sumouter') +utils.sumprod(randn(11,7),randn(11,7),out=randn(7)) +print('ok - dimensions of sumprod') + +sys.exit(failed_tests) diff --git a/tests/test_read_image.py b/tests/test_read_image.py new file mode 100644 index 00000000..ef7ff778 --- /dev/null +++ b/tests/test_read_image.py @@ -0,0 +1,35 @@ +import unittest +import ocrolib +import PIL +import numpy + +### Original image in disk and memory used to test method read_image_gray() +img_disk = "tests/testpage.png" +img_mem = PIL.Image.open(img_disk) + +### Binarized image in disk and memory used to test method read_image_binary() +img_bin_disk = "tests/010030.bin.png" +img_bin_mem = PIL.Image.open(img_bin_disk) + +class OcrolibTestCase(unittest.TestCase): + """ + Tests for processing image from disk and memory for methods + read_image_gray() and read_image_binary() in common.py under ocrolib + """ + + def test_read_image_gray(self): + """ + Test whether the function read_image_gray() will return same result + when pass a image file name (from disk) and a image object (PIL.Image from memory). 
+ The return object of read_image_gray() is an 'ndarray' defined by 'numpy', thus we use the + built-in function 'array_equal' to compare two ndarray objects + """ + self.assertTrue(numpy.array_equal(ocrolib.read_image_gray(img_disk), ocrolib.read_image_gray(img_mem))) + + + def test_read_image_binary(self): + self.assertTrue(numpy.array_equal(ocrolib.read_image_binary(img_bin_disk), ocrolib.read_image_binary(img_bin_mem))) + + +if __name__ == '__main__': + unittest.main()