From a02de8820c2346db2b55344f2d1954db9e1e1bdc Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <const@dynamic-systems.com.ua>
Date: Wed, 20 Aug 2014 13:16:20 +0300
Subject: [PATCH 01/11] * primitive line detector (debug version)

---
 unshred/features/lines.py | 41 +++++++++++++++++++++++++++++++++++++++
 unshred/split.py          |  5 +++--
 2 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 unshred/features/lines.py

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
new file mode 100644
index 0000000..faa17a5
--- /dev/null
+++ b/unshred/features/lines.py
@@ -0,0 +1,41 @@
+import cv2
+import numpy
+from unshred.features import AbstractShredFeature
+
+
+class LinesFeatures(AbstractShredFeature):
+    TAG_HAS_LINES_FEATURE = "has lines"
+    TAG_PARALLEL_FEATURE = "parallel"
+    TAG_PERPENDECULAR_FEATURE = "perpendecular"
+
+    def get_info(self, shred, contour, name):
+
+        tags = []
+        params = {}
+
+        gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 150, 200, apertureSize = 3)
+        # removing contours from the edges (by drawing them black)
+        cv2.drawContours(edges, contour, -1, (0, 0, 0), 12, 2)
+        cv2.imwrite('../debug/edges_%s.png'%name, edges)
+
+        lines = cv2.HoughLines(edges, 3, 1* numpy.pi/180, 60)
+
+        if not lines is None:
+            #debug images
+            for rho,theta in lines[0]:
+                a = numpy.cos(theta)
+                b = numpy.sin(theta)
+                x0 = a*rho
+                y0 = b*rho
+                x1 = int(x0 + 1000*(-b)) # Here i have used int() instead of rounding the decimal value, so 3.8 --> 3
+                y1 = int(y0 + 1000*(a)) # But if you want to round the number, then use np.around() function, then 3.8 --> 4.0
+                x2 = int(x0 - 1000*(-b)) # But we need integers, so use int() function after that, ie int(np.around(x))
+                y2 = int(y0 - 1000*(a))
+                cv2.line(shred,(x1,y1),(x2,y2),(255,0,0),2)
+            cv2.imwrite('../debug/houghlines_%s.png'%name, shred)
+
+            params['Lines Count'] = len(lines)
+            tags.append(self.TAG_HAS_LINES_FEATURE)
+
+        return params, tags
diff --git a/unshred/split.py b/unshred/split.py
index cacfe7e..43ab509 100644
--- a/unshred/split.py
+++ b/unshred/split.py
@@ -11,6 +11,7 @@
 import numpy as np
 
 from features import GeometryFeatures, ColourFeatures
+from unshred.features.lines import LinesFeatures
 
 
 parser = ArgumentParser()
@@ -239,7 +240,7 @@ def open_image_and_separate_bg(self, img):
         img = cv2.bitwise_and(img, img, mask=mask)
 
         # Write original image with no background for debug purposes
-        cv2.imwrite("debug/mask.tif", mask)
+        cv2.imwrite("../debug/mask.tif", mask)
 
         return img, mask
 
@@ -417,7 +418,7 @@ def save_thumb(self, width=200):
 
         print("Processing file %s" % fname)
         sheet = Sheet(fname, sheet_name,
-                      [GeometryFeatures, ColourFeatures], out_dir, out_format)
+                      [GeometryFeatures, ColourFeatures, LinesFeatures], out_dir, out_format)
 
         sheet.export_results_as_html()
         sheets.append({

From c58fae04e5e531e2fbf6ff9f8f6130d989ba76ec Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <const@dynamic-systems.com.ua>
Date: Wed, 20 Aug 2014 17:14:05 +0300
Subject: [PATCH 02/11] * more reliable line detection (but still too many
 false positives) * added sorting for fast clusterization

---
 unshred/features/lines.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index faa17a5..dd10b1d 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -1,3 +1,4 @@
+from PIL import Image
 import cv2
 import numpy
 from unshred.features import AbstractShredFeature
@@ -14,16 +15,23 @@ def get_info(self, shred, contour, name):
         params = {}
 
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 150, 200, apertureSize = 3)
+        # gray = cv2.blur(gray, (5,5))
+        # thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        # thimg = Image.fromarray(thresh[1])
+        # thimg.save('../debug/thresh_%s.png'%name)
+        edges = cv2.Canny(gray, 100, 200, apertureSize = 3)
         # removing contours from the edges (by drawing them black)
-        cv2.drawContours(edges, contour, -1, (0, 0, 0), 12, 2)
+        cv2.drawContours(edges, contour, -1, (0, 0, 0), 18)
         cv2.imwrite('../debug/edges_%s.png'%name, edges)
 
-        lines = cv2.HoughLines(edges, 3, 1* numpy.pi/180, 60)
+        lines = cv2.HoughLines(edges, 1, 1* numpy.pi/180, 40)
 
         if not lines is None:
+            ar = lines[0]
+            # sorting by theta (for grouping by angle)
+            ar = ar[ar[:,1].argsort()]
             #debug images
-            for rho,theta in lines[0]:
+            for rho,theta in ar:
                 a = numpy.cos(theta)
                 b = numpy.sin(theta)
                 x0 = a*rho
@@ -32,8 +40,8 @@ def get_info(self, shred, contour, name):
                 y1 = int(y0 + 1000*(a)) # But if you want to round the number, then use np.around() function, then 3.8 --> 4.0
                 x2 = int(x0 - 1000*(-b)) # But we need integers, so use int() function after that, ie int(np.around(x))
                 y2 = int(y0 - 1000*(a))
-                cv2.line(shred,(x1,y1),(x2,y2),(255,0,0),2)
-            cv2.imwrite('../debug/houghlines_%s.png'%name, shred)
+                cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
+            cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
 
             params['Lines Count'] = len(lines)
             tags.append(self.TAG_HAS_LINES_FEATURE)

From 7b2fabbb9e566e63267f0fe094d7f83385c178a8 Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <const@dynamic-systems.com.ua>
Date: Wed, 20 Aug 2014 18:44:37 +0300
Subject: [PATCH 03/11] * better line detection

---
 unshred/features/lines.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index dd10b1d..fc594c1 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -15,16 +15,14 @@ def get_info(self, shred, contour, name):
         params = {}
 
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
-        # gray = cv2.blur(gray, (5,5))
-        # thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-        # thimg = Image.fromarray(thresh[1])
-        # thimg.save('../debug/thresh_%s.png'%name)
-        edges = cv2.Canny(gray, 100, 200, apertureSize = 3)
+        edges = cv2.Canny(gray, 50, 200, apertureSize=3)
         # removing contours from the edges (by drawing them black)
-        cv2.drawContours(edges, contour, -1, (0, 0, 0), 18)
+        cv2.drawContours(edges, contour, -1, (0, 0, 0), 16)
+        edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, (3, 3))
         cv2.imwrite('../debug/edges_%s.png'%name, edges)
+        # cv2.imwrite('../debug/cont_%s.png'%name, contourImg)
 
-        lines = cv2.HoughLines(edges, 1, 1* numpy.pi/180, 40)
+        lines = cv2.HoughLines(edges, 1, 1* numpy.pi/180, 38)
 
         if not lines is None:
             ar = lines[0]

From 576d7dff45163b2472fbf8fa8bbad50c066b7a19 Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <const@dynamic-systems.com.ua>
Date: Wed, 20 Aug 2014 19:29:05 +0300
Subject: [PATCH 04/11] changed detection to probabilistic model, much better
 lines identification

---
 unshred/features/lines.py | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index fc594c1..c3f110d 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -15,29 +15,19 @@ def get_info(self, shred, contour, name):
         params = {}
 
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 50, 200, apertureSize=3)
+        gray_blur = cv2.GaussianBlur(gray, (15, 15), 0)
+        edges = cv2.adaptiveThreshold(gray_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY_INV, 5, 1)
         # removing contours from the edges (by drawing them black)
-        cv2.drawContours(edges, contour, -1, (0, 0, 0), 16)
-        edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, (3, 3))
-        cv2.imwrite('../debug/edges_%s.png'%name, edges)
-        # cv2.imwrite('../debug/cont_%s.png'%name, contourImg)
+        cv2.drawContours(edges, contour, -1, (0, 0, 0), 24)
+        edges = cv2.morphologyEx(edges, cv2.MORPH_ERODE, (6, 6), iterations=2)
 
-        lines = cv2.HoughLines(edges, 1, 1* numpy.pi/180, 38)
+        cv2.imwrite('../debug/edges_%s.png'%name, edges)
 
+        lines = cv2.HoughLinesP(edges, 1, numpy.pi/180, 20, minLineLength = 30, maxLineGap = 10)
         if not lines is None:
-            ar = lines[0]
-            # sorting by theta (for grouping by angle)
-            ar = ar[ar[:,1].argsort()]
             #debug images
-            for rho,theta in ar:
-                a = numpy.cos(theta)
-                b = numpy.sin(theta)
-                x0 = a*rho
-                y0 = b*rho
-                x1 = int(x0 + 1000*(-b)) # Here i have used int() instead of rounding the decimal value, so 3.8 --> 3
-                y1 = int(y0 + 1000*(a)) # But if you want to round the number, then use np.around() function, then 3.8 --> 4.0
-                x2 = int(x0 - 1000*(-b)) # But we need integers, so use int() function after that, ie int(np.around(x))
-                y2 = int(y0 - 1000*(a))
+            for x1,y1,x2,y2 in lines[0]:
                 cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
             cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
 

From adddc42b4a10971dc983cace0e745806bf0a07cc Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <const@dynamic-systems.com.ua>
Date: Wed, 20 Aug 2014 19:36:35 +0300
Subject: [PATCH 05/11] * commented out debug images output * fixed line count

---
 unshred/features/lines.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index c3f110d..0235974 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -22,16 +22,18 @@ def get_info(self, shred, contour, name):
         cv2.drawContours(edges, contour, -1, (0, 0, 0), 24)
         edges = cv2.morphologyEx(edges, cv2.MORPH_ERODE, (6, 6), iterations=2)
 
-        cv2.imwrite('../debug/edges_%s.png'%name, edges)
+        # const: uncomment for debug
+        # cv2.imwrite('../debug/edges_%s.png'%name, edges)
 
         lines = cv2.HoughLinesP(edges, 1, numpy.pi/180, 20, minLineLength = 30, maxLineGap = 10)
         if not lines is None:
+            # const: uncomment for debug
             #debug images
-            for x1,y1,x2,y2 in lines[0]:
-                cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
-            cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
+            # for x1,y1,x2,y2 in lines[0]:
+                # cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
+            # cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
 
-            params['Lines Count'] = len(lines)
+            params['Lines Count'] = len(lines[0])
             tags.append(self.TAG_HAS_LINES_FEATURE)
 
         return params, tags

From 57e5e1a310a4e2547338552b9cfe1f3a3c24f266 Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <kolesnichenko@gmail.com>
Date: Sun, 31 Aug 2014 15:40:51 +0300
Subject: [PATCH 06/11] * much better contour removal based on a mask erosion

---
 unshred/features/lines.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index 0235974..13e5639 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -14,24 +14,30 @@ def get_info(self, shred, contour, name):
         tags = []
         params = {}
 
+        _, _, _, mask = cv2.split(shred)
+
+        # expanding mask for future removal of the contour
+        mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, (5, 5), iterations=200)
+
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
         gray_blur = cv2.GaussianBlur(gray, (15, 15), 0)
         edges = cv2.adaptiveThreshold(gray_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
             cv2.THRESH_BINARY_INV, 5, 1)
         # removing contours from the edges (by drawing them black)
-        cv2.drawContours(edges, contour, -1, (0, 0, 0), 24)
+        edges = edges & mask
         edges = cv2.morphologyEx(edges, cv2.MORPH_ERODE, (6, 6), iterations=2)
 
         # const: uncomment for debug
-        # cv2.imwrite('../debug/edges_%s.png'%name, edges)
+        cv2.imwrite('../debug/edges_%s.png'%name, edges)
+        cv2.imwrite('../debug/mask_%s.png'%name, mask)
 
         lines = cv2.HoughLinesP(edges, 1, numpy.pi/180, 20, minLineLength = 30, maxLineGap = 10)
         if not lines is None:
             # const: uncomment for debug
             #debug images
-            # for x1,y1,x2,y2 in lines[0]:
-                # cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
-            # cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
+            for x1,y1,x2,y2 in lines[0]:
+                cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
+            cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
 
             params['Lines Count'] = len(lines[0])
             tags.append(self.TAG_HAS_LINES_FEATURE)

From a95dfabadc787f01e323ec5c64ed7ce4db100276 Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <kolesnichenko@gmail.com>
Date: Sun, 31 Aug 2014 23:06:34 +0300
Subject: [PATCH 07/11] * reduced mask erosion due to weird image distortion *
 better noise reduction (i hope)

---
 unshred/features/lines.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index 13e5639..e363c60 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -1,4 +1,3 @@
-from PIL import Image
 import cv2
 import numpy
 from unshred.features import AbstractShredFeature
@@ -15,29 +14,30 @@ def get_info(self, shred, contour, name):
         params = {}
 
         _, _, _, mask = cv2.split(shred)
-
-        # expanding mask for future removal of the contour
-        mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, (5, 5), iterations=200)
-
+        #
+        # # expanding mask for future removal of a border
+        mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, (3, 3), iterations=2)
+        #
+        # # thresholding our shred
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
-        gray_blur = cv2.GaussianBlur(gray, (15, 15), 0)
+        gray_blur = cv2.GaussianBlur(gray, (9, 9), 0)
         edges = cv2.adaptiveThreshold(gray_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY_INV, 5, 1)
-        # removing contours from the edges (by drawing them black)
+                                      cv2.THRESH_BINARY_INV, 5, 1)
+        # attemting to remove borders
         edges = edges & mask
+        # reducing noise
         edges = cv2.morphologyEx(edges, cv2.MORPH_ERODE, (6, 6), iterations=2)
-
+        # removing small white noise
+        edges = cv2.medianBlur(edges, 3)
         # const: uncomment for debug
-        cv2.imwrite('../debug/edges_%s.png'%name, edges)
-        cv2.imwrite('../debug/mask_%s.png'%name, mask)
+        cv2.imwrite('../debug/edges_%s.png' % name, edges)
 
-        lines = cv2.HoughLinesP(edges, 1, numpy.pi/180, 20, minLineLength = 30, maxLineGap = 10)
+        lines = cv2.HoughLinesP(edges, 1, numpy.pi / 180, 20, minLineLength=50, maxLineGap=30)
         if not lines is None:
             # const: uncomment for debug
-            #debug images
-            for x1,y1,x2,y2 in lines[0]:
-                cv2.line(gray,(x1,y1),(x2,y2),(255,0,0),2)
-            cv2.imwrite('../debug/houghlines_%s.png'%name, gray)
+            for x1, y1, x2, y2 in lines[0]:
+                cv2.line(shred, (x1, y1), (x2, y2), (255, 255, 0, 0), 2)
+            cv2.imwrite('../debug/houghlines_%s.png' % name, shred)
 
             params['Lines Count'] = len(lines[0])
             tags.append(self.TAG_HAS_LINES_FEATURE)

From 214a437512415e2caeb1e2713c1f24e63acff167 Mon Sep 17 00:00:00 2001
From: Konst Kolesnichenko <kolesnichenko@gmail.com>
Date: Mon, 1 Sep 2014 00:00:08 +0300
Subject: [PATCH 08/11] * better border remove * shred-dependent line length

---
 unshred/features/lines.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index e363c60..55dd727 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -16,7 +16,9 @@ def get_info(self, shred, contour, name):
         _, _, _, mask = cv2.split(shred)
         #
         # # expanding mask for future removal of a border
-        mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, (3, 3), iterations=2)
+        kernel = numpy.ones((5,5),numpy.uint8)
+        mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, kernel, iterations=2)
+        _, mask = cv2.threshold(mask, 240, 0, cv2.THRESH_TOZERO)
         #
         # # thresholding our shred
         gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
@@ -31,8 +33,12 @@ def get_info(self, shred, contour, name):
         edges = cv2.medianBlur(edges, 3)
         # const: uncomment for debug
         cv2.imwrite('../debug/edges_%s.png' % name, edges)
+        cv2.imwrite('../debug/mask_%s.png' % name, mask)
 
-        lines = cv2.HoughLinesP(edges, 1, numpy.pi / 180, 20, minLineLength=50, maxLineGap=30)
+        _, _, r_w, r_h = cv2.boundingRect(contour)
+
+        # Line len should be at least 80% of shred's width, gap - 20%
+        lines = cv2.HoughLinesP(edges, 1, numpy.pi / 180, 30, minLineLength=r_w*0.7, maxLineGap=r_w*0.2)
         if not lines is None:
             # const: uncomment for debug
             for x1, y1, x2, y2 in lines[0]:

From e12ff399664ee0980e9bcea23406e26b6782ab7d Mon Sep 17 00:00:00 2001
From: Ievgen Varavva <yvaravva@google.com>
Date: Tue, 23 Sep 2014 23:01:10 +0100
Subject: [PATCH 09/11] Complement lines feature detector with a better
 adaptive thresholder. Clean up the code.

---
 unshred/features/lines.py |  55 ++++++++++--------
 unshred/threshold.py      | 118 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 23 deletions(-)
 create mode 100755 unshred/threshold.py

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index 55dd727..fd14114 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -1,49 +1,58 @@
 import cv2
 import numpy
+
+from unshred import threshold
 from unshred.features import AbstractShredFeature
 
 
+DEBUG = False
+
+
 class LinesFeatures(AbstractShredFeature):
     TAG_HAS_LINES_FEATURE = "has lines"
     TAG_PARALLEL_FEATURE = "parallel"
     TAG_PERPENDECULAR_FEATURE = "perpendecular"
 
     def get_info(self, shred, contour, name):
-
         tags = []
         params = {}
 
         _, _, _, mask = cv2.split(shred)
         #
         # # expanding mask for future removal of a border
-        kernel = numpy.ones((5,5),numpy.uint8)
+        kernel = numpy.ones((5, 5), numpy.uint8)
+        if DEBUG:
+            cv2.imwrite('../debug/%s_mask_0.png' % name, mask)
         mask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, kernel, iterations=2)
+        if DEBUG:
+            cv2.imwrite('../debug/%s_mask_1.png' % name, mask)
         _, mask = cv2.threshold(mask, 240, 0, cv2.THRESH_TOZERO)
-        #
-        # # thresholding our shred
-        gray = cv2.cvtColor(shred, cv2.COLOR_BGR2GRAY)
-        gray_blur = cv2.GaussianBlur(gray, (9, 9), 0)
-        edges = cv2.adaptiveThreshold(gray_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                      cv2.THRESH_BINARY_INV, 5, 1)
-        # attemting to remove borders
+        if DEBUG:
+            cv2.imwrite('../debug/%s_mask_2.png' % name, mask)
+
+        edges = 255 - threshold.threshold(
+            shred, min(shred.shape[:2])).astype(numpy.uint8)
         edges = edges & mask
-        # reducing noise
-        edges = cv2.morphologyEx(edges, cv2.MORPH_ERODE, (6, 6), iterations=2)
-        # removing small white noise
-        edges = cv2.medianBlur(edges, 3)
-        # const: uncomment for debug
-        cv2.imwrite('../debug/edges_%s.png' % name, edges)
-        cv2.imwrite('../debug/mask_%s.png' % name, mask)
+
+        if DEBUG:
+            # const: uncomment for debug
+            cv2.imwrite('../debug/%s_asrc.png' % name, shred)
+            cv2.imwrite('../debug/%s_edges.png' % name, edges)
+            cv2.imwrite('../debug/%s_mask.png' % name, mask)
 
         _, _, r_w, r_h = cv2.boundingRect(contour)
 
-        # Line len should be at least 80% of shred's width, gap - 20%
-        lines = cv2.HoughLinesP(edges, 1, numpy.pi / 180, 30, minLineLength=r_w*0.7, maxLineGap=r_w*0.2)
-        if not lines is None:
-            # const: uncomment for debug
-            for x1, y1, x2, y2 in lines[0]:
-                cv2.line(shred, (x1, y1), (x2, y2), (255, 255, 0, 0), 2)
-            cv2.imwrite('../debug/houghlines_%s.png' % name, shred)
+        # Line len should be at least 50% of shred's width, gap - 20%
+        # TODO: come up with better threshold value. Come up with better lines
+        # filtering.
+        lines = cv2.HoughLinesP(edges, rho=1, theta=numpy.pi / 180,
+                                threshold=30, minLineLength=r_w*0.5,
+                                maxLineGap=r_w*0.2)
+        if lines is not None:
+            if DEBUG:
+                for x1, y1, x2, y2 in lines[0]:
+                    cv2.line(shred, (x1, y1), (x2, y2), (255, 255, 0, 0), 2)
+                cv2.imwrite('../debug/%s_houghlines.png' % name, shred)
 
             params['Lines Count'] = len(lines[0])
             tags.append(self.TAG_HAS_LINES_FEATURE)
diff --git a/unshred/threshold.py b/unshred/threshold.py
new file mode 100755
index 0000000..c7e5888
--- /dev/null
+++ b/unshred/threshold.py
@@ -0,0 +1,118 @@
+"""Fancy adaptive threshlding.
+
+Code adapted from
+http://stackoverflow.com/questions/22122309/opencv-adaptive-threshold-ocr.
+
+As I understand it:
+1. Reduces an image to a smaller one, where each
+DEFAULT_BLOCKSIZExDEFAULT_BLOCKSIZE block -> one pixel.
+2. Creates a mask of small_image size, where pixels corresponding to
+high-variance (non-background) blocks get value > 0, others (background
+blocks) get 0.
+3. Small image is inpainted using mask from step 2. So non-bg blocks are
+inpainted by surrounding bg blocks.
+4. Image is resize back to original size, resulting in what looks like just bg
+from original image.
+5. Bg image is subtracted from original and result thresholded.
+
+The algorith assumes dark foreground on light background.
+
+"""
+import argparse
+import sys
+
+import cv2
+import numpy as np
+
+DEFAULT_BLOCKSIZE = 40
+
+# Blocks with variance over this value are assumed to contain foreground.
+MEAN_VARIANCE_THRESHOLD = 0.01
+
+
+def _calc_block_mean_variance(image, mask, blocksize):
+    """Adaptively determines image background.
+
+    Args:
+        image: image converted 1-channel image.
+        mask: 1-channel mask, same size as image.
+        blocksize: adaptive algorithm parameter.
+
+    Returns:
+        image of same size as input with foreground inpainted with background.
+    """
+    I = image.copy()
+    I_f = I.astype(np.float32) / 255.  # Used for mean and std.
+
+    result = np.zeros(
+        (image.shape[0] / blocksize, image.shape[1] / blocksize),
+        dtype=np.float32)
+
+    for i in xrange(0, image.shape[0] - blocksize, blocksize):
+        for j in xrange(0, image.shape[1] - blocksize, blocksize):
+
+            patch = I_f[i:i+blocksize+1, j:j+blocksize+1]
+            mask_patch = mask[i:i+blocksize+1, j:j+blocksize+1]
+
+            tmp1 = np.zeros((blocksize, blocksize))
+            tmp2 = np.zeros((blocksize, blocksize))
+            mean, std_dev = cv2.meanStdDev(patch, tmp1, tmp2, mask_patch)
+
+            value = 0
+            if std_dev[0][0] > MEAN_VARIANCE_THRESHOLD:
+                value = mean[0][0]
+
+            result[i/blocksize, j/blocksize] = value
+
+    small_image = cv2.resize(I, (image.shape[1] / blocksize,
+                                 image.shape[0] / blocksize))
+
+    res, inpaintmask = cv2.threshold(result, 0.02, 1, cv2.THRESH_BINARY)
+
+    inpainted = cv2.inpaint(small_image, inpaintmask.astype(np.uint8), 5,
+                            cv2.INPAINT_TELEA)
+
+    res = cv2.resize(inpainted, (image.shape[1], image.shape[0]))
+
+    return res
+
+
+def threshold(image, block_size=DEFAULT_BLOCKSIZE, mask=None):
+    """Applies adaptive thresholding to the given image.
+
+    Args:
+        image: BGRA image.
+        block_size: optional int block_size to use for adaptive thresholding.
+        mask: optional mask.
+    Returns:
+        Thresholded image.
+    """
+    if mask is None:
+        mask = np.zeros(image.shape[:2], dtype=np.uint8)
+        mask[:] = 255
+
+    image = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
+    res = _calc_block_mean_variance(image, mask, block_size)
+    res = image.astype(np.float32) - res.astype(np.float32) + 255
+    _, res = cv2.threshold(res, 215, 255, cv2.THRESH_BINARY)
+    return res
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input', type=str, help='Input file name.',
+                        nargs='?', default="11.jpg")
+    parser.add_argument('output', type=str, help='Output file name.',
+                        nargs='?', default="out.png")
+
+    args = parser.parse_args()
+
+    fname = args.input
+    outfile = args.output
+
+    fname = "11.jpg" if len(sys.argv) < 2 else sys.argv[1]
+    outfile = "out.png" if len(sys.argv) < 3 else sys.argv[2]
+
+    image = cv2.imread(fname, cv2.CV_LOAD_IMAGE_UNCHANGED)
+    result = threshold(image)
+    cv2.imwrite(outfile, result * 255)

From 8b09121db938a3e64a80d9cbf35e3d843e0e2743 Mon Sep 17 00:00:00 2001
From: Ievgen Varavva <fuzzy.parabola@gmail.com>
Date: Thu, 25 Sep 2014 23:59:16 +0100
Subject: [PATCH 10/11] Address review comments: less unused code, merge
 conflicts, more pep8, TODOs. AUTHORS file.

---
 AUTHORS                   |  4 ++++
 unshred/features/lines.py | 16 +++++++++-------
 unshred/threshold.py      |  8 ++------
 3 files changed, 15 insertions(+), 13 deletions(-)
 create mode 100644 AUTHORS

diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..05c4a37
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,4 @@
+Dmitry Chaplinsky <chaplinsky.dmitry@gmail.com>
+Fedir Nepyivoda <fednep@gmail.com>
+Ievgen Varavva <yvaravva@google.com>
+Konst Kolesnichenko <kolesnichenko@gmail.com>
diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index fd14114..e75d815 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -9,9 +9,7 @@
 
 
 class LinesFeatures(AbstractShredFeature):
-    TAG_HAS_LINES_FEATURE = "has lines"
-    TAG_PARALLEL_FEATURE = "parallel"
-    TAG_PERPENDECULAR_FEATURE = "perpendecular"
+    TAG_HAS_LINES_FEATURE = "Has Lines"
 
     def get_info(self, shred, contour, name):
         tags = []
@@ -30,6 +28,8 @@ def get_info(self, shred, contour, name):
         if DEBUG:
             cv2.imwrite('../debug/%s_mask_2.png' % name, mask)
 
+        # TODO: move thresholding to Shred class, to allow reusing from other
+        # feature detectors.
         edges = 255 - threshold.threshold(
             shred, min(shred.shape[:2])).astype(numpy.uint8)
         edges = edges & mask
@@ -46,15 +46,17 @@ def get_info(self, shred, contour, name):
         # TODO: come up with better threshold value. Come up with better lines
         # filtering.
         lines = cv2.HoughLinesP(edges, rho=1, theta=numpy.pi / 180,
-                                threshold=30, minLineLength=r_w*0.5,
-                                maxLineGap=r_w*0.2)
+                                threshold=30, minLineLength=r_w * 0.5,
+                                maxLineGap=r_w * 0.2)
         if lines is not None:
             if DEBUG:
                 for x1, y1, x2, y2 in lines[0]:
                     cv2.line(shred, (x1, y1), (x2, y2), (255, 255, 0, 0), 2)
                 cv2.imwrite('../debug/%s_houghlines.png' % name, shred)
-
-            params['Lines Count'] = len(lines[0])
             tags.append(self.TAG_HAS_LINES_FEATURE)
 
+            # TODO: Find dominant lines slopes and store as a tag.
+            # Determine the presence of multiple lines parallel or orthogonal to
+            # dominant slope (might mean that's part of a table).
+
         return params, tags
diff --git a/unshred/threshold.py b/unshred/threshold.py
index c7e5888..85ff59f 100755
--- a/unshred/threshold.py
+++ b/unshred/threshold.py
@@ -18,9 +18,6 @@
 The algorith assumes dark foreground on light background.
 
 """
-import argparse
-import sys
-
 import cv2
 import numpy as np
 
@@ -99,6 +96,8 @@ def threshold(image, block_size=DEFAULT_BLOCKSIZE, mask=None):
 
 
 if __name__ == '__main__':
+    import argparse
+
     parser = argparse.ArgumentParser()
     parser.add_argument('input', type=str, help='Input file name.',
                         nargs='?', default="11.jpg")
@@ -110,9 +109,6 @@ def threshold(image, block_size=DEFAULT_BLOCKSIZE, mask=None):
     fname = args.input
     outfile = args.output
 
-    fname = "11.jpg" if len(sys.argv) < 2 else sys.argv[1]
-    outfile = "out.png" if len(sys.argv) < 3 else sys.argv[2]
-
     image = cv2.imread(fname, cv2.CV_LOAD_IMAGE_UNCHANGED)
     result = threshold(image)
     cv2.imwrite(outfile, result * 255)

From 4d6b5df1d7675235cec235a1d3a5d1fd0c5719ea Mon Sep 17 00:00:00 2001
From: Ievgen Varavva <fuzzy.parabola@gmail.com>
Date: Fri, 31 Oct 2014 22:12:44 +0000
Subject: [PATCH 11/11] =?UTF-8?q?Adds=20lines=20angle=20estimation=20using?=
 =?UTF-8?q?=20mean=20of=20detected=20hough=20lines=E2=80=99=20angles.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 unshred/features/lines.py | 131 ++++++++++++++++++++++++++++++++++----
 unshred/threshold.py      |   3 +-
 2 files changed, 119 insertions(+), 15 deletions(-)

diff --git a/unshred/features/lines.py b/unshred/features/lines.py
index e75d815..3ac0194 100644
--- a/unshred/features/lines.py
+++ b/unshred/features/lines.py
@@ -1,3 +1,5 @@
+import math
+import cmath
 import cv2
 import numpy
 
@@ -8,7 +10,82 @@
 DEBUG = False
 
 
+MEAN, MEDIAN = range(2)
+
+
+def _get_dominant_angle(lines, domination_type=MEDIAN):
+    """Estimates the prevailing direction of a collection of line segments.
+
+    Args:
+        lines: iterable of (x1, y1, x2, y2) segment endpoint tuples.
+        domination_type: estimator to use, MEDIAN (default) or MEAN.
+
+    Returns:
+        The angle, in radians, produced by the selected estimator.
+
+    Raises:
+        ValueError: if domination_type is neither MEDIAN nor MEAN.
+    """
+    if domination_type == MEDIAN:
+        return _get_median_angle(lines)
+    if domination_type == MEAN:
+        return _get_mean_angle(lines)
+
+    raise ValueError(
+        'Unknown domination type provided: %s' % (domination_type))
+
+
+def _normalize_angle(angle, range, step):
+    """Shifts an angle by whole steps towards the given range.
+
+    Args:
+        angle: the value to normalize.
+        range: a 2-sequence holding the min and max target values.
+        step: amount added or subtracted on each adjustment.
+
+    Returns:
+        The angle raised while <= min, then lowered while >= max.
+    """
+    lower, upper = range[0], range[1]
+    while angle <= lower:
+        angle += step
+    while angle >= upper:
+        angle -= step
+    return angle
+
+
+def _get_mean_angle(lines):
+    """Returns the mean direction (radians) of (x1, y1, x2, y2) lines."""
+    doubled = []
+    for x1, y1, x2, y2 in lines:
+        c = complex(x2, -y2) - complex(x1, -y1)
+        # Squaring doubles the phase, so opposite directions of the same
+        # undirected line reinforce instead of cancelling in the average.
+        doubled.append((c / abs(c)) ** 2)
+    avg_angle = cmath.phase(numpy.average(doubled)) / 2
+    return _normalize_angle(avg_angle, [-math.pi / 2, math.pi / 2], math.pi)
+
+
+def _get_median_angle(lines):
+    """Returns the median direction (radians) of (x1, y1, x2, y2) lines."""
+    angles = []
+    for x1, y1, x2, y2 in lines:
+        c = complex(x2, -y2) - complex(x1, -y1)
+        angles.append(cmath.phase(c))
+
+    # Middle element of the sorted angles, i.e. the 50th percentile without
+    # interpolating between two samples.
+    median_angle = sorted(angles)[(len(angles) - 1) // 2]
+    return _normalize_angle(median_angle, [-math.pi / 2, math.pi / 2], math.pi)
+
+
 class LinesFeatures(AbstractShredFeature):
+    """Feature detector that recognizes lines.
+
+    If the lines are detected, tag "Has Lines" is set and "lines_angle" feature
+    is set to the value of best guess of lines angle in radians in range of
+    [-pi/2; pi/2].
+    """
     TAG_HAS_LINES_FEATURE = "Has Lines"
 
     def get_info(self, shred, contour, name):
@@ -35,28 +112,54 @@ def get_info(self, shred, contour, name):
         edges = edges & mask
 
         if DEBUG:
-            # const: uncomment for debug
             cv2.imwrite('../debug/%s_asrc.png' % name, shred)
             cv2.imwrite('../debug/%s_edges.png' % name, edges)
             cv2.imwrite('../debug/%s_mask.png' % name, mask)
 
         _, _, r_w, r_h = cv2.boundingRect(contour)
 
-        # Line len should be at least 50% of shred's width, gap - 20%
-        # TODO: come up with better threshold value. Come up with better lines
-        # filtering.
-        lines = cv2.HoughLinesP(edges, rho=1, theta=numpy.pi / 180,
-                                threshold=30, minLineLength=r_w * 0.5,
-                                maxLineGap=r_w * 0.2)
+        # Line len: at least 30% of shred's longer side; max gap: 20% of width
+        lines = cv2.HoughLinesP(edges, rho=10, theta=numpy.pi / 180 * 2,
+                                threshold=30, maxLineGap=r_w * 0.2,
+                                minLineLength=max([r_h, r_w]) * 0.3
+                                )
+
         if lines is not None:
-            if DEBUG:
-                for x1, y1, x2, y2 in lines[0]:
-                    cv2.line(shred, (x1, y1), (x2, y2), (255, 255, 0, 0), 2)
-                cv2.imwrite('../debug/%s_houghlines.png' % name, shred)
+            lines = lines[0]
             tags.append(self.TAG_HAS_LINES_FEATURE)
 
-            # TODO: Find dominant lines slopes and store as a tag.
-            # Determine the presence of multiple lines parallel or orthogonal to
-            # dominant slope (might mean that's part of a table).
+            dominant_angle = _get_dominant_angle(lines)
+
+            if DEBUG:
+                dbg = cv2.cvtColor(edges, cv2.cv.CV_GRAY2BGRA)
+                # Draw detected lines in green.
+                for x1, y1, x2, y2 in lines:
+                    cv2.line(dbg, (x1, y1), (x2, y2), (0, 255, 0, 255), 1)
+
+                approaches = [
+                    ((0, 0, 255, 255), _get_dominant_angle(lines, MEAN)),
+                    ((255, 0, 0, 255), _get_dominant_angle(lines, MEDIAN)),
+                ]
+
+                print [a[1] for a in approaches]
+
+                # Draws lines originating from the middle of left border with
+                # computed slopes: MEAN in red, MEDIAN in blue.
+                for color, angle in approaches:
+                    def y(x0, x):
+                        return max(-2**15,
+                                   min(2**15,
+                                       int(x0 - math.tan(angle) * x)))
+
+                    x0 = shred.shape[0]/2
+                    x1 = 0
+                    y1 = y(x0, x1)
+                    x2 = shred.shape[1]
+                    y2 = y(x0, x2)
+                    cv2.line(dbg, (x1, y1), (x2, y2), color, 1)
+
+                dbg = numpy.concatenate([shred, dbg], 1)
+                cv2.imwrite('../debug/%s_houghlines.png' % name, dbg)
+            params['lines_angle'] = dominant_angle
 
         return params, tags
diff --git a/unshred/threshold.py b/unshred/threshold.py
index 85ff59f..9e5cd77 100755
--- a/unshred/threshold.py
+++ b/unshred/threshold.py
@@ -88,7 +88,8 @@ def threshold(image, block_size=DEFAULT_BLOCKSIZE, mask=None):
         mask = np.zeros(image.shape[:2], dtype=np.uint8)
         mask[:] = 255
 
-    image = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
+    if len(image.shape) > 2 and image.shape[2] == 4:
+        image = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
     res = _calc_block_mean_variance(image, mask, block_size)
     res = image.astype(np.float32) - res.astype(np.float32) + 255
     _, res = cv2.threshold(res, 215, 255, cv2.THRESH_BINARY)