From 3635399205171e1104de72081216c845e7ce02dc Mon Sep 17 00:00:00 2001
From: Fedir Nepyivoda <fednep@gmail.com>
Date: Thu, 4 Sep 2014 13:34:49 +0300
Subject: [PATCH 1/5] Text direction feature detector added

---
 unshred/features/text.py | 361 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 361 insertions(+)
 create mode 100644 unshred/features/text.py

diff --git a/unshred/features/text.py b/unshred/features/text.py
new file mode 100644
index 0000000..9e2fb2b
--- /dev/null
+++ b/unshred/features/text.py
@@ -0,0 +1,361 @@
+import sys
+import json
+import os.path
+import glob
+
+import cv2
+import numpy as np
+
+import Image
+import ImageEnhance
+import ImageFilter
+import ImageDraw
+import ImageOps
+
+import base
+
+DEBUG = True
+
+# Magic values
+MAGIC_COLOR_THREASHOLD = 10
+MAGIC_LINE_THRESHOLD = 10
+MAGIC_SECTIONS_THREASHOLD = 64
+MAGIN_GROUP_THREASHOLD = 5
+
+class TextFeatures(base.AbstractShredFeature):
+    """ 
+        Tries to guess the following features of the shread:
+
+            * text direction (in angles) relative to original shread
+            * number of text lines
+            * positions of text lines (after rotation) and their heights
+
+        Algorithm is pretty straightforward and slow:
+            
+            1. increase shread contrast
+            2. rotate image from -45 to +45 angles and compute 
+               image line histogram (sum of inverted pixels for every line)
+            3. analyze histogram for every angle to compute resuling coefficients
+            3. sort computed parameters and choose the best match
+            
+        Currently, the best match is selected by angle, at which 
+        maximum number of text lines is found with minimum heights for each line.
+
+        TODO: 
+              * better way to increase contrast of the image (based on histogram)
+              * include and analyze additional parameters of rotated shread, like 
+                contrast of horizontal lines histogram, connect with lines detector 
+                for more accurate results, etc..
+              * improve performance by using OpenCV/numpy for computation
+        
+    """
+
+    def enhance(self, image, enhancer_class, value):
+        enhancer = enhancer_class(image);
+        return enhancer.enhance(value)
+
+    def desaturate(self, image):
+        """ Get green component from the PIL image
+        """
+        r, g, b, a = image.split()
+        return Image.merge("RGBA", (g,g,g,a))
+
+    def get_derivative(self, values):
+        """ Calculate derivative of a list of values
+        """
+        result = []
+        for i in xrange(1, len(values) - 1):
+            result.append((values[i + 1] - values[i - 1]) / 2)
+
+        return result
+
+    def get_sections(self, values):
+        """ Analyze lines histogram and return list of sections 
+            (consecutive blocks of data values > threashold)
+
+            Args:
+                values: horizontal line histogram
+
+            Returns:
+
+                List of Sections.
+                    Section is an uninterrupted part of histogram
+                    dictionary with keys:
+
+                    pos:   start position of section
+                    len:   length of section
+                    value: inverted sum of pixels for that section
+        """
+        sections = []
+
+        current_section = []
+        is_in_section = False
+        spacing = []
+        position = 0
+
+        for i in xrange(len(values)):
+
+            value = values[i]
+
+            if value > MAGIC_SECTIONS_THREASHOLD:
+
+                if is_in_section == False:
+                    sections.append({'len' : -len(spacing), 'value' : 0, 'pos': i - len(spacing)})
+                    is_in_section = True
+                    spacing = []
+
+                current_section.append(value)
+            else:
+
+                if is_in_section == True:
+                    is_in_section = False;
+                    sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)})
+                    current_section = []
+
+                spacing.append(' ')
+
+        
+        return sections     
+
+    def get_histogram_for_angle(self, image, angle):
+        """ 
+            Rotates an image for the specified angle and calculates
+            sum of pixel values for every row
+
+            Args:
+
+                image: PIL image object
+                angle: rotation angle
+
+            Returns:
+                list of values. Each value is a sum of inverted pixel's for the corresponding row
+        """
+
+        total_lines = 0
+        copy = image.rotate(angle, Image.BILINEAR, True)
+
+        x = 0
+        y = 0
+
+        line_data = 0
+
+        line_histogram = []
+
+        for data in copy.getdata():
+
+            if data[0] < MAGIC_COLOR_THREASHOLD and data[3] == 255:
+                line_data += 255 - data[0]
+
+            x += 1
+            
+            if x >= copy.size[0]:
+                if line_data > MAGIC_LINE_THRESHOLD:
+                    total_lines += 1
+
+                line_histogram.append(line_data)
+
+                line_data = 0
+
+                x = 0
+                y += 1
+
+        return line_histogram
+
+    def group_sections(self, sections):
+        """ Groups adjacent sections which are devided by only few pixels.
+        """        
+        finished = False
+
+        while not finished:
+
+            finished = True
+            for i in xrange(1, len(sections) - 1):
+                if sections[i]['len'] < 0 and abs(sections[i]['len']) < MAGIN_GROUP_THREASHOLD:
+                    sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len']
+                    sections[i-1]['value'] += sections[i+1]['value']
+
+                    sections[i:i+2] = []
+                    finished = False
+                    break        
+
+
+        if len(sections) == 0:
+            return
+
+        if abs(sections[0]['len']) < MAGIN_GROUP_THREASHOLD:
+            sections[0:1] = []
+
+        if len(sections) == 0:
+            return
+
+        if abs(sections[-1]['len']) < MAGIN_GROUP_THREASHOLD:
+            sections[-1:] = []
+
+    def log_format_sections(self, sections):
+        """ Formats sections in human-readable form for debugging
+        """
+        
+        data = []
+        for section in sections:
+            data.append("%s (%s)" % (section['len'], section['value']))
+
+        return ", ".join(data)
+
+    def get_rotation_info(self, image, angle):
+        """ 
+            Rotates image and compute resulting coefficients for the specified angle
+            
+            Args:
+                image: grayscale python image
+                angle: angle for which to rotate an image
+
+            Returns:
+
+                dictionary with values for the specified angle
+                
+                Coefficients currently computed:
+                    nsc (Normalized Sections Count) - number of text lines, 
+                         without those lines, which have very little pixels in them 
+
+                    heights - sum of heights of lines 
+
+                Additional lists returned (currently used only for debug and experiments):
+                    derivative_pos: list of positive derivatives values for histogram
+                    derivative_neg: list of negative derivatives values for histogram
+                    full_sections:  list of sections with enough data for analysis
+                    sections:       list of all sections
+        """
+
+        diagram = self.get_histogram_for_angle(image, angle)
+        derivative = self.get_derivative(diagram)
+        sections = self.get_sections(diagram)        
+
+        self.group_sections(sections)            
+
+
+        # Remove all spacing sections
+        sections = [s for s in sections if s['len'] > 0]
+        #positive_sections = [s for s in sections if s['len'] > 0]
+
+        full_sections = []
+        normalized_sections_count = 0
+        sections_heights = 0
+
+        if len(sections) > 0:
+            # get average section size
+            section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections))
+
+            full_sections = [s for s in sections if s['value'] > 0.5*section_avg_value]
+            normalized_sections_count = len(full_sections)
+
+            sections_heights = sum( map(lambda x: x['len'], full_sections) )
+            
+        positive = [x for x in derivative if x > 0]
+        negative = [x for x in derivative if x < 0]
+
+        positive.sort()
+        positive.reverse()
+
+        negative.sort()
+            
+        positive_sum = sum(positive[:5])
+        nagative_sum = sum(negative[:5])
+
+        return {'angle' : angle, 
+                'nsc': normalized_sections_count,
+                'heights': sections_heights,
+                'derivative_pos' : positive_sum, 
+                'derivative_neg' : nagative_sum, 
+                'full_sections': full_sections,
+                'sections': sections}
+
+    def sort_result(self, result):
+        """ Sort result by important parameters
+
+            Args:
+                result: list of dictionaries for each tested angle
+
+            Returns:
+                sorted dict with the most accurate result first
+        """
+
+        def sort_fun2(a, b):
+            if b['nsc'] == a['nsc']:
+                return a['heights'] - b['heights']
+
+            return b['nsc'] - a['nsc']
+                
+        def sort_fun(a, b):
+            if len(b['sections']) == len(a['sections']):
+                return b['derivative_pos'] - a['derivative_pos']
+            
+            return len(b['sections']) - len(a['sections'])
+
+        result.sort( sort_fun2 )
+
+    def info_for_angles(self, image):
+        """Args:
+                image: grayscale python image
+
+            Returns:
+                list of dicts with info for every angle tested
+        """
+
+        result = []
+        for angle in xrange(-45, 45):
+            
+            if DEBUG: sys.stdout.write(".")
+
+            rotation_info = self.get_rotation_info(image, angle)
+            result.append(rotation_info) # diagram, derivative
+
+        if DEBUG: sys.stdout.write("\n")            
+
+        self.sort_result(result)
+
+        return result
+
+
+    def get_info(self, shred, contour, name):
+
+        if DEBUG:        
+            print "Processing file: %s" % (name)
+
+        image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA))
+
+        image = self.desaturate(image)
+
+        image = self.enhance(image, ImageEnhance.Brightness, 1.5);
+        image = self.enhance(image, ImageEnhance.Contrast, 3);
+
+        results = self.info_for_angles(image)
+
+        top_result = results[0]       
+        resulting_angle = top_result['angle']
+
+        if DEBUG:
+            result = image.rotate(resulting_angle, Image.BILINEAR, True)            
+            result.save("results/%s" % (name))
+        
+        return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]}
+
+    
+if __name__ == '__main__':
+
+    IMAGE_PATH = "C:\\Development\\shreder\\test-rotation\\direct\\img"
+    FILE_NAME = "11.png"
+    
+    for full_name in glob.glob("%s\\*.png" % (IMAGE_PATH)):
+
+        features = TextFeatures(None)
+        cv_image = cv2.imread(full_name, -1)
+
+        file_name = os.path.split(full_name)[1]
+
+        result = features.get_info(cv_image, None, file_name)
+
+        with open("results/%s.json" %(file_name), "wt") as f_info:
+            f_info.write( json.dumps(result, sort_keys=True,
+                            indent=4, separators=(',', ': ')) )
+
+#    rotate("img/74.png")

From 766fd0cf1c45c87b6b405ae4d54743143982be5e Mon Sep 17 00:00:00 2001
From: Fedir Nepyivoda <fednep@gmail.com>
Date: Fri, 5 Sep 2014 21:50:36 +0300
Subject: [PATCH 2/5] More accurate results, returns "undefined" angle if not
 sure, fixes after review

---
 unshred/features/text.py | 158 +++++++++++++++++++++------------------
 1 file changed, 87 insertions(+), 71 deletions(-)

diff --git a/unshred/features/text.py b/unshred/features/text.py
index 9e2fb2b..30e0ab8 100644
--- a/unshred/features/text.py
+++ b/unshred/features/text.py
@@ -16,11 +16,17 @@
 
 DEBUG = True
 
+# Minimum number of detected text lines. 
+# If numer of lines recognized are below this value - the result is undefined
+MIN_LINES_FOR_RESULT = 3  
+
 # Magic values
-MAGIC_COLOR_THREASHOLD = 10
+MAGIC_COLOR_THRESHOLD = 10
 MAGIC_LINE_THRESHOLD = 10
-MAGIC_SECTIONS_THREASHOLD = 64
-MAGIN_GROUP_THREASHOLD = 5
+MAGIC_SECTIONS_THRESHOLD = 64
+
+MAGIN_GROUP_VALUE_THRESHOLD = 0.3
+MAGIC_GROUP_LEN_THRESHOLD = 5
 
 class TextFeatures(base.AbstractShredFeature):
     """ 
@@ -65,7 +71,7 @@ def get_derivative(self, values):
         """
         result = []
         for i in xrange(1, len(values) - 1):
-            result.append((values[i + 1] - values[i - 1]) / 2)
+            result.append((values[i + 1] - values[i - 1]) / 2.0)
 
         return result
 
@@ -90,31 +96,28 @@ def get_sections(self, values):
 
         current_section = []
         is_in_section = False
-        spacing = []
+        spacing_len = 0
         position = 0
 
-        for i in xrange(len(values)):
-
-            value = values[i]
+        for i, value in enumerate(values):
 
-            if value > MAGIC_SECTIONS_THREASHOLD:
+            if value > MAGIC_SECTIONS_THRESHOLD:
 
-                if is_in_section == False:
-                    sections.append({'len' : -len(spacing), 'value' : 0, 'pos': i - len(spacing)})
+                if not is_in_section:
+                    sections.append({'len' : -spacing_len, 'value' : 0, 'pos': i - spacing_len})
                     is_in_section = True
-                    spacing = []
+                    spacing_len = 0
 
                 current_section.append(value)
             else:
 
-                if is_in_section == True:
+                if is_in_section:
                     is_in_section = False;
                     sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)})
                     current_section = []
 
-                spacing.append(' ')
+                spacing_len += 1
 
-        
         return sections     
 
     def get_histogram_for_angle(self, image, angle):
@@ -131,64 +134,67 @@ def get_histogram_for_angle(self, image, angle):
                 list of values. Each value is a sum of inverted pixel's for the corresponding row
         """
 
-        total_lines = 0
         copy = image.rotate(angle, Image.BILINEAR, True)
 
-        x = 0
-        y = 0
-
-        line_data = 0
-
         line_histogram = []
+                
+        for i in xrange(copy.size[1]):
+            line = copy.crop( (0, i, copy.size[0], i + 1))
 
-        for data in copy.getdata():
-
-            if data[0] < MAGIC_COLOR_THREASHOLD and data[3] == 255:
-                line_data += 255 - data[0]
-
-            x += 1
-            
-            if x >= copy.size[0]:
-                if line_data > MAGIC_LINE_THRESHOLD:
-                    total_lines += 1
-
-                line_histogram.append(line_data)
+            value = 0
 
-                line_data = 0
+            for pixel in line.getdata():
+                if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255:
+                    value += 255 - pixel[0]
+                
+            line_histogram.append(value)           
+                       
+        return line_histogram
 
-                x = 0
-                y += 1
+    def group_section_below_threshold(self, section, group_threshold):
+        if section['len'] > 0 and section['value'] < group_threshold:
+            return True
+        
+        if section['len'] <= 0 and section['len'] > - MAGIC_GROUP_LEN_THRESHOLD:
+            return True
 
-        return line_histogram
+        return False
 
     def group_sections(self, sections):
         """ Groups adjacent sections which are devided by only few pixels.
         """        
         finished = False
 
+        section_avg_value = 0
+        positive_sections = [s['value'] for s in sections if s['len'] > 0]
+
+        if len(positive_sections) > 0:
+            section_avg_value = sum( positive_sections ) / float(len(positive_sections))
+
+        group_threshold = section_avg_value * MAGIN_GROUP_VALUE_THRESHOLD
+
         while not finished:
 
             finished = True
             for i in xrange(1, len(sections) - 1):
-                if sections[i]['len'] < 0 and abs(sections[i]['len']) < MAGIN_GROUP_THREASHOLD:
+                if self.group_section_below_threshold(sections[i], group_threshold):
                     sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len']
                     sections[i-1]['value'] += sections[i+1]['value']
 
                     sections[i:i+2] = []
                     finished = False
-                    break        
-
+                    break
 
         if len(sections) == 0:
             return
 
-        if abs(sections[0]['len']) < MAGIN_GROUP_THREASHOLD:
+        if self.group_section_below_threshold(sections[0], group_threshold):
             sections[0:1] = []
 
         if len(sections) == 0:
             return
 
-        if abs(sections[-1]['len']) < MAGIN_GROUP_THREASHOLD:
+        if self.group_section_below_threshold(sections[-1], group_threshold):
             sections[-1:] = []
 
     def log_format_sections(self, sections):
@@ -201,6 +207,13 @@ def log_format_sections(self, sections):
 
         return ", ".join(data)
 
+    def get_derivative_coef(self, histogram):
+        """ Calculates the square sum of derivative from histogram
+            This can be used to measure "sharpness" of the histogram
+        """
+        derivative = self.get_derivative(histogram)
+        return sum(map(lambda x: x*x, derivative))
+
     def get_rotation_info(self, image, angle):
         """ 
             Rotates image and compute resulting coefficients for the specified angle
@@ -227,7 +240,6 @@ def get_rotation_info(self, image, angle):
         """
 
         diagram = self.get_histogram_for_angle(image, angle)
-        derivative = self.get_derivative(diagram)
         sections = self.get_sections(diagram)        
 
         self.group_sections(sections)            
@@ -245,29 +257,16 @@ def get_rotation_info(self, image, angle):
             # get average section size
             section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections))
 
-            full_sections = [s for s in sections if s['value'] > 0.5*section_avg_value]
+            full_sections = [s for s in sections if s['value'] > 0.3 * section_avg_value]
             normalized_sections_count = len(full_sections)
 
             sections_heights = sum( map(lambda x: x['len'], full_sections) )
-            
-        positive = [x for x in derivative if x > 0]
-        negative = [x for x in derivative if x < 0]
-
-        positive.sort()
-        positive.reverse()
-
-        negative.sort()
-            
-        positive_sum = sum(positive[:5])
-        nagative_sum = sum(negative[:5])
 
         return {'angle' : angle, 
                 'nsc': normalized_sections_count,
                 'heights': sections_heights,
-                'derivative_pos' : positive_sum, 
-                'derivative_neg' : nagative_sum, 
-                'full_sections': full_sections,
-                'sections': sections}
+                'derivative': self.get_derivative_coef(diagram),
+                'full_sections': full_sections}
 
     def sort_result(self, result):
         """ Sort result by important parameters
@@ -286,10 +285,10 @@ def sort_fun2(a, b):
             return b['nsc'] - a['nsc']
                 
         def sort_fun(a, b):
-            if len(b['sections']) == len(a['sections']):
-                return b['derivative_pos'] - a['derivative_pos']
+            if b['nsc'] == a['nsc']:
+                return b['derivative'] - a['derivative']
             
-            return len(b['sections']) - len(a['sections'])
+            return b['nsc'] - a['nsc']
 
         result.sort( sort_fun2 )
 
@@ -322,8 +321,7 @@ def get_info(self, shred, contour, name):
             print "Processing file: %s" % (name)
 
         image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA))
-
-        image = self.desaturate(image)
+        image = image.convert("LA")
 
         image = self.enhance(image, ImageEnhance.Brightness, 1.5);
         image = self.enhance(image, ImageEnhance.Contrast, 3);
@@ -337,15 +335,14 @@ def get_info(self, shred, contour, name):
             result = image.rotate(resulting_angle, Image.BILINEAR, True)            
             result.save("results/%s" % (name))
         
-        return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]}
-
+        if top_result['nsc'] >= MIN_LINES_FOR_RESULT:
+            return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]}
+        else:
+            return {'text_angle' : "undefined" }
     
 if __name__ == '__main__':
 
-    IMAGE_PATH = "C:\\Development\\shreder\\test-rotation\\direct\\img"
-    FILE_NAME = "11.png"
-    
-    for full_name in glob.glob("%s\\*.png" % (IMAGE_PATH)):
+    def process_shred(full_name):
 
         features = TextFeatures(None)
         cv_image = cv2.imread(full_name, -1)
@@ -354,8 +351,27 @@ def get_info(self, shred, contour, name):
 
         result = features.get_info(cv_image, None, file_name)
 
+        if result == None:
+            return
+
         with open("results/%s.json" %(file_name), "wt") as f_info:
             f_info.write( json.dumps(result, sort_keys=True,
                             indent=4, separators=(',', ': ')) )
 
-#    rotate("img/74.png")
+    if len(sys.argv) < 2:
+        print "Error: Please specify path or file"
+        sys.exit(255)
+
+    path = sys.argv[1]
+
+    if os.path.isfile(path):
+        process_shred(path)
+    else:
+
+        for full_name in glob.glob("%s\\*.png" % (path)): 
+
+            if full_name.count("_ctx") > 0 or full_name.count("_mask") > 0:
+                continue
+
+            process_shred(full_name)
+    
\ No newline at end of file

From c6d4a62a9a3ce198a710a721e0a1294b7fe67649 Mon Sep 17 00:00:00 2001
From: Fedir Nepyivoda <fednep@gmail.com>
Date: Sun, 7 Sep 2014 17:09:00 +0300
Subject: [PATCH 3/5] Improvements after review

---
 unshred/features/text.py | 207 +++++++++++++++++----------------------
 1 file changed, 92 insertions(+), 115 deletions(-)

diff --git a/unshred/features/text.py b/unshred/features/text.py
index 30e0ab8..b86d0da 100644
--- a/unshred/features/text.py
+++ b/unshred/features/text.py
@@ -2,6 +2,7 @@
 import json
 import os.path
 import glob
+import collections
 
 import cv2
 import numpy as np
@@ -16,68 +17,58 @@
 
 DEBUG = True
 
-# Minimum number of detected text lines. 
-# If numer of lines recognized are below this value - the result is undefined
-MIN_LINES_FOR_RESULT = 3  
+# Minimum number of detected text lines.
+# If number of lines recognized are below this value - the result is undefined
+MIN_LINES_FOR_RESULT = 3
 
 # Magic values
 MAGIC_COLOR_THRESHOLD = 10
 MAGIC_LINE_THRESHOLD = 10
 MAGIC_SECTIONS_THRESHOLD = 64
 
-MAGIN_GROUP_VALUE_THRESHOLD = 0.3
+MAGIC_GROUP_VALUE_THRESHOLD = 0.3
 MAGIC_GROUP_LEN_THRESHOLD = 5
 
+RotationInfo = collections.namedtuple('RotationInfo', ['angle', 'nsc',
+                                                       'heights', 'derivative',
+                                                       'full_sections'])
+
+Section = collections.namedtuple('Section', ['pos', 'len', 'value'])
+
 class TextFeatures(base.AbstractShredFeature):
-    """ 
-        Tries to guess the following features of the shread:
+    """
+        Tries to guess the following features of the shred:
 
-            * text direction (in angles) relative to original shread
+            * text direction (in degrees) relative to original shred
             * number of text lines
             * positions of text lines (after rotation) and their heights
 
         Algorithm is pretty straightforward and slow:
-            
-            1. increase shread contrast
-            2. rotate image from -45 to +45 angles and compute 
+
+            1. increase shred contrast
+            2. rotate image from -45 to +45 degrees and compute
                image line histogram (sum of inverted pixels for every line)
-            3. analyze histogram for every angle to compute resuling coefficients
+            3. analyze histogram for every angle to compute resulting coefficients
             3. sort computed parameters and choose the best match
-            
-        Currently, the best match is selected by angle, at which 
+
+        Currently, the best match is selected by angle, at which
         maximum number of text lines is found with minimum heights for each line.
 
-        TODO: 
+        TODO:
               * better way to increase contrast of the image (based on histogram)
-              * include and analyze additional parameters of rotated shread, like 
-                contrast of horizontal lines histogram, connect with lines detector 
+              * include and analyze additional parameters of rotated shred, like
+                contrast of horizontal lines histogram, connect with lines detector
                 for more accurate results, etc..
               * improve performance by using OpenCV/numpy for computation
-        
     """
 
     def enhance(self, image, enhancer_class, value):
-        enhancer = enhancer_class(image);
+        enhancer = enhancer_class(image)
         return enhancer.enhance(value)
 
-    def desaturate(self, image):
-        """ Get green component from the PIL image
-        """
-        r, g, b, a = image.split()
-        return Image.merge("RGBA", (g,g,g,a))
-
-    def get_derivative(self, values):
-        """ Calculate derivative of a list of values
-        """
-        result = []
-        for i in xrange(1, len(values) - 1):
-            result.append((values[i + 1] - values[i - 1]) / 2.0)
-
-        return result
-
     def get_sections(self, values):
-        """ Analyze lines histogram and return list of sections 
-            (consecutive blocks of data values > threashold)
+        """ Analyze lines histogram and return list of Sections
+            (consecutive blocks of data values > threshold)
 
             Args:
                 values: horizontal line histogram
@@ -97,14 +88,13 @@ def get_sections(self, values):
         current_section = []
         is_in_section = False
         spacing_len = 0
-        position = 0
 
         for i, value in enumerate(values):
 
             if value > MAGIC_SECTIONS_THRESHOLD:
 
                 if not is_in_section:
-                    sections.append({'len' : -spacing_len, 'value' : 0, 'pos': i - spacing_len})
+                    sections.append(Section(len=-spacing_len, value=0, pos=i - spacing_len))
                     is_in_section = True
                     spacing_len = 0
 
@@ -112,23 +102,23 @@ def get_sections(self, values):
             else:
 
                 if is_in_section:
-                    is_in_section = False;
-                    sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)})
+                    is_in_section = False
+                    sections.append(Section(len=len(current_section), value=sum(current_section), pos=i - len(current_section)))
                     current_section = []
 
                 spacing_len += 1
 
-        return sections     
+        return sections
 
     def get_histogram_for_angle(self, image, angle):
-        """ 
+        """
             Rotates an image for the specified angle and calculates
             sum of pixel values for every row
 
             Args:
 
                 image: PIL image object
-                angle: rotation angle
+                angle: rotation angle in degrees
 
             Returns:
                 list of values. Each value is a sum of inverted pixel's for the corresponding row
@@ -137,61 +127,60 @@ def get_histogram_for_angle(self, image, angle):
         copy = image.rotate(angle, Image.BILINEAR, True)
 
         line_histogram = []
-                
-        for i in xrange(copy.size[1]):
-            line = copy.crop( (0, i, copy.size[0], i + 1))
 
+        for i in xrange(copy.size[1]):
+            line = copy.crop((0, i, copy.size[0], i + 1))
             value = 0
 
             for pixel in line.getdata():
                 if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255:
                     value += 255 - pixel[0]
-                
-            line_histogram.append(value)           
-                       
+
+            line_histogram.append(value)
+
         return line_histogram
 
     def group_section_below_threshold(self, section, group_threshold):
-        if section['len'] > 0 and section['value'] < group_threshold:
+        if section.len > 0 and section.value < group_threshold:
             return True
-        
-        if section['len'] <= 0 and section['len'] > - MAGIC_GROUP_LEN_THRESHOLD:
+
+        if section.len <= 0 and section.len > - MAGIC_GROUP_LEN_THRESHOLD:
             return True
 
         return False
 
     def group_sections(self, sections):
         """ Groups adjacent sections which are devided by only few pixels.
-        """        
+        """
         finished = False
 
         section_avg_value = 0
-        positive_sections = [s['value'] for s in sections if s['len'] > 0]
+        positive_sections = [s.value for s in sections if s.len > 0]
 
-        if len(positive_sections) > 0:
-            section_avg_value = sum( positive_sections ) / float(len(positive_sections))
+        if positive_sections:
+            section_avg_value = sum(positive_sections) / float(len(positive_sections))
 
-        group_threshold = section_avg_value * MAGIN_GROUP_VALUE_THRESHOLD
+        group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD
 
         while not finished:
 
             finished = True
             for i in xrange(1, len(sections) - 1):
                 if self.group_section_below_threshold(sections[i], group_threshold):
-                    sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len']
-                    sections[i-1]['value'] += sections[i+1]['value']
-
+                    sections[i-1] = Section(len=sections[i-1].len + sections[i].len + sections[i+1].len,
+                                            pos=sections[i-1].pos,
+                                            value=sections[i-1].value + sections[i].value + sections[i+1].value)
                     sections[i:i+2] = []
                     finished = False
                     break
 
-        if len(sections) == 0:
+        if not sections:
             return
 
         if self.group_section_below_threshold(sections[0], group_threshold):
             sections[0:1] = []
 
-        if len(sections) == 0:
+        if not sections:
             return
 
         if self.group_section_below_threshold(sections[-1], group_threshold):
@@ -200,10 +189,9 @@ def group_sections(self, sections):
     def log_format_sections(self, sections):
         """ Formats sections in human-readable form for debugging
         """
-        
         data = []
         for section in sections:
-            data.append("%s (%s)" % (section['len'], section['value']))
+            data.append("%s (%s)" % (section.len, section.value))
 
         return ", ".join(data)
 
@@ -211,13 +199,13 @@ def get_derivative_coef(self, histogram):
         """ Calculates the square sum of derivative from histogram
             This can be used to measure "sharpness" of the histogram
         """
-        derivative = self.get_derivative(histogram)
-        return sum(map(lambda x: x*x, derivative))
+        derivative = np.gradient(histogram)
+        return sum([x*x for x in derivative])
 
     def get_rotation_info(self, image, angle):
-        """ 
+        """
             Rotates image and compute resulting coefficients for the specified angle
-            
+
             Args:
                 image: grayscale python image
                 angle: angle for which to rotate an image
@@ -225,12 +213,12 @@ def get_rotation_info(self, image, angle):
             Returns:
 
                 dictionary with values for the specified angle
-                
+
                 Coefficients currently computed:
-                    nsc (Normalized Sections Count) - number of text lines, 
-                         without those lines, which have very little pixels in them 
+                    nsc (Normalized Sections Count) - number of text lines,
+                         without those lines, which have very little pixels in them
 
-                    heights - sum of heights of lines 
+                    heights - sum of heights of lines
 
                 Additional lists returned (currently used only for debug and experiments):
                     derivative_pos: list of positive derivatives values for histogram
@@ -240,14 +228,12 @@ def get_rotation_info(self, image, angle):
         """
 
         diagram = self.get_histogram_for_angle(image, angle)
-        sections = self.get_sections(diagram)        
-
-        self.group_sections(sections)            
+        sections = self.get_sections(diagram)
 
+        self.group_sections(sections)
 
         # Remove all spacing sections
-        sections = [s for s in sections if s['len'] > 0]
-        #positive_sections = [s for s in sections if s['len'] > 0]
+        sections = [s for s in sections if s.len > 0]
 
         full_sections = []
         normalized_sections_count = 0
@@ -255,18 +241,18 @@ def get_rotation_info(self, image, angle):
 
         if len(sections) > 0:
             # get average section size
-            section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections))
+            section_avg_value = sum([s.value for s in sections]) / float(len(sections))
 
-            full_sections = [s for s in sections if s['value'] > 0.3 * section_avg_value]
+            full_sections = [s for s in sections if s.value > MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value]
             normalized_sections_count = len(full_sections)
 
-            sections_heights = sum( map(lambda x: x['len'], full_sections) )
+            sections_heights = sum(map(lambda s: s.len, full_sections))
 
-        return {'angle' : angle, 
-                'nsc': normalized_sections_count,
-                'heights': sections_heights,
-                'derivative': self.get_derivative_coef(diagram),
-                'full_sections': full_sections}
+        return RotationInfo(angle=angle,
+                            nsc=normalized_sections_count,
+                            heights=sections_heights,
+                            derivative=self.get_derivative_coef(diagram),
+                            full_sections=full_sections)
 
     def sort_result(self, result):
         """ Sort result by important parameters
@@ -278,19 +264,13 @@ def sort_result(self, result):
                 sorted dict with the most accurate result first
         """
 
-        def sort_fun2(a, b):
-            if b['nsc'] == a['nsc']:
-                return a['heights'] - b['heights']
-
-            return b['nsc'] - a['nsc']
-                
         def sort_fun(a, b):
-            if b['nsc'] == a['nsc']:
-                return b['derivative'] - a['derivative']
-            
-            return b['nsc'] - a['nsc']
+            if b.nsc == a.nsc:
+                return cmp(a.heights, b.heights)
 
-        result.sort( sort_fun2 )
+            return cmp(b.nsc, a.nsc)
+
+        result.sort(sort_fun)
 
     def info_for_angles(self, image):
         """Args:
@@ -302,44 +282,44 @@ def info_for_angles(self, image):
 
         result = []
         for angle in xrange(-45, 45):
-            
+
             if DEBUG: sys.stdout.write(".")
 
             rotation_info = self.get_rotation_info(image, angle)
             result.append(rotation_info) # diagram, derivative
 
-        if DEBUG: sys.stdout.write("\n")            
+        if DEBUG: sys.stdout.write("\n")
 
         self.sort_result(result)
 
         return result
 
-
     def get_info(self, shred, contour, name):
 
-        if DEBUG:        
+        if DEBUG:
             print "Processing file: %s" % (name)
 
         image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA))
         image = image.convert("LA")
 
-        image = self.enhance(image, ImageEnhance.Brightness, 1.5);
-        image = self.enhance(image, ImageEnhance.Contrast, 3);
+        image = self.enhance(image, ImageEnhance.Brightness, 1.5)
+        image = self.enhance(image, ImageEnhance.Contrast, 3)
 
         results = self.info_for_angles(image)
 
-        top_result = results[0]       
-        resulting_angle = top_result['angle']
+        top_result = results[0]
+        resulting_angle = top_result.angle
 
         if DEBUG:
-            result = image.rotate(resulting_angle, Image.BILINEAR, True)            
+            result = image.rotate(resulting_angle, Image.BILINEAR, True)
             result.save("results/%s" % (name))
-        
-        if top_result['nsc'] >= MIN_LINES_FOR_RESULT:
-            return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]}
+
+        if top_result.nsc >= MIN_LINES_FOR_RESULT:
+            return {'text_angle' : resulting_angle,
+                    'text_sections' : [{'pos' : s.pos, 'length' : s.len} for s in top_result.full_sections]}
         else:
-            return {'text_angle' : "undefined" }
-    
+            return {'text_angle' : "undefined"}
+
 if __name__ == '__main__':
 
     def process_shred(full_name):
@@ -355,8 +335,8 @@ def process_shred(full_name):
             return
 
         with open("results/%s.json" %(file_name), "wt") as f_info:
-            f_info.write( json.dumps(result, sort_keys=True,
-                            indent=4, separators=(',', ': ')) )
+            f_info.write(json.dumps(result, sort_keys=True,
+                         indent=4, separators=(',', ': ')))
 
     if len(sys.argv) < 2:
         print "Error: Please specify path or file"
@@ -367,11 +347,8 @@ def process_shred(full_name):
     if os.path.isfile(path):
         process_shred(path)
     else:
-
-        for full_name in glob.glob("%s\\*.png" % (path)): 
-
+        for full_name in glob.glob("%s\\*.png" % (path)):
             if full_name.count("_ctx") > 0 or full_name.count("_mask") > 0:
                 continue
 
             process_shred(full_name)
-    
\ No newline at end of file

From 97718280e6526726e2b61ffca19da6a87966af7a Mon Sep 17 00:00:00 2001
From: Fedir Nepyivoda <fednep@gmail.com>
Date: Sat, 13 Sep 2014 18:13:36 +0300
Subject: [PATCH 4/5] Speed improved 5x and few more corrections after review

---
 unshred/features/text.py | 45 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/unshred/features/text.py b/unshred/features/text.py
index b86d0da..41ff357 100644
--- a/unshred/features/text.py
+++ b/unshred/features/text.py
@@ -17,15 +17,16 @@
 
 DEBUG = True
 
+# Shred will be rotated and evaluated for every angle from this range
+ANGLES_RANGE = xrange(-45, 45)
+
 # Minimum number of detected text lines.
 # If number of lines recognized are below this value - the result is undefined
 MIN_LINES_FOR_RESULT = 3
 
 # Magic values
 MAGIC_COLOR_THRESHOLD = 10
-MAGIC_LINE_THRESHOLD = 10
 MAGIC_SECTIONS_THRESHOLD = 64
-
 MAGIC_GROUP_VALUE_THRESHOLD = 0.3
 MAGIC_GROUP_LEN_THRESHOLD = 5
 
@@ -123,23 +124,23 @@ def get_histogram_for_angle(self, image, angle):
             Returns:
                 list of values. Each value is a sum of inverted pixel's for the corresponding row
         """
-
         copy = image.rotate(angle, Image.BILINEAR, True)
-
-        line_histogram = []
-
-        for i in xrange(copy.size[1]):
-            line = copy.crop((0, i, copy.size[0], i + 1))
-            value = 0
-
-            for pixel in line.getdata():
-                if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255:
-                    value += 255 - pixel[0]
-
-            line_histogram.append(value)
-
-        return line_histogram
-
+        
+        img = np.fromstring(copy.tostring(), dtype=np.uint8).reshape(copy.size[1], copy.size[0], 2)    
+   
+        alpha = img[:, :, 1]        
+        res = img[:, :, 0]
+        
+        res[res >= MAGIC_COLOR_THRESHOLD] = 255
+        res[alpha < 255] = 255
+        res = 255 - res
+
+        # Python cv2.reduce doesn't work correctly with int matrices.
+        data_for_reduce = res.astype(np.float)
+        histogram = cv2.reduce(data_for_reduce, 1, cv2.cv.CV_REDUCE_SUM)[:, 0]
+
+        return histogram
+    
     def group_section_below_threshold(self, section, group_threshold):
         if section.len > 0 and section.value < group_threshold:
             return True
@@ -158,7 +159,7 @@ def group_sections(self, sections):
         positive_sections = [s.value for s in sections if s.len > 0]
 
         if positive_sections:
-            section_avg_value = sum(positive_sections) / float(len(positive_sections))
+            section_avg_value = np.average(positive_sections)
 
         group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD
 
@@ -277,16 +278,16 @@ def info_for_angles(self, image):
                 image: grayscale python image
 
             Returns:
-                list of dicts with info for every angle tested
+                list of RotationInfo instances with info for every angle tested
         """
 
         result = []
-        for angle in xrange(-45, 45):
+        for angle in ANGLES_RANGE:
 
             if DEBUG: sys.stdout.write(".")
 
             rotation_info = self.get_rotation_info(image, angle)
-            result.append(rotation_info) # diagram, derivative
+            result.append(rotation_info)
 
         if DEBUG: sys.stdout.write("\n")
 

From f9b0974dc981c38d52fab146e97d677af16ad4ce Mon Sep 17 00:00:00 2001
From: Fedir Nepyivoda <fednep@gmail.com>
Date: Thu, 16 Oct 2014 12:02:14 +0300
Subject: [PATCH 5/5] Magic values described, typos fixed, code linted

---
 unshred/features/text.py | 169 ++++++++++++++++++++++++---------------
 1 file changed, 106 insertions(+), 63 deletions(-)

diff --git a/unshred/features/text.py b/unshred/features/text.py
index 41ff357..d606309 100644
--- a/unshred/features/text.py
+++ b/unshred/features/text.py
@@ -9,9 +9,6 @@
 
 import Image
 import ImageEnhance
-import ImageFilter
-import ImageDraw
-import ImageOps
 
 import base
 
@@ -24,43 +21,59 @@
 # If number of lines recognized are below this value - the result is undefined
 MIN_LINES_FOR_RESULT = 3
 
-# Magic values
+# The following magic values are empirical;
+# they will most likely be changed (or removed) in the future
+# while tuning the algorithm.
+
+# All pixels at or above this value (after converting to grayscale mode)
+# will be discarded (i.e. set to 255)
 MAGIC_COLOR_THRESHOLD = 10
+
+# Histogram values not above this magic are treated as empty (spacing) rows
 MAGIC_SECTIONS_THRESHOLD = 64
+
+# All text sections whose `value` is below
+# this fraction of the average `value` of text sections
+# will be merged into their neighbours or discarded
 MAGIC_GROUP_VALUE_THRESHOLD = 0.3
+
+# All empty sections (without text) with length below
+# this value will be discarded
 MAGIC_GROUP_LEN_THRESHOLD = 5
 
+
 RotationInfo = collections.namedtuple('RotationInfo', ['angle', 'nsc',
                                                        'heights', 'derivative',
                                                        'full_sections'])
 
 Section = collections.namedtuple('Section', ['pos', 'len', 'value'])
 
+
 class TextFeatures(base.AbstractShredFeature):
     """
-        Tries to guess the following features of the shred:
+    Tries to guess the following features of the shred:
 
-            * text direction (in degrees) relative to original shred
-            * number of text lines
-            * positions of text lines (after rotation) and their heights
+        * text direction (in degrees) relative to original shred
+        * number of text lines
+        * positions of text lines (after rotation) and their heights
 
-        Algorithm is pretty straightforward and slow:
+    Algorithm is pretty straightforward and slow:
 
-            1. increase shred contrast
-            2. rotate image from -45 to +45 degrees and compute
-               image line histogram (sum of inverted pixels for every line)
-            3. analyze histogram for every angle to compute resulting coefficients
-            3. sort computed parameters and choose the best match
+        1. increase shred contrast
+        2. rotate image from -45 to +45 degrees and compute
+           image line histogram (sum of inverted pixels for every line)
+        3. analyze histogram for every angle to compute resulting coefficients
+        4. sort computed parameters and choose the best match
 
-        Currently, the best match is selected by angle, at which
-        maximum number of text lines is found with minimum heights for each line.
+    Currently, the best match is selected by angle, at which
+    maximum number of text lines is found with minimum heights for each line.
 
-        TODO:
-              * better way to increase contrast of the image (based on histogram)
-              * include and analyze additional parameters of rotated shred, like
-                contrast of horizontal lines histogram, connect with lines detector
-                for more accurate results, etc..
-              * improve performance by using OpenCV/numpy for computation
+    TODO:
+          * better way to increase contrast of the image (based on histogram)
+          * include and analyze additional parameters of rotated shred, like
+            contrast of horizontal lines histogram, connect with lines detector
+            for more accurate results, etc..
+          * improve performance by using OpenCV/numpy for computation
     """
 
     def enhance(self, image, enhancer_class, value):
@@ -95,7 +108,11 @@ def get_sections(self, values):
             if value > MAGIC_SECTIONS_THRESHOLD:
 
                 if not is_in_section:
-                    sections.append(Section(len=-spacing_len, value=0, pos=i - spacing_len))
+                    sections.append(
+                        Section(len=-spacing_len,
+                                value=0,
+                                pos=i - spacing_len))
+
                     is_in_section = True
                     spacing_len = 0
 
@@ -104,7 +121,10 @@ def get_sections(self, values):
 
                 if is_in_section:
                     is_in_section = False
-                    sections.append(Section(len=len(current_section), value=sum(current_section), pos=i - len(current_section)))
+                    sections.append(
+                        Section(len=len(current_section),
+                                value=sum(current_section),
+                                pos=i - len(current_section)))
                     current_section = []
 
                 spacing_len += 1
@@ -122,15 +142,18 @@ def get_histogram_for_angle(self, image, angle):
                 angle: rotation angle in degrees
 
             Returns:
-                list of values. Each value is a sum of inverted pixel's for the corresponding row
+                1-D array of values. Each value is a sum of inverted pixels
+                for the corresponding row
         """
         copy = image.rotate(angle, Image.BILINEAR, True)
-        
-        img = np.fromstring(copy.tostring(), dtype=np.uint8).reshape(copy.size[1], copy.size[0], 2)    
-   
-        alpha = img[:, :, 1]        
+
+        img = np.fromstring(copy.tostring(),
+                            dtype=np.uint8).reshape(copy.size[1],
+                                                    copy.size[0], 2)
+
+        alpha = img[:, :, 1]
         res = img[:, :, 0]
-        
+
         res[res >= MAGIC_COLOR_THRESHOLD] = 255
         res[alpha < 255] = 255
         res = 255 - res
@@ -140,7 +163,7 @@ def get_histogram_for_angle(self, image, angle):
         histogram = cv2.reduce(data_for_reduce, 1, cv2.cv.CV_REDUCE_SUM)[:, 0]
 
         return histogram
-    
+
     def group_section_below_threshold(self, section, group_threshold):
         if section.len > 0 and section.value < group_threshold:
             return True
@@ -150,8 +173,20 @@ def group_section_below_threshold(self, section, group_threshold):
 
         return False
 
+    def join_sections(self, sections, num):
+        """ Joins sections num - 1, num and num + 1 into a single one,
+            summing up their lengths and values
+        """
+        return Section(len=sum([sections[num - 1].len,
+                                sections[num].len,
+                                sections[num + 1].len]),
+                       pos=sections[num - 1].pos,
+                       value=sum([sections[num - 1].value,
+                                  sections[num].value,
+                                  sections[num + 1].value]))
+
     def group_sections(self, sections):
-        """ Groups adjacent sections which are devided by only few pixels.
+        """ Groups adjacent sections which are divided by only few pixels.
         """
         finished = False
 
@@ -164,14 +199,13 @@ def group_sections(self, sections):
         group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD
 
         while not finished:
-
             finished = True
             for i in xrange(1, len(sections) - 1):
-                if self.group_section_below_threshold(sections[i], group_threshold):
-                    sections[i-1] = Section(len=sections[i-1].len + sections[i].len + sections[i+1].len,
-                                            pos=sections[i-1].pos,
-                                            value=sections[i-1].value + sections[i].value + sections[i+1].value)
-                    sections[i:i+2] = []
+                if self.group_section_below_threshold(sections[i],
+                                                      group_threshold):
+                    self.join_sections(sections, i)
+                    sections[i - 1] = self.join_sections(sections, i)
+                    sections[i:i + 2] = []
                     finished = False
                     break
 
@@ -201,31 +235,33 @@ def get_derivative_coef(self, histogram):
             This can be used to measure "sharpness" of the histogram
         """
         derivative = np.gradient(histogram)
-        return sum([x*x for x in derivative])
+        return sum([x * x for x in derivative])
 
     def get_rotation_info(self, image, angle):
         """
-            Rotates image and compute resulting coefficients for the specified angle
+        Rotates image and computes the resulting coefficients
+        for the specified angle
 
-            Args:
-                image: grayscale python image
-                angle: angle for which to rotate an image
+        Args:
+            image: grayscale python image
+            angle: angle for which to rotate an image
 
-            Returns:
+        Returns:
 
-                dictionary with values for the specified angle
+            RotationInfo with values for the specified angle
 
-                Coefficients currently computed:
-                    nsc (Normalized Sections Count) - number of text lines,
-                         without those lines, which have very little pixels in them
+            Coefficients currently computed:
+                nsc (Normalized Sections Count) - number of text lines,
+                     without those lines, which have very
+                     little pixels in them
 
-                    heights - sum of heights of lines
+                heights - sum of heights of lines
 
-                Additional lists returned (currently used only for debug and experiments):
-                    derivative_pos: list of positive derivatives values for histogram
-                    derivative_neg: list of negative derivatives values for histogram
-                    full_sections:  list of sections with enough data for analysis
-                    sections:       list of all sections
+            Additional lists returned (currently used for debug only):
+                derivative_pos: list of positive derivatives for histogram
+                derivative_neg: list of negative derivatives for histogram
+                full_sections:  list of sections with enough data for analysis
+                sections:       list of all sections
         """
 
         diagram = self.get_histogram_for_angle(image, angle)
@@ -242,9 +278,13 @@ def get_rotation_info(self, image, angle):
 
         if len(sections) > 0:
             # get average section size
-            section_avg_value = sum([s.value for s in sections]) / float(len(sections))
+            section_avg_value = (sum([s.value for s in sections])
+                                 / float(len(sections)))
+
+            full_threshold = MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value
+            full_sections = [s for s in sections
+                             if s.value > full_threshold]
 
-            full_sections = [s for s in sections if s.value > MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value]
             normalized_sections_count = len(full_sections)
 
             sections_heights = sum(map(lambda s: s.len, full_sections))
@@ -284,12 +324,14 @@ def info_for_angles(self, image):
         result = []
         for angle in ANGLES_RANGE:
 
-            if DEBUG: sys.stdout.write(".")
+            if DEBUG:
+                sys.stdout.write(".")
 
             rotation_info = self.get_rotation_info(image, angle)
             result.append(rotation_info)
 
-        if DEBUG: sys.stdout.write("\n")
+        if DEBUG:
+            sys.stdout.write("\n")
 
         self.sort_result(result)
 
@@ -316,10 +358,11 @@ def get_info(self, shred, contour, name):
             result.save("results/%s" % (name))
 
         if top_result.nsc >= MIN_LINES_FOR_RESULT:
-            return {'text_angle' : resulting_angle,
-                    'text_sections' : [{'pos' : s.pos, 'length' : s.len} for s in top_result.full_sections]}
+            return {'text_angle': resulting_angle,
+                    'text_sections': [{'pos': s.pos, 'length': s.len}
+                                      for s in top_result.full_sections]}
         else:
-            return {'text_angle' : "undefined"}
+            return {'text_angle': "undefined"}
 
 if __name__ == '__main__':
 
@@ -332,10 +375,10 @@ def process_shred(full_name):
 
         result = features.get_info(cv_image, None, file_name)
 
-        if result == None:
+        if result is None:
             return
 
-        with open("results/%s.json" %(file_name), "wt") as f_info:
+        with open("results/%s.json" % (file_name), "wt") as f_info:
             f_info.write(json.dumps(result, sort_keys=True,
                          indent=4, separators=(',', ': ')))