From 3635399205171e1104de72081216c845e7ce02dc Mon Sep 17 00:00:00 2001 From: Fedir Nepyivoda Date: Thu, 4 Sep 2014 13:34:49 +0300 Subject: [PATCH 1/5] Text direction feature detector added --- unshred/features/text.py | 361 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 unshred/features/text.py diff --git a/unshred/features/text.py b/unshred/features/text.py new file mode 100644 index 0000000..9e2fb2b --- /dev/null +++ b/unshred/features/text.py @@ -0,0 +1,361 @@ +import sys +import json +import os.path +import glob + +import cv2 +import numpy as np + +import Image +import ImageEnhance +import ImageFilter +import ImageDraw +import ImageOps + +import base + +DEBUG = True + +# Magic values +MAGIC_COLOR_THREASHOLD = 10 +MAGIC_LINE_THRESHOLD = 10 +MAGIC_SECTIONS_THREASHOLD = 64 +MAGIN_GROUP_THREASHOLD = 5 + +class TextFeatures(base.AbstractShredFeature): + """ + Tries to guess the following features of the shread: + + * text direction (in angles) relative to original shread + * number of text lines + * positions of text lines (after rotation) and their heights + + Algorithm is pretty straightforward and slow: + + 1. increase shread contrast + 2. rotate image from -45 to +45 angles and compute + image line histogram (sum of inverted pixels for every line) + 3. analyze histogram for every angle to compute resuling coefficients + 3. sort computed parameters and choose the best match + + Currently, the best match is selected by angle, at which + maximum number of text lines is found with minimum heights for each line. + + TODO: + * better way to increase contrast of the image (based on histogram) + * include and analyze additional parameters of rotated shread, like + contrast of horizontal lines histogram, connect with lines detector + for more accurate results, etc.. 
+ * improve performance by using OpenCV/numpy for computation + + """ + + def enhance(self, image, enhancer_class, value): + enhancer = enhancer_class(image); + return enhancer.enhance(value) + + def desaturate(self, image): + """ Get green component from the PIL image + """ + r, g, b, a = image.split() + return Image.merge("RGBA", (g,g,g,a)) + + def get_derivative(self, values): + """ Calculate derivative of a list of values + """ + result = [] + for i in xrange(1, len(values) - 1): + result.append((values[i + 1] - values[i - 1]) / 2) + + return result + + def get_sections(self, values): + """ Analyze lines histogram and return list of sections + (consecutive blocks of data values > threashold) + + Args: + values: horizontal line histogram + + Returns: + + List of Sections. + Section is an uninterrupted part of histogram + dictionary with keys: + + pos: start position of section + len: length of section + value: inverted sum of pixels for that section + """ + sections = [] + + current_section = [] + is_in_section = False + spacing = [] + position = 0 + + for i in xrange(len(values)): + + value = values[i] + + if value > MAGIC_SECTIONS_THREASHOLD: + + if is_in_section == False: + sections.append({'len' : -len(spacing), 'value' : 0, 'pos': i - len(spacing)}) + is_in_section = True + spacing = [] + + current_section.append(value) + else: + + if is_in_section == True: + is_in_section = False; + sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)}) + current_section = [] + + spacing.append(' ') + + + return sections + + def get_histogram_for_angle(self, image, angle): + """ + Rotates an image for the specified angle and calculates + sum of pixel values for every row + + Args: + + image: PIL image object + angle: rotation angle + + Returns: + list of values. 
Each value is a sum of inverted pixel's for the corresponding row + """ + + total_lines = 0 + copy = image.rotate(angle, Image.BILINEAR, True) + + x = 0 + y = 0 + + line_data = 0 + + line_histogram = [] + + for data in copy.getdata(): + + if data[0] < MAGIC_COLOR_THREASHOLD and data[3] == 255: + line_data += 255 - data[0] + + x += 1 + + if x >= copy.size[0]: + if line_data > MAGIC_LINE_THRESHOLD: + total_lines += 1 + + line_histogram.append(line_data) + + line_data = 0 + + x = 0 + y += 1 + + return line_histogram + + def group_sections(self, sections): + """ Groups adjacent sections which are devided by only few pixels. + """ + finished = False + + while not finished: + + finished = True + for i in xrange(1, len(sections) - 1): + if sections[i]['len'] < 0 and abs(sections[i]['len']) < MAGIN_GROUP_THREASHOLD: + sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len'] + sections[i-1]['value'] += sections[i+1]['value'] + + sections[i:i+2] = [] + finished = False + break + + + if len(sections) == 0: + return + + if abs(sections[0]['len']) < MAGIN_GROUP_THREASHOLD: + sections[0:1] = [] + + if len(sections) == 0: + return + + if abs(sections[-1]['len']) < MAGIN_GROUP_THREASHOLD: + sections[-1:] = [] + + def log_format_sections(self, sections): + """ Formats sections in human-readable form for debugging + """ + + data = [] + for section in sections: + data.append("%s (%s)" % (section['len'], section['value'])) + + return ", ".join(data) + + def get_rotation_info(self, image, angle): + """ + Rotates image and compute resulting coefficients for the specified angle + + Args: + image: grayscale python image + angle: angle for which to rotate an image + + Returns: + + dictionary with values for the specified angle + + Coefficients currently computed: + nsc (Normalized Sections Count) - number of text lines, + without those lines, which have very little pixels in them + + heights - sum of heights of lines + + Additional lists returned (currently used only for debug and 
experiments): + derivative_pos: list of positive derivatives values for histogram + derivative_neg: list of negative derivatives values for histogram + full_sections: list of sections with enough data for analysis + sections: list of all sections + """ + + diagram = self.get_histogram_for_angle(image, angle) + derivative = self.get_derivative(diagram) + sections = self.get_sections(diagram) + + self.group_sections(sections) + + + # Remove all spacing sections + sections = [s for s in sections if s['len'] > 0] + #positive_sections = [s for s in sections if s['len'] > 0] + + full_sections = [] + normalized_sections_count = 0 + sections_heights = 0 + + if len(sections) > 0: + # get average section size + section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections)) + + full_sections = [s for s in sections if s['value'] > 0.5*section_avg_value] + normalized_sections_count = len(full_sections) + + sections_heights = sum( map(lambda x: x['len'], full_sections) ) + + positive = [x for x in derivative if x > 0] + negative = [x for x in derivative if x < 0] + + positive.sort() + positive.reverse() + + negative.sort() + + positive_sum = sum(positive[:5]) + nagative_sum = sum(negative[:5]) + + return {'angle' : angle, + 'nsc': normalized_sections_count, + 'heights': sections_heights, + 'derivative_pos' : positive_sum, + 'derivative_neg' : nagative_sum, + 'full_sections': full_sections, + 'sections': sections} + + def sort_result(self, result): + """ Sort result by important parameters + + Args: + result: list of dictionaries for each tested angle + + Returns: + sorted dict with the most accurate result first + """ + + def sort_fun2(a, b): + if b['nsc'] == a['nsc']: + return a['heights'] - b['heights'] + + return b['nsc'] - a['nsc'] + + def sort_fun(a, b): + if len(b['sections']) == len(a['sections']): + return b['derivative_pos'] - a['derivative_pos'] + + return len(b['sections']) - len(a['sections']) + + result.sort( sort_fun2 ) + + def 
info_for_angles(self, image): + """Args: + image: grayscale python image + + Returns: + list of dicts with info for every angle tested + """ + + result = [] + for angle in xrange(-45, 45): + + if DEBUG: sys.stdout.write(".") + + rotation_info = self.get_rotation_info(image, angle) + result.append(rotation_info) # diagram, derivative + + if DEBUG: sys.stdout.write("\n") + + self.sort_result(result) + + return result + + + def get_info(self, shred, contour, name): + + if DEBUG: + print "Processing file: %s" % (name) + + image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA)) + + image = self.desaturate(image) + + image = self.enhance(image, ImageEnhance.Brightness, 1.5); + image = self.enhance(image, ImageEnhance.Contrast, 3); + + results = self.info_for_angles(image) + + top_result = results[0] + resulting_angle = top_result['angle'] + + if DEBUG: + result = image.rotate(resulting_angle, Image.BILINEAR, True) + result.save("results/%s" % (name)) + + return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]} + + +if __name__ == '__main__': + + IMAGE_PATH = "C:\\Development\\shreder\\test-rotation\\direct\\img" + FILE_NAME = "11.png" + + for full_name in glob.glob("%s\\*.png" % (IMAGE_PATH)): + + features = TextFeatures(None) + cv_image = cv2.imread(full_name, -1) + + file_name = os.path.split(full_name)[1] + + result = features.get_info(cv_image, None, file_name) + + with open("results/%s.json" %(file_name), "wt") as f_info: + f_info.write( json.dumps(result, sort_keys=True, + indent=4, separators=(',', ': ')) ) + +# rotate("img/74.png") From 766fd0cf1c45c87b6b405ae4d54743143982be5e Mon Sep 17 00:00:00 2001 From: Fedir Nepyivoda Date: Fri, 5 Sep 2014 21:50:36 +0300 Subject: [PATCH 2/5] More accurate results, returns "undefined" angle if not sure, fixes after review --- unshred/features/text.py | 158 +++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 71 
deletions(-) diff --git a/unshred/features/text.py b/unshred/features/text.py index 9e2fb2b..30e0ab8 100644 --- a/unshred/features/text.py +++ b/unshred/features/text.py @@ -16,11 +16,17 @@ DEBUG = True +# Minimum number of detected text lines. +# If numer of lines recognized are below this value - the result is undefined +MIN_LINES_FOR_RESULT = 3 + # Magic values -MAGIC_COLOR_THREASHOLD = 10 +MAGIC_COLOR_THRESHOLD = 10 MAGIC_LINE_THRESHOLD = 10 -MAGIC_SECTIONS_THREASHOLD = 64 -MAGIN_GROUP_THREASHOLD = 5 +MAGIC_SECTIONS_THRESHOLD = 64 + +MAGIN_GROUP_VALUE_THRESHOLD = 0.3 +MAGIC_GROUP_LEN_THRESHOLD = 5 class TextFeatures(base.AbstractShredFeature): """ @@ -65,7 +71,7 @@ def get_derivative(self, values): """ result = [] for i in xrange(1, len(values) - 1): - result.append((values[i + 1] - values[i - 1]) / 2) + result.append((values[i + 1] - values[i - 1]) / 2.0) return result @@ -90,31 +96,28 @@ def get_sections(self, values): current_section = [] is_in_section = False - spacing = [] + spacing_len = 0 position = 0 - for i in xrange(len(values)): - - value = values[i] + for i, value in enumerate(values): - if value > MAGIC_SECTIONS_THREASHOLD: + if value > MAGIC_SECTIONS_THRESHOLD: - if is_in_section == False: - sections.append({'len' : -len(spacing), 'value' : 0, 'pos': i - len(spacing)}) + if not is_in_section: + sections.append({'len' : -spacing_len, 'value' : 0, 'pos': i - spacing_len}) is_in_section = True - spacing = [] + spacing_len = 0 current_section.append(value) else: - if is_in_section == True: + if is_in_section: is_in_section = False; sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)}) current_section = [] - spacing.append(' ') + spacing_len += 1 - return sections def get_histogram_for_angle(self, image, angle): @@ -131,64 +134,67 @@ def get_histogram_for_angle(self, image, angle): list of values. 
Each value is a sum of inverted pixel's for the corresponding row """ - total_lines = 0 copy = image.rotate(angle, Image.BILINEAR, True) - x = 0 - y = 0 - - line_data = 0 - line_histogram = [] + + for i in xrange(copy.size[1]): + line = copy.crop( (0, i, copy.size[0], i + 1)) - for data in copy.getdata(): - - if data[0] < MAGIC_COLOR_THREASHOLD and data[3] == 255: - line_data += 255 - data[0] - - x += 1 - - if x >= copy.size[0]: - if line_data > MAGIC_LINE_THRESHOLD: - total_lines += 1 - - line_histogram.append(line_data) + value = 0 - line_data = 0 + for pixel in line.getdata(): + if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255: + value += 255 - pixel[0] + + line_histogram.append(value) + + return line_histogram - x = 0 - y += 1 + def group_section_below_threshold(self, section, group_threshold): + if section['len'] > 0 and section['value'] < group_threshold: + return True + + if section['len'] <= 0 and section['len'] > - MAGIC_GROUP_LEN_THRESHOLD: + return True - return line_histogram + return False def group_sections(self, sections): """ Groups adjacent sections which are devided by only few pixels. 
""" finished = False + section_avg_value = 0 + positive_sections = [s['value'] for s in sections if s['len'] > 0] + + if len(positive_sections) > 0: + section_avg_value = sum( positive_sections ) / float(len(positive_sections)) + + group_threshold = section_avg_value * MAGIN_GROUP_VALUE_THRESHOLD + while not finished: finished = True for i in xrange(1, len(sections) - 1): - if sections[i]['len'] < 0 and abs(sections[i]['len']) < MAGIN_GROUP_THREASHOLD: + if self.group_section_below_threshold(sections[i], group_threshold): sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len'] sections[i-1]['value'] += sections[i+1]['value'] sections[i:i+2] = [] finished = False - break - + break if len(sections) == 0: return - if abs(sections[0]['len']) < MAGIN_GROUP_THREASHOLD: + if self.group_section_below_threshold(sections[0], group_threshold): sections[0:1] = [] if len(sections) == 0: return - if abs(sections[-1]['len']) < MAGIN_GROUP_THREASHOLD: + if self.group_section_below_threshold(sections[-1], group_threshold): sections[-1:] = [] def log_format_sections(self, sections): @@ -201,6 +207,13 @@ def log_format_sections(self, sections): return ", ".join(data) + def get_derivative_coef(self, histogram): + """ Calculates the square sum of derivative from histogram + This can be used to measure "sharpness" of the histogram + """ + derivative = self.get_derivative(histogram) + return sum(map(lambda x: x*x, derivative)) + def get_rotation_info(self, image, angle): """ Rotates image and compute resulting coefficients for the specified angle @@ -227,7 +240,6 @@ def get_rotation_info(self, image, angle): """ diagram = self.get_histogram_for_angle(image, angle) - derivative = self.get_derivative(diagram) sections = self.get_sections(diagram) self.group_sections(sections) @@ -245,29 +257,16 @@ def get_rotation_info(self, image, angle): # get average section size section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections)) - full_sections = [s for s in 
sections if s['value'] > 0.5*section_avg_value] + full_sections = [s for s in sections if s['value'] > 0.3 * section_avg_value] normalized_sections_count = len(full_sections) sections_heights = sum( map(lambda x: x['len'], full_sections) ) - - positive = [x for x in derivative if x > 0] - negative = [x for x in derivative if x < 0] - - positive.sort() - positive.reverse() - - negative.sort() - - positive_sum = sum(positive[:5]) - nagative_sum = sum(negative[:5]) return {'angle' : angle, 'nsc': normalized_sections_count, 'heights': sections_heights, - 'derivative_pos' : positive_sum, - 'derivative_neg' : nagative_sum, - 'full_sections': full_sections, - 'sections': sections} + 'derivative': self.get_derivative_coef(diagram), + 'full_sections': full_sections} def sort_result(self, result): """ Sort result by important parameters @@ -286,10 +285,10 @@ def sort_fun2(a, b): return b['nsc'] - a['nsc'] def sort_fun(a, b): - if len(b['sections']) == len(a['sections']): - return b['derivative_pos'] - a['derivative_pos'] + if b['nsc'] == a['nsc']: + return b['derivative'] - a['derivative'] - return len(b['sections']) - len(a['sections']) + return b['nsc'] - a['nsc'] result.sort( sort_fun2 ) @@ -322,8 +321,7 @@ def get_info(self, shred, contour, name): print "Processing file: %s" % (name) image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA)) - - image = self.desaturate(image) + image = image.convert("LA") image = self.enhance(image, ImageEnhance.Brightness, 1.5); image = self.enhance(image, ImageEnhance.Contrast, 3); @@ -337,15 +335,14 @@ def get_info(self, shred, contour, name): result = image.rotate(resulting_angle, Image.BILINEAR, True) result.save("results/%s" % (name)) - return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]} - + if top_result['nsc'] >= MIN_LINES_FOR_RESULT: + return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for 
s in top_result['full_sections']]} + else: + return {'text_angle' : "undefined" } if __name__ == '__main__': - IMAGE_PATH = "C:\\Development\\shreder\\test-rotation\\direct\\img" - FILE_NAME = "11.png" - - for full_name in glob.glob("%s\\*.png" % (IMAGE_PATH)): + def process_shred(full_name): features = TextFeatures(None) cv_image = cv2.imread(full_name, -1) @@ -354,8 +351,27 @@ def get_info(self, shred, contour, name): result = features.get_info(cv_image, None, file_name) + if result == None: + return + with open("results/%s.json" %(file_name), "wt") as f_info: f_info.write( json.dumps(result, sort_keys=True, indent=4, separators=(',', ': ')) ) -# rotate("img/74.png") + if len(sys.argv) < 2: + print "Error: Please specify path or file" + sys.exit(255) + + path = sys.argv[1] + + if os.path.isfile(path): + process_shred(path) + else: + + for full_name in glob.glob("%s\\*.png" % (path)): + + if full_name.count("_ctx") > 0 or full_name.count("_mask") > 0: + continue + + process_shred(full_name) + \ No newline at end of file From c6d4a62a9a3ce198a710a721e0a1294b7fe67649 Mon Sep 17 00:00:00 2001 From: Fedir Nepyivoda Date: Sun, 7 Sep 2014 17:09:00 +0300 Subject: [PATCH 3/5] Improvements after review --- unshred/features/text.py | 207 +++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 115 deletions(-) diff --git a/unshred/features/text.py b/unshred/features/text.py index 30e0ab8..b86d0da 100644 --- a/unshred/features/text.py +++ b/unshred/features/text.py @@ -2,6 +2,7 @@ import json import os.path import glob +import collections import cv2 import numpy as np @@ -16,68 +17,58 @@ DEBUG = True -# Minimum number of detected text lines. -# If numer of lines recognized are below this value - the result is undefined -MIN_LINES_FOR_RESULT = 3 +# Minimum number of detected text lines. 
+# If number of lines recognized are below this value - the result is undefined +MIN_LINES_FOR_RESULT = 3 # Magic values MAGIC_COLOR_THRESHOLD = 10 MAGIC_LINE_THRESHOLD = 10 MAGIC_SECTIONS_THRESHOLD = 64 -MAGIN_GROUP_VALUE_THRESHOLD = 0.3 +MAGIC_GROUP_VALUE_THRESHOLD = 0.3 MAGIC_GROUP_LEN_THRESHOLD = 5 +RotationInfo = collections.namedtuple('RotationInfo', ['angle', 'nsc', + 'heights', 'derivative', + 'full_sections']) + +Section = collections.namedtuple('Section', ['pos', 'len', 'value']) + class TextFeatures(base.AbstractShredFeature): - """ - Tries to guess the following features of the shread: + """ + Tries to guess the following features of the shred: - * text direction (in angles) relative to original shread + * text direction (in degrees) relative to original shred * number of text lines * positions of text lines (after rotation) and their heights Algorithm is pretty straightforward and slow: - - 1. increase shread contrast - 2. rotate image from -45 to +45 angles and compute + + 1. increase shred contrast + 2. rotate image from -45 to +45 degrees and compute image line histogram (sum of inverted pixels for every line) - 3. analyze histogram for every angle to compute resuling coefficients + 3. analyze histogram for every angle to compute resulting coefficients 3. sort computed parameters and choose the best match - - Currently, the best match is selected by angle, at which + + Currently, the best match is selected by angle, at which maximum number of text lines is found with minimum heights for each line. - TODO: + TODO: * better way to increase contrast of the image (based on histogram) - * include and analyze additional parameters of rotated shread, like - contrast of horizontal lines histogram, connect with lines detector + * include and analyze additional parameters of rotated shred, like + contrast of horizontal lines histogram, connect with lines detector for more accurate results, etc.. 
* improve performance by using OpenCV/numpy for computation - """ def enhance(self, image, enhancer_class, value): - enhancer = enhancer_class(image); + enhancer = enhancer_class(image) return enhancer.enhance(value) - def desaturate(self, image): - """ Get green component from the PIL image - """ - r, g, b, a = image.split() - return Image.merge("RGBA", (g,g,g,a)) - - def get_derivative(self, values): - """ Calculate derivative of a list of values - """ - result = [] - for i in xrange(1, len(values) - 1): - result.append((values[i + 1] - values[i - 1]) / 2.0) - - return result - def get_sections(self, values): - """ Analyze lines histogram and return list of sections - (consecutive blocks of data values > threashold) + """ Analyze lines histogram and return list of Sections + (consecutive blocks of data values > threshold) Args: values: horizontal line histogram @@ -97,14 +88,13 @@ def get_sections(self, values): current_section = [] is_in_section = False spacing_len = 0 - position = 0 for i, value in enumerate(values): if value > MAGIC_SECTIONS_THRESHOLD: if not is_in_section: - sections.append({'len' : -spacing_len, 'value' : 0, 'pos': i - spacing_len}) + sections.append(Section(len=-spacing_len, value=0, pos=i - spacing_len)) is_in_section = True spacing_len = 0 @@ -112,23 +102,23 @@ def get_sections(self, values): else: if is_in_section: - is_in_section = False; - sections.append({ 'len' : len(current_section), 'value' : sum(current_section), 'pos' : i - len(current_section)}) + is_in_section = False + sections.append(Section(len=len(current_section), value=sum(current_section), pos=i - len(current_section))) current_section = [] spacing_len += 1 - return sections + return sections def get_histogram_for_angle(self, image, angle): - """ + """ Rotates an image for the specified angle and calculates sum of pixel values for every row Args: image: PIL image object - angle: rotation angle + angle: rotation angle in degrees Returns: list of values. 
Each value is a sum of inverted pixel's for the corresponding row @@ -137,61 +127,60 @@ def get_histogram_for_angle(self, image, angle): copy = image.rotate(angle, Image.BILINEAR, True) line_histogram = [] - - for i in xrange(copy.size[1]): - line = copy.crop( (0, i, copy.size[0], i + 1)) + for i in xrange(copy.size[1]): + line = copy.crop((0, i, copy.size[0], i + 1)) value = 0 for pixel in line.getdata(): if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255: value += 255 - pixel[0] - - line_histogram.append(value) - + + line_histogram.append(value) + return line_histogram def group_section_below_threshold(self, section, group_threshold): - if section['len'] > 0 and section['value'] < group_threshold: + if section.len > 0 and section.value < group_threshold: return True - - if section['len'] <= 0 and section['len'] > - MAGIC_GROUP_LEN_THRESHOLD: + + if section.len <= 0 and section.len > - MAGIC_GROUP_LEN_THRESHOLD: return True return False def group_sections(self, sections): """ Groups adjacent sections which are devided by only few pixels. 
- """ + """ finished = False section_avg_value = 0 - positive_sections = [s['value'] for s in sections if s['len'] > 0] + positive_sections = [s.value for s in sections if s.len > 0] - if len(positive_sections) > 0: - section_avg_value = sum( positive_sections ) / float(len(positive_sections)) + if positive_sections: + section_avg_value = sum(positive_sections) / float(len(positive_sections)) - group_threshold = section_avg_value * MAGIN_GROUP_VALUE_THRESHOLD + group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD while not finished: finished = True for i in xrange(1, len(sections) - 1): if self.group_section_below_threshold(sections[i], group_threshold): - sections[i-1]['len'] += sections[i+1]['len'] + sections[i]['len'] - sections[i-1]['value'] += sections[i+1]['value'] - + sections[i-1] = Section(len=sections[i-1].len + sections[i].len + sections[i+1].len, + pos=sections[i-1].pos, + value=sections[i-1].value + sections[i].value + sections[i+1].value) sections[i:i+2] = [] finished = False break - if len(sections) == 0: + if not sections: return if self.group_section_below_threshold(sections[0], group_threshold): sections[0:1] = [] - if len(sections) == 0: + if not sections: return if self.group_section_below_threshold(sections[-1], group_threshold): @@ -200,10 +189,9 @@ def group_sections(self, sections): def log_format_sections(self, sections): """ Formats sections in human-readable form for debugging """ - data = [] for section in sections: - data.append("%s (%s)" % (section['len'], section['value'])) + data.append("%s (%s)" % (section.len, section.value)) return ", ".join(data) @@ -211,13 +199,13 @@ def get_derivative_coef(self, histogram): """ Calculates the square sum of derivative from histogram This can be used to measure "sharpness" of the histogram """ - derivative = self.get_derivative(histogram) - return sum(map(lambda x: x*x, derivative)) + derivative = np.gradient(histogram) + return sum([x*x for x in derivative]) def 
get_rotation_info(self, image, angle): - """ + """ Rotates image and compute resulting coefficients for the specified angle - + Args: image: grayscale python image angle: angle for which to rotate an image @@ -225,12 +213,12 @@ def get_rotation_info(self, image, angle): Returns: dictionary with values for the specified angle - + Coefficients currently computed: - nsc (Normalized Sections Count) - number of text lines, - without those lines, which have very little pixels in them + nsc (Normalized Sections Count) - number of text lines, + without those lines, which have very little pixels in them - heights - sum of heights of lines + heights - sum of heights of lines Additional lists returned (currently used only for debug and experiments): derivative_pos: list of positive derivatives values for histogram @@ -240,14 +228,12 @@ def get_rotation_info(self, image, angle): """ diagram = self.get_histogram_for_angle(image, angle) - sections = self.get_sections(diagram) - - self.group_sections(sections) + sections = self.get_sections(diagram) + self.group_sections(sections) # Remove all spacing sections - sections = [s for s in sections if s['len'] > 0] - #positive_sections = [s for s in sections if s['len'] > 0] + sections = [s for s in sections if s.len > 0] full_sections = [] normalized_sections_count = 0 @@ -255,18 +241,18 @@ def get_rotation_info(self, image, angle): if len(sections) > 0: # get average section size - section_avg_value = sum( [s['value'] for s in sections] ) / float(len(sections)) + section_avg_value = sum([s.value for s in sections]) / float(len(sections)) - full_sections = [s for s in sections if s['value'] > 0.3 * section_avg_value] + full_sections = [s for s in sections if s.value > MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value] normalized_sections_count = len(full_sections) - sections_heights = sum( map(lambda x: x['len'], full_sections) ) + sections_heights = sum(map(lambda s: s.len, full_sections)) - return {'angle' : angle, - 'nsc': 
normalized_sections_count, - 'heights': sections_heights, - 'derivative': self.get_derivative_coef(diagram), - 'full_sections': full_sections} + return RotationInfo(angle=angle, + nsc=normalized_sections_count, + heights=sections_heights, + derivative=self.get_derivative_coef(diagram), + full_sections=full_sections) def sort_result(self, result): """ Sort result by important parameters @@ -278,19 +264,13 @@ def sort_result(self, result): sorted dict with the most accurate result first """ - def sort_fun2(a, b): - if b['nsc'] == a['nsc']: - return a['heights'] - b['heights'] - - return b['nsc'] - a['nsc'] - def sort_fun(a, b): - if b['nsc'] == a['nsc']: - return b['derivative'] - a['derivative'] - - return b['nsc'] - a['nsc'] + if b.nsc == a.nsc: + return cmp(a.heights, b.heights) - result.sort( sort_fun2 ) + return cmp(b.nsc, a.nsc) + + result.sort(sort_fun) def info_for_angles(self, image): """Args: @@ -302,44 +282,44 @@ def info_for_angles(self, image): result = [] for angle in xrange(-45, 45): - + if DEBUG: sys.stdout.write(".") rotation_info = self.get_rotation_info(image, angle) result.append(rotation_info) # diagram, derivative - if DEBUG: sys.stdout.write("\n") + if DEBUG: sys.stdout.write("\n") self.sort_result(result) return result - def get_info(self, shred, contour, name): - if DEBUG: + if DEBUG: print "Processing file: %s" % (name) image = Image.fromarray(cv2.cvtColor(shred, cv2.COLOR_BGRA2RGBA)) image = image.convert("LA") - image = self.enhance(image, ImageEnhance.Brightness, 1.5); - image = self.enhance(image, ImageEnhance.Contrast, 3); + image = self.enhance(image, ImageEnhance.Brightness, 1.5) + image = self.enhance(image, ImageEnhance.Contrast, 3) results = self.info_for_angles(image) - top_result = results[0] - resulting_angle = top_result['angle'] + top_result = results[0] + resulting_angle = top_result.angle if DEBUG: - result = image.rotate(resulting_angle, Image.BILINEAR, True) + result = image.rotate(resulting_angle, Image.BILINEAR, True) 
result.save("results/%s" % (name)) - - if top_result['nsc'] >= MIN_LINES_FOR_RESULT: - return {'text_angle' : resulting_angle, 'text_sections' : [{'pos' : s['pos'], 'length' : s['len']} for s in top_result['full_sections']]} + + if top_result.nsc >= MIN_LINES_FOR_RESULT: + return {'text_angle' : resulting_angle, + 'text_sections' : [{'pos' : s.pos, 'length' : s.len} for s in top_result.full_sections]} else: - return {'text_angle' : "undefined" } - + return {'text_angle' : "undefined"} + if __name__ == '__main__': def process_shred(full_name): @@ -355,8 +335,8 @@ def process_shred(full_name): return with open("results/%s.json" %(file_name), "wt") as f_info: - f_info.write( json.dumps(result, sort_keys=True, - indent=4, separators=(',', ': ')) ) + f_info.write(json.dumps(result, sort_keys=True, + indent=4, separators=(',', ': '))) if len(sys.argv) < 2: print "Error: Please specify path or file" @@ -367,11 +347,8 @@ def process_shred(full_name): if os.path.isfile(path): process_shred(path) else: - - for full_name in glob.glob("%s\\*.png" % (path)): - + for full_name in glob.glob("%s\\*.png" % (path)): if full_name.count("_ctx") > 0 or full_name.count("_mask") > 0: continue process_shred(full_name) - \ No newline at end of file From 97718280e6526726e2b61ffca19da6a87966af7a Mon Sep 17 00:00:00 2001 From: Fedir Nepyivoda Date: Sat, 13 Sep 2014 18:13:36 +0300 Subject: [PATCH 4/5] Speed improved 5x and few more corrections after review --- unshred/features/text.py | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/unshred/features/text.py b/unshred/features/text.py index b86d0da..41ff357 100644 --- a/unshred/features/text.py +++ b/unshred/features/text.py @@ -17,15 +17,16 @@ DEBUG = True +# Shred will be rotated and evaluated for every angle from this range +ANGLES_RANGE = xrange(-45, 45) + # Minimum number of detected text lines. 
# If number of lines recognized are below this value - the result is undefined MIN_LINES_FOR_RESULT = 3 # Magic values MAGIC_COLOR_THRESHOLD = 10 -MAGIC_LINE_THRESHOLD = 10 MAGIC_SECTIONS_THRESHOLD = 64 - MAGIC_GROUP_VALUE_THRESHOLD = 0.3 MAGIC_GROUP_LEN_THRESHOLD = 5 @@ -123,23 +124,23 @@ def get_histogram_for_angle(self, image, angle): Returns: list of values. Each value is a sum of inverted pixel's for the corresponding row """ - copy = image.rotate(angle, Image.BILINEAR, True) - - line_histogram = [] - - for i in xrange(copy.size[1]): - line = copy.crop((0, i, copy.size[0], i + 1)) - value = 0 - - for pixel in line.getdata(): - if pixel[0] < MAGIC_COLOR_THRESHOLD and pixel[1] == 255: - value += 255 - pixel[0] - - line_histogram.append(value) - - return line_histogram - + + img = np.fromstring(copy.tostring(), dtype=np.uint8).reshape(copy.size[1], copy.size[0], 2) + + alpha = img[:, :, 1] + res = img[:, :, 0] + + res[res >= MAGIC_COLOR_THRESHOLD] = 255 + res[alpha < 255] = 255 + res = 255 - res + + # Python cv2.reduce doesn't work correctly with int matrices. 
+ data_for_reduce = res.astype(np.float) + histogram = cv2.reduce(data_for_reduce, 1, cv2.cv.CV_REDUCE_SUM)[:, 0] + + return histogram + def group_section_below_threshold(self, section, group_threshold): if section.len > 0 and section.value < group_threshold: return True @@ -158,7 +159,7 @@ def group_sections(self, sections): positive_sections = [s.value for s in sections if s.len > 0] if positive_sections: - section_avg_value = sum(positive_sections) / float(len(positive_sections)) + section_avg_value = np.average(positive_sections) group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD @@ -277,16 +278,16 @@ def info_for_angles(self, image): image: grayscale python image Returns: - list of dicts with info for every angle tested + list of RotationInfo instances with info for every angle tested """ result = [] - for angle in xrange(-45, 45): + for angle in ANGLES_RANGE: if DEBUG: sys.stdout.write(".") rotation_info = self.get_rotation_info(image, angle) - result.append(rotation_info) # diagram, derivative + result.append(rotation_info) if DEBUG: sys.stdout.write("\n") From f9b0974dc981c38d52fab146e97d677af16ad4ce Mon Sep 17 00:00:00 2001 From: Fedir Nepyivoda Date: Thu, 16 Oct 2014 12:02:14 +0300 Subject: [PATCH 5/5] Magic values described, typos fixed, code linted --- unshred/features/text.py | 169 ++++++++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 63 deletions(-) diff --git a/unshred/features/text.py b/unshred/features/text.py index 41ff357..d606309 100644 --- a/unshred/features/text.py +++ b/unshred/features/text.py @@ -9,9 +9,6 @@ import Image import ImageEnhance -import ImageFilter -import ImageDraw -import ImageOps import base @@ -24,43 +21,59 @@ # If number of lines recognized are below this value - the result is undefined MIN_LINES_FOR_RESULT = 3 -# Magic values +# The following magic values are empirical, +# they most likely will be changed (or removed) in future +# while tuning the algorithm. 
+ +# All pixels above this value (after converting to grayscale mode) +# will be discarded (i.e. equal 255) MAGIC_COLOR_THRESHOLD = 10 + +# All values in histogram below this magic will be equal to zero MAGIC_SECTIONS_THRESHOLD = 64 + +# All text sections whose value (sum of inverted pixels) is below +# this fraction of the average section value are treated as noise +# (merged into neighbours or left out of the line count) MAGIC_GROUP_VALUE_THRESHOLD = 0.3 + +# All empty sections (without text) with length below +# this value will be discarded MAGIC_GROUP_LEN_THRESHOLD = 5 + RotationInfo = collections.namedtuple('RotationInfo', ['angle', 'nsc', 'heights', 'derivative', 'full_sections']) Section = collections.namedtuple('Section', ['pos', 'len', 'value']) + class TextFeatures(base.AbstractShredFeature): """ - Tries to guess the following features of the shred: + Tries to guess the following features of the shred: - * text direction (in degrees) relative to original shred - * number of text lines - * positions of text lines (after rotation) and their heights + * text direction (in degrees) relative to original shred + * number of text lines + * positions of text lines (after rotation) and their heights - Algorithm is pretty straightforward and slow: + Algorithm is pretty straightforward and slow: - 1. increase shred contrast - 2. rotate image from -45 to +45 degrees and compute - image line histogram (sum of inverted pixels for every line) - 3. analyze histogram for every angle to compute resulting coefficients - 3. sort computed parameters and choose the best match + 1. increase shred contrast + 2. rotate image from -45 to +45 degrees and compute + image line histogram (sum of inverted pixels for every line) + 3. analyze histogram for every angle to compute resulting coefficients + 4. sort computed parameters and choose the best match - Currently, the best match is selected by angle, at which - maximum number of text lines is found with minimum heights for each line.
+ Currently, the best match is selected by angle, at which + maximum number of text lines is found with minimum heights for each line. - TODO: - * better way to increase contrast of the image (based on histogram) - * include and analyze additional parameters of rotated shred, like - contrast of horizontal lines histogram, connect with lines detector - for more accurate results, etc.. - * improve performance by using OpenCV/numpy for computation + TODO: + * better way to increase contrast of the image (based on histogram) + * include and analyze additional parameters of rotated shred, like + contrast of horizontal lines histogram, connect with lines detector + for more accurate results, etc.. + * improve performance by using OpenCV/numpy for computation """ def enhance(self, image, enhancer_class, value): @@ -95,7 +108,11 @@ def get_sections(self, values): if value > MAGIC_SECTIONS_THRESHOLD: if not is_in_section: - sections.append(Section(len=-spacing_len, value=0, pos=i - spacing_len)) + sections.append( + Section(len=-spacing_len, + value=0, + pos=i - spacing_len)) + is_in_section = True spacing_len = 0 @@ -104,7 +121,10 @@ def get_sections(self, values): if is_in_section: is_in_section = False - sections.append(Section(len=len(current_section), value=sum(current_section), pos=i - len(current_section))) + sections.append( + Section(len=len(current_section), + value=sum(current_section), + pos=i - len(current_section))) current_section = [] spacing_len += 1 @@ -122,15 +142,18 @@ def get_histogram_for_angle(self, image, angle): angle: rotation angle in degrees Returns: - list of values. Each value is a sum of inverted pixel's for the corresponding row + list of values. 
Each value is a sum of inverted pixel's + for the corresponding row """ copy = image.rotate(angle, Image.BILINEAR, True) - - img = np.fromstring(copy.tostring(), dtype=np.uint8).reshape(copy.size[1], copy.size[0], 2) - - alpha = img[:, :, 1] + + img = np.fromstring(copy.tostring(), + dtype=np.uint8).reshape(copy.size[1], + copy.size[0], 2) + + alpha = img[:, :, 1] res = img[:, :, 0] - + res[res >= MAGIC_COLOR_THRESHOLD] = 255 res[alpha < 255] = 255 res = 255 - res @@ -140,7 +163,7 @@ def get_histogram_for_angle(self, image, angle): histogram = cv2.reduce(data_for_reduce, 1, cv2.cv.CV_REDUCE_SUM)[:, 0] return histogram - + def group_section_below_threshold(self, section, group_threshold): if section.len > 0 and section.value < group_threshold: return True @@ -150,8 +173,20 @@ def group_section_below_threshold(self, section, group_threshold): return False + def join_sections(self, sections, num): + """ Joins three sections into a single one. + Summing up sections length and values + """ + return Section(len=sum([sections[num - 1].len, + sections[num].len, + sections[num + 1].len]), + pos=sections[num - 1].pos, + value=sum([sections[num - 1].value, + sections[num].value, + sections[num + 1].value])) + def group_sections(self, sections): - """ Groups adjacent sections which are devided by only few pixels. + """ Groups adjacent sections which are divided by only few pixels. 
""" finished = False @@ -164,14 +199,13 @@ def group_sections(self, sections): group_threshold = section_avg_value * MAGIC_GROUP_VALUE_THRESHOLD while not finished: - finished = True for i in xrange(1, len(sections) - 1): - if self.group_section_below_threshold(sections[i], group_threshold): - sections[i-1] = Section(len=sections[i-1].len + sections[i].len + sections[i+1].len, - pos=sections[i-1].pos, - value=sections[i-1].value + sections[i].value + sections[i+1].value) - sections[i:i+2] = [] + if self.group_section_below_threshold(sections[i], + group_threshold): + self.join_sections(sections, i) + sections[i - 1] = self.join_sections(sections, i) + sections[i:i + 2] = [] finished = False break @@ -201,31 +235,33 @@ def get_derivative_coef(self, histogram): This can be used to measure "sharpness" of the histogram """ derivative = np.gradient(histogram) - return sum([x*x for x in derivative]) + return sum([x * x for x in derivative]) def get_rotation_info(self, image, angle): """ - Rotates image and compute resulting coefficients for the specified angle + Rotates image and compute resulting coefficients + for the specified angle - Args: - image: grayscale python image - angle: angle for which to rotate an image + Args: + image: grayscale python image + angle: angle for which to rotate an image - Returns: + Returns: - dictionary with values for the specified angle + dictionary with values for the specified angle - Coefficients currently computed: - nsc (Normalized Sections Count) - number of text lines, - without those lines, which have very little pixels in them + Coefficients currently computed: + nsc (Normalized Sections Count) - number of text lines, + without those lines, which have very + little pixels in them - heights - sum of heights of lines + heights - sum of heights of lines - Additional lists returned (currently used only for debug and experiments): - derivative_pos: list of positive derivatives values for histogram - derivative_neg: list of negative 
derivatives values for histogram - full_sections: list of sections with enough data for analysis - sections: list of all sections + Additional lists returned (currently used for debug only): + derivative_pos: list of positive derivatives for histogram + derivative_neg: list of negative derivatives for histogram + full_sections: list of sections with enough data for analysis + sections: list of all sections """ diagram = self.get_histogram_for_angle(image, angle) @@ -242,9 +278,13 @@ def get_rotation_info(self, image, angle): if len(sections) > 0: # get average section size - section_avg_value = sum([s.value for s in sections]) / float(len(sections)) + section_avg_value = (sum([s.value for s in sections]) + / float(len(sections))) + + full_threshold = MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value + full_sections = [s for s in sections + if s.value > full_threshold] - full_sections = [s for s in sections if s.value > MAGIC_GROUP_VALUE_THRESHOLD * section_avg_value] normalized_sections_count = len(full_sections) sections_heights = sum(map(lambda s: s.len, full_sections)) @@ -284,12 +324,14 @@ def info_for_angles(self, image): result = [] for angle in ANGLES_RANGE: - if DEBUG: sys.stdout.write(".") + if DEBUG: + sys.stdout.write(".") rotation_info = self.get_rotation_info(image, angle) result.append(rotation_info) - if DEBUG: sys.stdout.write("\n") + if DEBUG: + sys.stdout.write("\n") self.sort_result(result) @@ -316,10 +358,11 @@ def get_info(self, shred, contour, name): result.save("results/%s" % (name)) if top_result.nsc >= MIN_LINES_FOR_RESULT: - return {'text_angle' : resulting_angle, - 'text_sections' : [{'pos' : s.pos, 'length' : s.len} for s in top_result.full_sections]} + return {'text_angle': resulting_angle, + 'text_sections': [{'pos': s.pos, 'length': s.len} + for s in top_result.full_sections]} else: - return {'text_angle' : "undefined"} + return {'text_angle': "undefined"} if __name__ == '__main__': @@ -332,10 +375,10 @@ def process_shred(full_name): 
result = features.get_info(cv_image, None, file_name) - if result == None: + if result is None: return - with open("results/%s.json" %(file_name), "wt") as f_info: + with open("results/%s.json" % (file_name), "wt") as f_info: f_info.write(json.dumps(result, sort_keys=True, indent=4, separators=(',', ': ')))