OCR-D · kba · May 19, 2021 · Feb 1, 2021 · Feb 1, 2021 · Feb 1, 2021
diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -8,38 +8,34 @@
 # URL - https://www.dfki.de/fileadmin/user_upload/import/9512_ICDAR2017_anyOCR.pdf
 
 
-from scipy import ones, zeros, array, where, shape, ndimage, logical_or, logical_and
 import copy
-from pylab import unique
-import ocrolib
 import json
-from PIL import Image
-import sys
 import os
+from pathlib import Path
+import sys
+import math
+import click
+from PIL import Image
+from scipy import ndimage
 import numpy as np
 import shapely
-import cv2
-import math
-from ..constants import OCRD_TOOL
-from pathlib import Path
+import ocrolib
+from keras.models import load_model
+#from keras_segmentation.models.unet import resnet50_unet
 from ocrd import Processor
 from ocrd_modelfactory import page_from_file
+from ocrd_models.ocrd_page import to_xml, AlternativeImageType
 from ocrd_utils import (
-    getLogger, 
-    concat_padded, 
+    getLogger,
+    concat_padded,
     MIMETYPE_PAGE,
     coordinates_for_segment,
     points_from_polygon,
     make_file_id,
     assert_file_grp_cardinality,
-    )
-import click
+)
 from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
-
-from keras.models import load_model
-#from keras_segmentation.models.unet import resnet50_unet
-
-from ocrd_models.ocrd_page import to_xml, AlternativeImageType
+from ..constants import OCRD_TOOL
 
 TOOL = 'ocrd-anybaseocr-tiseg'
 
@@ -49,56 +45,45 @@ def __init__(self, *args, **kwargs):
         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
         kwargs['version'] = OCRD_TOOL['version']
         super(OcrdAnybaseocrTiseg, self).__init__(*args, **kwargs)
+        if hasattr(self, 'output_file_grp') and hasattr(self, 'parameter'):
+            # processing context
+            self.setup()
 
-    def crop_image(self, image_path, crop_region):
-        img = Image.open(image_path)
-        cropped = img.crop(crop_region)
-        return cropped
+    def setup(self):
+        LOG = getLogger('OcrdAnybaseocrTiseg')
+        self.model = None
+        if self.parameter['use_deeplr']:
 
+            model_weights = self.resolve_resource(self.parameter['seg_weights'])
+            #model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
+            #model.load_weights(model_weights)
+            self.model = load_model(model_weights)
+            LOG.info('Loaded segmentation model')
+
     def process(self):
         LOG = getLogger('OcrdAnybaseocrTiseg')
 
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
-        oplevel = self.parameter['operation_level']
-
-        model = None
-        if self.parameter['use_deeplr']:
-
-            model_weights = self.resolve_resource(self.parameter['seg_weights'])
-
-            if not Path(model_weights).is_file():
-                LOG.error("""\
-                    Segementation model weights file was not found at '%s'. Make sure the `seg_weights` parameter
-                    points to the local model weights path.
-                    """ % model_weights)
-                sys.exit(1)
 
-            #model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
-            #model.load_weights(model_weights)
-            model = load_model(model_weights)
-            LOG.info('Segmentation Model loaded')
-
-        for (n, input_file) in enumerate(self.input_files):
+        for input_file in self.input_files:
             page_id = input_file.pageId or input_file.ID
-            
+
             pcgts = page_from_file(self.workspace.download_file(input_file))
             self.add_metadata(pcgts)
 
             page = pcgts.get_Page()
             LOG.info("INPUT FILE %s", input_file.pageId or input_file.ID)
-            
+
             if self.parameter['use_deeplr']:
-                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
+                kwargs = {'feature_filter': 'binarized,deskewed,cropped'}
             else:
-                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')            
-
-            if oplevel == 'page':
-                self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
-            else:
-                LOG.warning('Operation level %s, but should be "page".', oplevel)
-                break
-
+                # _should_ also be deskewed and cropped, but no need to enforce that here
+                kwargs = {'feature_selector': 'binarized'}
+            page_image, page_coords, page_image_info = self.workspace.image_from_page(
+                page, page_id, **kwargs)
+
+            self._process_segment(page, page_image, page_coords, page_id, input_file)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -110,44 +95,43 @@ def process(self):
                 local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
                 content=to_xml(pcgts).encode('utf-8'),
             )
-                    
-    def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, model):
+
+    def _process_segment(self, page, page_image, page_coords, page_id, input_file):
         LOG = getLogger('OcrdAnybaseocrTiseg')
-    
-        if model:
-            
+
+        if self.model:
+
             I = ocrolib.pil2array(page_image.resize((800, 1024), Image.ANTIALIAS))
             I = np.array(I)[np.newaxis, :, :, :]
             LOG.info('I shape %s', I.shape)
             if len(I.shape)<3:
                 print('Wrong input shape. Image should have 3 channel')
-            
+
             # get prediction
-            #out = model.predict_segmentation(
+            #out = self.model.predict_segmentation(
             #    inp=I,
             #    out_fname="/tmp/out.png"
             #)
-            out = model.predict(I)
+            out = self.model.predict(I)
             out = out.reshape((2048, 1600, 3)).argmax(axis=2)
 
-            text_part = np.ones(out.shape)
+            text_part = 255 * np.ones(out.shape, 'B')
             text_part[np.where(out==1)] = 0
-
-            image_part = np.ones(out.shape)
+            LOG.info('text: %d%', 100 * (1 - np.count_nonzero(text_part) / np.prod(out.shape)))
+
+            image_part = 255 * np.ones(out.shape, 'B')
             image_part[np.where(out==2)] = 0
-
-            image_part = array(255*(image_part), 'B')
-            image_part = ocrolib.array2pil(image_part)
+            LOG.info('image: %d%', 100 * (1 - np.count_nonzero(image_part) / np.prod(out.shape)))
 
-            text_part = array(255*(text_part), 'B')
+            image_part = ocrolib.array2pil(image_part)
             text_part = ocrolib.array2pil(text_part)
-
-            text_part = text_part.resize(page_image.size, Image.BICUBIC)
+
             image_part = image_part.resize(page_image.size, Image.BICUBIC)
-
+            text_part = text_part.resize(page_image.size, Image.BICUBIC)
+
         else:
             I = ocrolib.pil2array(page_image)
-            
+
             if len(I.shape) > 2:
                 I = np.mean(I, 2)
             I = 1-I/I.max()
@@ -160,92 +144,91 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
             Iseedfill = self.pixSeedfillBinary(Imask, Iseed)
 
             # Dilation of Iseedfill
-            mask = ones((3, 3))
+            mask = np.ones((3, 3))
             Iseedfill = ndimage.binary_dilation(Iseedfill, mask)
 
             # Expansion of Iseedfill to become equal in size of I
             Iseedfill = self.expansion(Iseedfill, (rows, cols))
 
             # Write Text and Non-Text images
-            image_part = array((1-I*Iseedfill), dtype=int)
-            text_part = array((1-I*(1-Iseedfill)), dtype=int)   
+            image_part = np.array(255*(1-I*Iseedfill), dtype='B')
+            text_part = np.array(255*(1-I*(1-Iseedfill)), dtype='B')
+            LOG.info('text: %d%', 100 * (1 - np.count_nonzero(text_part) / np.prod(I.shape)))
+            LOG.info('image: %d%', 100 * (1 - np.count_nonzero(image_part) / np.prod(I.shape)))
+
+            image_part = ocrolib.array2pil(image_part)
+            text_part = ocrolib.array2pil(text_part)
 
-            bin_array = array(255*(text_part>ocrolib.midrange(img_part)),'B')
-            text_part = ocrolib.array2pil(bin_array)                            
-
-            bin_array = array(255*(text_part>ocrolib.midrange(text_part)),'B')
-            image_part = ocrolib.array2pil(bin_array)                            
-
-
         file_id = make_file_id(input_file, self.output_file_grp)
         file_path = self.workspace.save_image_file(image_part,
                                    file_id+"_img",
-                                   page_id=page_id,
+                                   page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
-            )     
-        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']+',non_text'))
-
-        page_xywh['features'] += ',clipped'
+            )
+        page.add_AlternativeImage(AlternativeImageType(
+            filename=file_path, comments=page_coords['features'] + ',non_text'))
+
         file_path = self.workspace.save_image_file(text_part,
                                    file_id+"_txt",
-                                   page_id=page_id,
+                                   page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
-            )     
-        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features'])) 
-
+            )
+        page.add_AlternativeImage(AlternativeImageType(
+            filename=file_path, comments=page_coords['features'] + ',clipped'))
+
     def pixMorphSequence_mask_seed_fill_holes(self, I):
         Imask = self.reduction_T_1(I)
         Imask = self.reduction_T_1(Imask)
         Imask = ndimage.binary_fill_holes(Imask)
         Iseed = self.reduction_T_4(Imask)
         Iseed = self.reduction_T_3(Iseed)
-        mask = array(ones((5, 5)), dtype=int)
+        mask = np.array(np.ones((5, 5)), dtype=int)
         Iseed = ndimage.binary_opening(Iseed, mask)
         Iseed = self.expansion(Iseed, Imask.shape)
         return Imask, Iseed
 
     def pixSeedfillBinary(self, Imask, Iseed):
         Iseedfill = copy.deepcopy(Iseed)
-        s = ones((3, 3))
+        s = np.ones((3, 3))
         Ijmask, k = ndimage.label(Imask, s)
         Ijmask2 = Ijmask * Iseedfill
-        A = list(unique(Ijmask2))
+        A = list(np.unique(Ijmask2))
         A.remove(0)
         for i in range(0, len(A)):
-            x, y = where(Ijmask == A[i])
+            x, y = np.where(Ijmask == A[i])
             Iseedfill[x, y] = 1
         return Iseedfill
 
     def reduction_T_1(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_or(A[:, 0:-1:2], A[:, 1::2])
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_or(A[:, 0:-1:2], A[:, 1::2])
         return A
 
     def reduction_T_2(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
-        B = logical_and(I[0:-1:2, :], I[1::2, :])
-        B = logical_or(B[:, 0:-1:2], B[:, 1::2])
-        C = logical_or(A, B)
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
+        B = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
+        C = np.logical_or(A, B)
         return C
 
     def reduction_T_3(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
-        B = logical_and(I[0:-1:2, :], I[1::2, :])
-        B = logical_or(B[:, 0:-1:2], B[:, 1::2])
-        C = logical_and(A, B)
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
+        B = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
+        C = np.logical_and(A, B)
         return C
 
     def reduction_T_4(self, I):
-        A = logical_and(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
+        A = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
         return A
 
     def expansion(self, I, rows_cols):
         r, c = I.shape
         rows, cols = rows_cols
-        A = zeros((rows, cols))
+        A = np.zeros((rows, cols))
         A[0:4*r:4, 0:4*c:4] = I
         A[1:4*r:4, :] = A[0:4*r:4, :]
         A[2:4*r:4, :] = A[0:4*r:4, :]
@@ -254,7 +237,7 @@ def expansion(self, I, rows_cols):
         A[:, 2:4*c:4] = A[:, 0:4*c:4]
         A[:, 3:4*c:4] = A[:, 0:4*c:4]
         return A
-    
+
     def alpha_shape(self, coords, alpha):
         import shapely.geometry as geometry
         from shapely.ops import cascaded_union, polygonize
@@ -283,7 +266,7 @@ def add_edge(edges, edge_points, coords, i, j):
                 return
             edges.add( (i, j) )
             edge_points.append(coords[ [i, j] ])
-        
+
         tri = Delaunay(coords)
         edges = set()
         edge_points = []
@@ -313,7 +296,6 @@ def add_edge(edges, edge_points, coords, i, j):
         triangles = list(polygonize(m))
         return cascaded_union(triangles), edge_points
 
-
 @click.command()
 @ocrd_cli_options
 def cli(*args, **kwargs):

diff --git a/ocrd_anybaseocr/ocrd-tool.json b/ocrd_anybaseocr/ocrd-tool.json
@@ -91,12 +91,19 @@
       "steps": ["layout/segmentation/text-image"],
       "description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.",
       "parameters": {
-        "use_deeplr":      {"type":"boolean",                      "default":true, "description": "use deep learning model"},
-        "seg_weights":     {"type":"string",                       "default":"seg_model.hdf5", "description":"path to weights file", "required":false},
-         "classes":        {"type":"integer",                      "default":3, "description":"number of classes" },
-         "width"  :        {"type":"integer",                      "default":1024, "description":"input image height"},
-         "height" :        {"type":"integer",                      "default":800, "description":"input image width"},
-        "operation_level": {"type": "string", "enum": ["page","region", "line"], "default": "page","description": "PAGE XML hierarchy level to operate on"}
+        "use_deeplr": {
+          "type":"boolean",
+          "default":true,
+          "description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology)."
+        },
+        "seg_weights": {
+          "type":"string",
+          "format":"uri",
+          "content-type": "application/x-hdf;subtype=bag",
+          "cacheable": true,
+          "default":"seg_model.hdf5",
+          "description":"Path to weights file for deep learning model when use_deeplr is true."
+        }
       }
     },
     "ocrd-anybaseocr-textline": {