From 57fd1e7c6404220ea62e6f9d31b435743deabbd0 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:27:20 +0100
Subject: [PATCH 01/23] tiseg: fix typo

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 442b0d8..334ba07 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -170,7 +170,7 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
             image_part = array((1-I*Iseedfill), dtype=int)
             text_part = array((1-I*(1-Iseedfill)), dtype=int)   
 
-            bin_array = array(255*(text_part>ocrolib.midrange(img_part)),'B')
+            bin_array = array(255*(text_part>ocrolib.midrange(image_part)),'B')
             text_part = ocrolib.array2pil(bin_array)                            
             
             bin_array = array(255*(text_part>ocrolib.midrange(text_part)),'B')

From bab56a6f73fc9d1d4462511a4ccfc718c117273f Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:30:31 +0100
Subject: [PATCH 02/23] tiseg: remove trailing whitespace

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 70 ++++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 334ba07..eb2b0b8 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -25,14 +25,14 @@
 from ocrd import Processor
 from ocrd_modelfactory import page_from_file
 from ocrd_utils import (
-    getLogger, 
-    concat_padded, 
+    getLogger,
+    concat_padded,
     MIMETYPE_PAGE,
     coordinates_for_segment,
     points_from_polygon,
     make_file_id,
     assert_file_grp_cardinality,
-    )
+)
 import click
 from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
 
@@ -61,14 +61,14 @@ def process(self):
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
         oplevel = self.parameter['operation_level']
-        
+
         model = None
         if self.parameter['use_deeplr']:
-            
+
             model_weights = self.resolve_resource(self.parameter['seg_weights'])
-            
+
             if not Path(model_weights).is_file():
-                LOG.error("""\
+                LOG.error("""
                     Segementation model weights file was not found at '%s'. Make sure the `seg_weights` parameter
                     points to the local model weights path.
                     """ % model_weights)
@@ -78,27 +78,26 @@ def process(self):
             #model.load_weights(model_weights)
             model = load_model(model_weights)
             LOG.info('Segmentation Model loaded')
-                    
+
         for (n, input_file) in enumerate(self.input_files):
             page_id = input_file.pageId or input_file.ID
-            
+
             pcgts = page_from_file(self.workspace.download_file(input_file))
             self.add_metadata(pcgts)
 
             page = pcgts.get_Page()
             LOG.info("INPUT FILE %s", input_file.pageId or input_file.ID)
-            
+
             if self.parameter['use_deeplr']:
                 page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
             else:
-                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')            
-            
+                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')
+
             if oplevel == 'page':
                 self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
             else:
                 LOG.warning('Operation level %s, but should be "page".', oplevel)
                 break
-        
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -110,18 +109,18 @@ def process(self):
                 local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
                 content=to_xml(pcgts).encode('utf-8'),
             )
-                    
+
     def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, model):
         LOG = getLogger('OcrdAnybaseocrTiseg')
-    
+
         if model:
-            
+
             I = ocrolib.pil2array(page_image.resize((800, 1024), Image.ANTIALIAS))
             I = np.array(I)[np.newaxis, :, :, :]
             LOG.info('I shape %s', I.shape)
             if len(I.shape)<3:
                 print('Wrong input shape. Image should have 3 channel')
-            
+
             # get prediction
             #out = model.predict_segmentation(
             #    inp=I,
@@ -132,22 +131,22 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
 
             text_part = np.ones(out.shape)
             text_part[np.where(out==1)] = 0
-            
+
             image_part = np.ones(out.shape)
             image_part[np.where(out==2)] = 0
-            
+
             image_part = array(255*(image_part), 'B')
             image_part = ocrolib.array2pil(image_part)
 
             text_part = array(255*(text_part), 'B')
             text_part = ocrolib.array2pil(text_part)
-            
+
             text_part = text_part.resize(page_image.size, Image.BICUBIC)
             image_part = image_part.resize(page_image.size, Image.BICUBIC)
-            
+
         else:
             I = ocrolib.pil2array(page_image)
-            
+
             if len(I.shape) > 2:
                 I = np.mean(I, 2)
             I = 1-I/I.max()
@@ -168,31 +167,31 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
 
             # Write Text and Non-Text images
             image_part = array((1-I*Iseedfill), dtype=int)
-            text_part = array((1-I*(1-Iseedfill)), dtype=int)   
+            text_part = array((1-I*(1-Iseedfill)), dtype=int)
 
             bin_array = array(255*(text_part>ocrolib.midrange(image_part)),'B')
-            text_part = ocrolib.array2pil(bin_array)                            
-            
+            text_part = ocrolib.array2pil(bin_array)
+
             bin_array = array(255*(text_part>ocrolib.midrange(text_part)),'B')
-            image_part = ocrolib.array2pil(bin_array)                            
-        
-        
+            image_part = ocrolib.array2pil(bin_array)
+
+
         file_id = make_file_id(input_file, self.output_file_grp)
         file_path = self.workspace.save_image_file(image_part,
                                    file_id+"_img",
                                    page_id=page_id,
                                    file_grp=self.output_file_grp,
-            )     
+            )
         page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']+',non_text'))
-        
+
         page_xywh['features'] += ',clipped'
         file_path = self.workspace.save_image_file(text_part,
                                    file_id+"_txt",
                                    page_id=page_id,
                                    file_grp=self.output_file_grp,
-            )     
-        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features'])) 
-    
+            )
+        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']))
+
     def pixMorphSequence_mask_seed_fill_holes(self, I):
         Imask = self.reduction_T_1(I)
         Imask = self.reduction_T_1(Imask)
@@ -254,7 +253,7 @@ def expansion(self, I, rows_cols):
         A[:, 2:4*c:4] = A[:, 0:4*c:4]
         A[:, 3:4*c:4] = A[:, 0:4*c:4]
         return A
-    
+
     def alpha_shape(self, coords, alpha):
         import shapely.geometry as geometry
         from shapely.ops import cascaded_union, polygonize
@@ -283,7 +282,7 @@ def add_edge(edges, edge_points, coords, i, j):
                 return
             edges.add( (i, j) )
             edge_points.append(coords[ [i, j] ])
-        
+
         tri = Delaunay(coords)
         edges = set()
         edge_points = []
@@ -313,7 +312,6 @@ def add_edge(edges, edge_points, coords, i, j):
         triangles = list(polygonize(m))
         return cascaded_union(triangles), edge_points
 
-
 @click.command()
 @ocrd_cli_options
 def cli(*args, **kwargs):

From 624b32e53043cc1c3a2de8abb6c6e91b9e973329 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:40:05 +0100
Subject: [PATCH 03/23] tiseg: unused parameters

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py |  7 +------
 ocrd_anybaseocr/ocrd-tool.json               | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index eb2b0b8..647c1ad 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -60,7 +60,6 @@ def process(self):
 
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
-        oplevel = self.parameter['operation_level']
 
         model = None
         if self.parameter['use_deeplr']:
@@ -93,11 +92,7 @@ def process(self):
             else:
                 page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')
 
-            if oplevel == 'page':
-                self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
-            else:
-                LOG.warning('Operation level %s, but should be "page".', oplevel)
-                break
+            self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
diff --git a/ocrd_anybaseocr/ocrd-tool.json b/ocrd_anybaseocr/ocrd-tool.json
index 0622c22..73680ef 100755
--- a/ocrd_anybaseocr/ocrd-tool.json
+++ b/ocrd_anybaseocr/ocrd-tool.json
@@ -91,12 +91,19 @@
       "steps": ["layout/segmentation/text-image"],
       "description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.",
       "parameters": {
-        "use_deeplr":      {"type":"boolean",                      "default":true, "description": "use deep learning model"},
-        "seg_weights":     {"type":"string",                       "default":"seg_model.hdf5", "description":"path to weights file", "required":false},
-         "classes":        {"type":"integer",                      "default":3, "description":"number of classes" },
-         "width"  :        {"type":"integer",                      "default":1024, "description":"input image height"},
-         "height" :        {"type":"integer",                      "default":800, "description":"input image width"},
-        "operation_level": {"type": "string", "enum": ["page","region", "line"], "default": "page","description": "PAGE XML hierarchy level to operate on"}
+        "use_deeplr": {
+          "type":"boolean",
+          "default":true,
+          "description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology)."
+        },
+        "seg_weights": {
+          "type":"string",
+          "format":"uri",
+          "content-type": "application/x-hdf;subtype=bag",
+          "cacheable": true,
+          "default":"seg_model.hdf5",
+          "description":"Path to weights file for deep learning model when use_deeplr is true."
+        }
       }
     },
     "ocrd-anybaseocr-textline": {

From 99c457b4280a7d3f2825055dffaf1c0da6d3e7a9 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:45:01 +0100
Subject: [PATCH 04/23] tiseg (legacy): do not enforce deskewed/cropped

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 647c1ad..331527e 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -88,11 +88,12 @@ def process(self):
             LOG.info("INPUT FILE %s", input_file.pageId or input_file.ID)
 
             if self.parameter['use_deeplr']:
-                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
+                page_image, page_coords, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
             else:
-                page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')
+                # _should_ also be deskewed and cropped, but no need to enforce that here
+                page_image, page_coords, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized')
 
-            self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
+            self._process_segment(page, page_image, page_coords, page_id, input_file, model)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -105,7 +106,7 @@ def process(self):
                 content=to_xml(pcgts).encode('utf-8'),
             )
 
-    def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, model):
+    def _process_segment(self, page, page_image, page_coords, page_id, input_file, model):
         LOG = getLogger('OcrdAnybaseocrTiseg')
 
         if model:
@@ -177,15 +178,16 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
                                    page_id=page_id,
                                    file_grp=self.output_file_grp,
             )
-        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']+',non_text'))
+        page.add_AlternativeImage(AlternativeImageType(
+            filename=file_path, comments=page_coords['features'] + ',non_text'))
 
-        page_xywh['features'] += ',clipped'
         file_path = self.workspace.save_image_file(text_part,
                                    file_id+"_txt",
                                    page_id=page_id,
                                    file_grp=self.output_file_grp,
             )
-        page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']))
+        page.add_AlternativeImage(AlternativeImageType(
+            filename=file_path, comments=page_coords['features'] + ',clipped'))
 
     def pixMorphSequence_mask_seed_fill_holes(self, I):
         Imask = self.reduction_T_1(I)

From 62a97656a7e80c89a520bfc5207ae9b6358d49dd Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:45:55 +0100
Subject: [PATCH 05/23] tiseg (legacy): fix image pageId

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 331527e..a3c021a 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -175,7 +175,7 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file, m
         file_id = make_file_id(input_file, self.output_file_grp)
         file_path = self.workspace.save_image_file(image_part,
                                    file_id+"_img",
-                                   page_id=page_id,
+                                   page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
             )
         page.add_AlternativeImage(AlternativeImageType(
@@ -183,7 +183,7 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file, m
 
         file_path = self.workspace.save_image_file(text_part,
                                    file_id+"_txt",
-                                   page_id=page_id,
+                                   page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
             )
         page.add_AlternativeImage(AlternativeImageType(

From 21a2cd9e4facafc80e0b274a2d811040d4d1362f Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:54:06 +0100
Subject: [PATCH 06/23] tiseg: clean imports and import order

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 78 ++++++++++----------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index a3c021a..3bc516a 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -8,22 +8,23 @@
 # URL - https://www.dfki.de/fileadmin/user_upload/import/9512_ICDAR2017_anyOCR.pdf
 
 
-from scipy import ones, zeros, array, where, shape, ndimage, logical_or, logical_and
 import copy
-from pylab import unique
-import ocrolib
 import json
-from PIL import Image
-import sys
 import os
+from pathlib import Path
+import sys
+import math
+import click
+from PIL import Image
+from scipy import ndimage
 import numpy as np
 import shapely
-import cv2
-import math
-from ..constants import OCRD_TOOL
-from pathlib import Path
+import ocrolib
+from keras.models import load_model
+#from keras_segmentation.models.unet import resnet50_unet
 from ocrd import Processor
 from ocrd_modelfactory import page_from_file
+from ocrd_models.ocrd_page import to_xml, AlternativeImageType
 from ocrd_utils import (
     getLogger,
     concat_padded,
@@ -33,13 +34,8 @@
     make_file_id,
     assert_file_grp_cardinality,
 )
-import click
 from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
-
-from keras.models import load_model
-#from keras_segmentation.models.unet import resnet50_unet
-
-from ocrd_models.ocrd_page import to_xml, AlternativeImageType
+from ..constants import OCRD_TOOL
 
 TOOL = 'ocrd-anybaseocr-tiseg'
 
@@ -131,10 +127,10 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file, m
             image_part = np.ones(out.shape)
             image_part[np.where(out==2)] = 0
 
-            image_part = array(255*(image_part), 'B')
+            image_part = np.array(255*(image_part), 'B')
             image_part = ocrolib.array2pil(image_part)
 
-            text_part = array(255*(text_part), 'B')
+            text_part = np.array(255*(text_part), 'B')
             text_part = ocrolib.array2pil(text_part)
 
             text_part = text_part.resize(page_image.size, Image.BICUBIC)
@@ -155,20 +151,20 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file, m
             Iseedfill = self.pixSeedfillBinary(Imask, Iseed)
 
             # Dilation of Iseedfill
-            mask = ones((3, 3))
+            mask = np.ones((3, 3))
             Iseedfill = ndimage.binary_dilation(Iseedfill, mask)
 
             # Expansion of Iseedfill to become equal in size of I
             Iseedfill = self.expansion(Iseedfill, (rows, cols))
 
             # Write Text and Non-Text images
-            image_part = array((1-I*Iseedfill), dtype=int)
-            text_part = array((1-I*(1-Iseedfill)), dtype=int)
+            image_part = np.array((1-I*Iseedfill), dtype=int)
+            text_part = np.array((1-I*(1-Iseedfill)), dtype=int)
 
-            bin_array = array(255*(text_part>ocrolib.midrange(image_part)),'B')
+            bin_array = np.array(255*(text_part>ocrolib.midrange(image_part)),'B')
             text_part = ocrolib.array2pil(bin_array)
 
-            bin_array = array(255*(text_part>ocrolib.midrange(text_part)),'B')
+            bin_array = np.array(255*(text_part>ocrolib.midrange(text_part)),'B')
             image_part = ocrolib.array2pil(bin_array)
 
 
@@ -195,53 +191,53 @@ def pixMorphSequence_mask_seed_fill_holes(self, I):
         Imask = ndimage.binary_fill_holes(Imask)
         Iseed = self.reduction_T_4(Imask)
         Iseed = self.reduction_T_3(Iseed)
-        mask = array(ones((5, 5)), dtype=int)
+        mask = np.array(np.ones((5, 5)), dtype=int)
         Iseed = ndimage.binary_opening(Iseed, mask)
         Iseed = self.expansion(Iseed, Imask.shape)
         return Imask, Iseed
 
     def pixSeedfillBinary(self, Imask, Iseed):
         Iseedfill = copy.deepcopy(Iseed)
-        s = ones((3, 3))
+        s = np.ones((3, 3))
         Ijmask, k = ndimage.label(Imask, s)
         Ijmask2 = Ijmask * Iseedfill
-        A = list(unique(Ijmask2))
+        A = list(np.unique(Ijmask2))
         A.remove(0)
         for i in range(0, len(A)):
-            x, y = where(Ijmask == A[i])
+            x, y = np.where(Ijmask == A[i])
             Iseedfill[x, y] = 1
         return Iseedfill
 
     def reduction_T_1(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_or(A[:, 0:-1:2], A[:, 1::2])
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_or(A[:, 0:-1:2], A[:, 1::2])
         return A
 
     def reduction_T_2(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
-        B = logical_and(I[0:-1:2, :], I[1::2, :])
-        B = logical_or(B[:, 0:-1:2], B[:, 1::2])
-        C = logical_or(A, B)
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
+        B = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
+        C = np.logical_or(A, B)
         return C
 
     def reduction_T_3(self, I):
-        A = logical_or(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
-        B = logical_and(I[0:-1:2, :], I[1::2, :])
-        B = logical_or(B[:, 0:-1:2], B[:, 1::2])
-        C = logical_and(A, B)
+        A = np.logical_or(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
+        B = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
+        C = np.logical_and(A, B)
         return C
 
     def reduction_T_4(self, I):
-        A = logical_and(I[0:-1:2, :], I[1::2, :])
-        A = logical_and(A[:, 0:-1:2], A[:, 1::2])
+        A = np.logical_and(I[0:-1:2, :], I[1::2, :])
+        A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
         return A
 
     def expansion(self, I, rows_cols):
         r, c = I.shape
         rows, cols = rows_cols
-        A = zeros((rows, cols))
+        A = np.zeros((rows, cols))
         A[0:4*r:4, 0:4*c:4] = I
         A[1:4*r:4, :] = A[0:4*r:4, :]
         A[2:4*r:4, :] = A[0:4*r:4, :]

From 82a0055c44dcd226967829f1762c8b2911c646d0 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 01:59:33 +0100
Subject: [PATCH 07/23] tiseg (ML): load during init/setup instead of process

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 41 +++++++++-----------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 3bc516a..d27b45b 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -45,7 +45,21 @@ def __init__(self, *args, **kwargs):
         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
         kwargs['version'] = OCRD_TOOL['version']
         super(OcrdAnybaseocrTiseg, self).__init__(*args, **kwargs)
+        if hasattr(self, 'output_file_grp') and hasattr(self, 'parameter'):
+            # processing context
+            self.setup()
 
+    def setup(self):
+        LOG = getLogger('OcrdAnybaseocrTiseg')
+        self.model = None
+        if self.parameter['use_deeplr']:
+
+            model_weights = self.resolve_resource(self.parameter['seg_weights'])
+            #model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
+            #model.load_weights(model_weights)
+            self.model = load_model(model_weights)
+            LOG.info('Loaded segmentation model')
+            
     def crop_image(self, image_path, crop_region):
         img = Image.open(image_path)
         cropped = img.crop(crop_region)
@@ -57,23 +71,6 @@ def process(self):
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
 
-        model = None
-        if self.parameter['use_deeplr']:
-
-            model_weights = self.resolve_resource(self.parameter['seg_weights'])
-
-            if not Path(model_weights).is_file():
-                LOG.error("""
-                    Segementation model weights file was not found at '%s'. Make sure the `seg_weights` parameter
-                    points to the local model weights path.
-                    """ % model_weights)
-                sys.exit(1)
-
-            #model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
-            #model.load_weights(model_weights)
-            model = load_model(model_weights)
-            LOG.info('Segmentation Model loaded')
-
         for (n, input_file) in enumerate(self.input_files):
             page_id = input_file.pageId or input_file.ID
 
@@ -89,7 +86,7 @@ def process(self):
                 # _should_ also be deskewed and cropped, but no need to enforce that here
                 page_image, page_coords, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized')
 
-            self._process_segment(page, page_image, page_coords, page_id, input_file, model)
+            self._process_segment(page, page_image, page_coords, page_id, input_file)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -102,10 +99,10 @@ def process(self):
                 content=to_xml(pcgts).encode('utf-8'),
             )
 
-    def _process_segment(self, page, page_image, page_coords, page_id, input_file, model):
+    def _process_segment(self, page, page_image, page_coords, page_id, input_file):
         LOG = getLogger('OcrdAnybaseocrTiseg')
 
-        if model:
+        if self.model:
 
             I = ocrolib.pil2array(page_image.resize((800, 1024), Image.ANTIALIAS))
             I = np.array(I)[np.newaxis, :, :, :]
@@ -114,11 +111,11 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file, m
                 print('Wrong input shape. Image should have 3 channel')
 
             # get prediction
-            #out = model.predict_segmentation(
+            #out = self.model.predict_segmentation(
             #    inp=I,
             #    out_fname="/tmp/out.png"
             #)
-            out = model.predict(I)
+            out = self.model.predict(I)
             out = out.reshape((2048, 1600, 3)).argmax(axis=2)
 
             text_part = np.ones(out.shape)

From eb6c98fb0ace3b14c666616e385a60003f54fef0 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 02:05:40 +0100
Subject: [PATCH 08/23] tiseg (ML): clean unused function

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index d27b45b..1575cb7 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -60,18 +60,13 @@ def setup(self):
             self.model = load_model(model_weights)
             LOG.info('Loaded segmentation model')
             
-    def crop_image(self, image_path, crop_region):
-        img = Image.open(image_path)
-        cropped = img.crop(crop_region)
-        return cropped
-
     def process(self):
         LOG = getLogger('OcrdAnybaseocrTiseg')
 
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
 
-        for (n, input_file) in enumerate(self.input_files):
+        for input_file in self.input_files:
             page_id = input_file.pageId or input_file.ID
 
             pcgts = page_from_file(self.workspace.download_file(input_file))
@@ -81,10 +76,12 @@ def process(self):
             LOG.info("INPUT FILE %s", input_file.pageId or input_file.ID)
 
             if self.parameter['use_deeplr']:
-                page_image, page_coords, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
+                kwargs = {'feature_filter': 'binarized,deskewed,cropped'}
             else:
                 # _should_ also be deskewed and cropped, but no need to enforce that here
-                page_image, page_coords, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized')
+                kwargs = {'feature_selector': 'binarized'}
+            page_image, page_coords, page_image_info = self.workspace.image_from_page(
+                page, page_id, **kwargs)
 
             self._process_segment(page, page_image, page_coords, page_id, input_file)
 

From 96ec2eedcd2ffd5da30121639c692d506bacb175 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 02:17:45 +0100
Subject: [PATCH 09/23] tiseg (legacy): fix image vs text part

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 1575cb7..0968719 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -158,10 +158,9 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
             bin_array = np.array(255*(text_part>ocrolib.midrange(image_part)),'B')
             text_part = ocrolib.array2pil(bin_array)
 
-            bin_array = np.array(255*(text_part>ocrolib.midrange(text_part)),'B')
+            bin_array = np.array(255*(image_part>ocrolib.midrange(text_part)),'B')
             image_part = ocrolib.array2pil(bin_array)
 
-
         file_id = make_file_id(input_file, self.output_file_grp)
         file_path = self.workspace.save_image_file(image_part,
                                    file_id+"_img",

From 9105973379879c0e55f70e1e2dc2776e65d00643 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 02:28:16 +0100
Subject: [PATCH 10/23] tiseg (legacy): fix image vs background

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 0968719..14e05b4 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -152,17 +152,14 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
             Iseedfill = self.expansion(Iseedfill, (rows, cols))
 
             # Write Text and Non-Text images
-            image_part = np.array((1-I*Iseedfill), dtype=int)
-            text_part = np.array((1-I*(1-Iseedfill)), dtype=int)
+            nontext_part = np.array(255*(1-I*Iseedfill), dtype='B')
+            text_part = np.array(255*(1-I*(1-Iseedfill)), dtype='B')
 
-            bin_array = np.array(255*(text_part>ocrolib.midrange(image_part)),'B')
-            text_part = ocrolib.array2pil(bin_array)
-
-            bin_array = np.array(255*(image_part>ocrolib.midrange(text_part)),'B')
-            image_part = ocrolib.array2pil(bin_array)
+            nontext_image = ocrolib.array2pil(nontext_part)
+            text_image = ocrolib.array2pil(text_part)
 
         file_id = make_file_id(input_file, self.output_file_grp)
-        file_path = self.workspace.save_image_file(image_part,
+        file_path = self.workspace.save_image_file(nontext_image,
                                    file_id+"_img",
                                    page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
@@ -170,7 +167,7 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
         page.add_AlternativeImage(AlternativeImageType(
             filename=file_path, comments=page_coords['features'] + ',non_text'))
 
-        file_path = self.workspace.save_image_file(text_part,
+        file_path = self.workspace.save_image_file(text_image,
                                    file_id+"_txt",
                                    page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,

From 25fc8e135e4255380ec49f5ceb689f020e87185d Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Mon, 1 Feb 2021 03:10:51 +0100
Subject: [PATCH 11/23] tiseg: show class counts

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 23 ++++++++++----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 14e05b4..4b2d044 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -115,20 +115,19 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
             out = self.model.predict(I)
             out = out.reshape((2048, 1600, 3)).argmax(axis=2)
 
-            text_part = np.ones(out.shape)
+            text_part = 255 * np.ones(out.shape, 'B')
             text_part[np.where(out==1)] = 0
+            LOG.info('text: %d%%', 100 * (1 - np.count_nonzero(text_part) / np.prod(out.shape)))
 
-            image_part = np.ones(out.shape)
+            image_part = 255 * np.ones(out.shape, 'B')
             image_part[np.where(out==2)] = 0
+            LOG.info('image: %d%%', 100 * (1 - np.count_nonzero(image_part) / np.prod(out.shape)))
 
-            image_part = np.array(255*(image_part), 'B')
             image_part = ocrolib.array2pil(image_part)
-
-            text_part = np.array(255*(text_part), 'B')
             text_part = ocrolib.array2pil(text_part)
 
-            text_part = text_part.resize(page_image.size, Image.BICUBIC)
             image_part = image_part.resize(page_image.size, Image.BICUBIC)
+            text_part = text_part.resize(page_image.size, Image.BICUBIC)
 
         else:
             I = ocrolib.pil2array(page_image)
@@ -152,14 +151,16 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
             Iseedfill = self.expansion(Iseedfill, (rows, cols))
 
             # Write Text and Non-Text images
-            nontext_part = np.array(255*(1-I*Iseedfill), dtype='B')
+            image_part = np.array(255*(1-I*Iseedfill), dtype='B')
             text_part = np.array(255*(1-I*(1-Iseedfill)), dtype='B')
+            LOG.info('text: %d%%', 100 * (1 - np.count_nonzero(text_part) / np.prod(I.shape)))
+            LOG.info('image: %d%%', 100 * (1 - np.count_nonzero(image_part) / np.prod(I.shape)))
 
-            nontext_image = ocrolib.array2pil(nontext_part)
-            text_image = ocrolib.array2pil(text_part)
+            image_part = ocrolib.array2pil(image_part)
+            text_part = ocrolib.array2pil(text_part)
 
         file_id = make_file_id(input_file, self.output_file_grp)
-        file_path = self.workspace.save_image_file(nontext_image,
+        file_path = self.workspace.save_image_file(image_part,
                                    file_id+"_img",
                                    page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,
@@ -167,7 +168,7 @@ def _process_segment(self, page, page_image, page_coords, page_id, input_file):
         page.add_AlternativeImage(AlternativeImageType(
             filename=file_path, comments=page_coords['features'] + ',non_text'))
 
-        file_path = self.workspace.save_image_file(text_image,
+        file_path = self.workspace.save_image_file(text_part,
                                    file_id+"_txt",
                                    page_id=input_file.pageId,
                                    file_grp=self.output_file_grp,

From 665a8dd5ee6b4c1b8401627abb699c0a7ae580e6 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 11:08:03 +0100
Subject: [PATCH 12/23] block-segmentation: resolve_resource already exits
 verbosely

---
 .../cli/ocrd_anybaseocr_block_segmentation.py         | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 7a0696c..f10a7dc 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -89,17 +89,8 @@ def process(self):
                        'marginalia', 'footnote', 'footnote-continued', 'caption', 'endnote', 'footer', 'keynote',
                        'image', 'table', 'graphics']
 
-        if not Path(model_weights).is_file():
-            LOG.error("""\
-                Block Segmentation model weights file was not found at '%s'. Make sure the `model_weights` parameter
-                points to the local model weights path.
-                """, model_weights)
-            sys.exit(1)
-
-#         config = InferenceConfig(Config,DETECTION_MIN_CONFIDENCE)
-
         config = InferenceConfig(confidence)
-#         config = InferenceConfig()
+        # TODO: allow selecting active class IDs
         mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
         mrcnn_model.load_weights(str(model_weights), by_name=True)
 

From 266756cfc98db4f27ff2b6827500cbaf7b157e46 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 12:01:51 +0100
Subject: [PATCH 13/23] block-segmentation: proper class ID/name mapping

---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 91 ++++++++++---------
 1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index f10a7dc..d70e042 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -43,6 +43,26 @@
 TOOL = 'ocrd-anybaseocr-block-segmentation'
 FALLBACK_IMAGE_GRP = 'OCR-D-IMG-BLOCK-SEGMENT'
 
+CLASS_NAMES = ['BG',
+               'page-number',
+               'paragraph',
+               'catch-word',
+               'heading',
+               'drop-capital',
+               'signature-mark',
+               'header',
+               'marginalia',
+               'footnote',
+               'footnote-continued',
+               'caption',
+               'endnote',
+               'footer',
+               'keynote',
+               # not included in the provided models yet:
+               #'image',
+               #'table',
+               #'graphics'
+]
 
 class InferenceConfig(Config):
 
@@ -51,7 +71,7 @@ def __init__(self, confidence):
 
     NAME = "block"
     IMAGES_PER_GPU = 1
-    NUM_CLASSES = 1 + 14
+    NUM_CLASSES = len(CLASS_NAMES)
 
 #     NAME = "block"
 #     IMAGES_PER_GPU = 1
@@ -85,10 +105,6 @@ def process(self):
         confidence = self.parameter['DETECTION_MIN_CONFIDENCE']
 #         DETECTION_MIN_CONFIDENCE = Path(self.parameter['DETECTION_MIN_CONFIDENCE'])
 
-        class_names = ['BG', 'page-number', 'paragraph', 'catch-word', 'heading', 'drop-capital', 'signature-mark', 'header',
-                       'marginalia', 'footnote', 'footnote-continued', 'caption', 'endnote', 'footer', 'keynote',
-                       'image', 'table', 'graphics']
-
         config = InferenceConfig(confidence)
         # TODO: allow selecting active class IDs
         mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
@@ -152,7 +168,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
         page_image.save('./checkthis.png')
         if len(img_array.shape) <= 2:
             img_array = np.stack((img_array,)*3, axis=-1)
-        results = mrcnn_model.detect([img_array], verbose=1)
+        results = mrcnn_model.detect([img_array], verbose=0)
         r = results[0]
 
         th = self.parameter['th']
@@ -224,18 +240,20 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
                     # checking for ymax case with vertical overlapping
                     # along with y, check both for xmax and xmin
                     if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1] and
-                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or (region_bbox[2] >= bbox[0]
-                                                                                       and region_bbox[2] <= bbox[2]) or (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2]))
-                            and r['class_ids'][i] != 5):
+                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
+                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
+                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
+                        r['class_ids'][i] != 5):
 
                         r['rois'][i][2] = bbox[1] - 1
 
                     # checking for ymin now
                     # along with y, check both for xmax and xmin
                     if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1] and
-                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or (region_bbox[2] >= bbox[0]
-                                                                                       and region_bbox[2] <= bbox[2]) or (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2]))
-                            and r['class_ids'][i] != 5):
+                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
+                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
+                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
+                        r['class_ids'][i] != 5):
 
                         r['rois'][i][0] = bbox[3] + 1
 
@@ -313,10 +331,11 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
             min_y = r['rois'][i][1]
             max_x = r['rois'][i][2]
             max_y = r['rois'][i][3]
+            class_id = r['class_ids'][i]
 
-            if (min_y - 5) > width and r['class_ids'][i] == 2:
+            if (min_y - 5) > width and class_id == 2:
                 min_y -= 5
-            if (max_y + 10) < width and r['class_ids'][i] == 2:
+            if (max_y + 10) < width and class_id == 2:
                 min_y += 10
 
             # one change here to resolve flipped coordinates
@@ -326,7 +345,8 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
 
             if cut_region_polygon.is_empty:
                 continue
-            cut_region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]), list(cut_region_polygon.exterior.coords.xy[1]))][:-1]
+            cut_region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
+                                                 list(cut_region_polygon.exterior.coords.xy[1]))][:-1]
 
             # checking whether coordinates are flipped
 
@@ -348,35 +368,24 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
                                                        page_id=page_id,
                                                        file_grp=self.output_file_grp)
 
-            # ai = AlternativeImageType(filename=file_path, comments=page_xywh['features'])
-            region_id = '%s_region%04d' % (page_id, i)
-            coords = CoordsType(region_points)
-
-            # incase of imageRegion
-            if r['class_ids'][i] == 15:
-                image_region = ImageRegionType(custom='readingOrder {index:'+str(read_order)+';}', id=region_id, Coords=coords, type_=class_names[r['class_ids'][i]])
-                # image_region.add_AlternativeImage(ai)
+            region_args = {'custom': 'readingOrder {index:'+str(read_order)+';}',
+                           'id': '%s_region%04d' % (page_id, i),
+                           'Coords': CoordsType(region_points)}
+            if class_id >= len(CLASS_NAMES):
+                raise Exception('Unexpected class id %d - model does not match' % class_id)
+            if CLASS_NAMES[class_id] == 'image':
+                image_region = ImageRegionType(**region_args)
                 page.add_ImageRegion(image_region)
-                continue
-            if r['class_ids'][i] == 16:
-                table_region = TableRegionType(custom='readingOrder {index:'+str(read_order)+';}', id=region_id, Coords=coords, type_=class_names[r['class_ids'][i]])
-                # table_region.add_AlternativeImage(ai)
+            elif CLASS_NAMES[class_id] == 'table':
+                table_region = TableRegionType(**region_args)
                 page.add_TableRegion(table_region)
-                continue
-            if r['class_ids'][i] == 17:
-                graphic_region = GraphicRegionType(custom='readingOrder {index:'+str(read_order)+';}', id=region_id, Coords=coords, type_=class_names[r['class_ids'][i]])
-                # graphic_region.add_AlternativeImage(ai)
+            elif CLASS_NAMES[class_id] == 'graphics':
+                graphic_region = GraphicRegionType(**region_args)
                 page.add_GraphicRegion(graphic_region)
-                continue
-
-            textregion = TextRegionType(custom='readingOrder {index:'+str(read_order)+';}', id=region_id, Coords=coords, type_=class_names[r['class_ids'][i]])
-            # textregion.add_AlternativeImage(ai)
-
-            #border = page.get_Border()
-            # if border:
-            #    border.add_TextRegion(textregion)
-            # else:
-            page.add_TextRegion(textregion)
+            else:
+                region_args['type_'] = CLASS_NAMES[class_id]
+                textregion = TextRegionType(**region_args)
+                page.add_TextRegion(textregion)
 
 
 @click.command()

From a956f6302bb37a41b2b2c098d547062b659d3f06 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 12:06:58 +0100
Subject: [PATCH 14/23] block-segmentation: fix Border intersection

---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 24 ++++++-------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index d70e042..31faab8 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -130,7 +130,7 @@ def process(self):
                 LOG.warning("Image already has text segments!")
 
             if oplevel == "page":
-                self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names, mask_image)
+                self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask_image)
             else:
                 LOG.warning('Operation level %s, but should be "page".', oplevel)
                 break
@@ -146,7 +146,7 @@ def process(self):
                 content=to_xml(pcgts).encode('utf-8')
             )
 
-    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names, mask):
+    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask):
         LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
         # check for existing text regions and whether to overwrite them
         border = None
@@ -165,7 +165,6 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
 #            page_image, page_xy = self.workspace.image_from_segment(page.get_Border(), page_image, page_xywh)
 
         img_array = ocrolib.pil2array(page_image)
-        page_image.save('./checkthis.png')
         if len(img_array.shape) <= 2:
             img_array = np.stack((img_array,)*3, axis=-1)
         results = mrcnn_model.detect([img_array], verbose=0)
@@ -173,7 +172,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
 
         th = self.parameter['th']
         # check for existing semgentation mask
-        # this code executes only when use_deeplr is set to True in ocrd-tool.json file
+        # this code executes only when the workflow had tiseg run before with use_deeplr=true
         if mask:
             mask = ocrolib.pil2array(mask)
             mask = mask//255
@@ -186,7 +185,6 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
                 max_x = r['rois'][i][2]
                 max_y = r['rois'][i][3]
                 mask[min_x:max_x, min_y:max_y] *= i+2
-            cv2.imwrite('mask_check.png', mask*(255/(len(r['rois'])+2)))
 
             # check for left over pixels and add them to the bounding boxes
             pixel_added = True
@@ -307,15 +305,6 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
             if (max_y + 10) < width and r['class_ids'][i] == 2:
                 min_y += 10
 
-            region_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
-
-            if border:
-                cut_region_polygon = border.intersection(Polygon(region_polygon))
-                if cut_region_polygon.is_empty:
-                    continue
-            else:
-                cut_region_polygon = Polygon(region_polygon)
-
             order_index = reading_order.index((min_y, min_x, max_y, max_x))
             region_id = '%s_region%04d' % (page_id, i)
             regionRefIndex = RegionRefIndexedType(index=order_index, regionRef=region_id)
@@ -341,9 +330,10 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
             # one change here to resolve flipped coordinates
             region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x], [min_y, max_x]]
 
-            cut_region_polygon = border.intersection(Polygon(region_polygon))
-
-            if cut_region_polygon.is_empty:
+            cut_region_polygon = Polygon(region_polygon)
+            if border:
+                cut_region_polygon = border.intersection(cut_region_polygon)
+            if cut_region_polygon.is_empty or not cut_region_polygon.is_valid:
                 continue
             cut_region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
                                                  list(cut_region_polygon.exterior.coords.xy[1]))][:-1]

From 5a4d874a4caf62d960acca4622dd65ab56a58670 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 12:52:57 +0100
Subject: [PATCH 15/23] block-segmentation: fix TF logger init

---
 ocrd_anybaseocr/tensorflow_importer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ocrd_anybaseocr/tensorflow_importer.py b/ocrd_anybaseocr/tensorflow_importer.py
index 5edf4cf..ef1fc17 100644
--- a/ocrd_anybaseocr/tensorflow_importer.py
+++ b/ocrd_anybaseocr/tensorflow_importer.py
@@ -2,9 +2,8 @@
 
 import os
 import warnings
-from ocrd_utils import initLogging, getLogger
-initLogging()
-getLogger('tensorflow').setLevel('ERROR')
+import logging
+logging.getLogger('tensorflow').setLevel('ERROR')
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # No prints from the tensorflow side
 warnings.filterwarnings('ignore', category=FutureWarning)
 #import tensorflow as tf

From e641e31553ba16ac380bde1a44a79ea2dae47955 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 12:55:33 +0100
Subject: [PATCH 16/23] block-segmentation: remove buggy/useless
 AlternativeImage creation

---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 30 ++-----------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 31faab8..d3a012e 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -73,12 +73,8 @@ def __init__(self, confidence):
     IMAGES_PER_GPU = 1
     NUM_CLASSES = len(CLASS_NAMES)
 
-#     NAME = "block"
-#     IMAGES_PER_GPU = 1
 #     NUM_CLASSES = 1 + 14
 #     DETECTION_MIN_CONFIDENCE = 0.9 # needs to be changed back to parameter
-    #     DETECTION_MIN_CONFIDENCE = DETECTION_MIN_CONFIDENCE #taken as a parameter from tools.json
-
 
 class OcrdAnybaseocrBlockSegmenter(Processor):
 
@@ -102,15 +98,12 @@ def process(self):
         model_path = resource_filename(__name__, '../mrcnn')
         model_weights = Path(self.resolve_resource(self.parameter['block_segmentation_weights']))
 
-        confidence = self.parameter['DETECTION_MIN_CONFIDENCE']
-#         DETECTION_MIN_CONFIDENCE = Path(self.parameter['DETECTION_MIN_CONFIDENCE'])
-
+        confidence = self.parameter['min_confidence']
         config = InferenceConfig(confidence)
         # TODO: allow selecting active class IDs
         mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
         mrcnn_model.load_weights(str(model_weights), by_name=True)
 
-        oplevel = self.parameter['operation_level']
         for (n, input_file) in enumerate(self.input_files):
 
             pcgts = page_from_file(self.workspace.download_file(input_file))
@@ -129,11 +122,7 @@ def process(self):
             if regions:
                 LOG.warning("Image already has text segments!")
 
-            if oplevel == "page":
-                self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask_image)
-            else:
-                LOG.warning('Operation level %s, but should be "page".', oplevel)
-                break
+            self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask_image)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -342,22 +331,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
 
             region_polygon = coordinates_for_segment(cut_region_polygon, page_image, page_xywh)
             region_points = points_from_polygon(region_polygon)
-
             read_order = reading_order.index((min_y, min_x, max_y, max_x))
-
-            # this can be tested, provided whether we need previous comments or not?
-            # resolving overlapping problem
-
-            region_img = img_array[min_x:max_x, min_y:max_y]  # extract from points and img_array
-
-            region_img = ocrolib.array2pil(region_img)
-
-            file_id = make_file_id(input_file, self.output_file_grp)
-            file_path = self.workspace.save_image_file(region_img,
-                                                       file_id+"_"+str(i),
-                                                       page_id=page_id,
-                                                       file_grp=self.output_file_grp)
-
             region_args = {'custom': 'readingOrder {index:'+str(read_order)+';}',
                            'id': '%s_region%04d' % (page_id, i),
                            'Coords': CoordsType(region_points)}

From 046735df73c8afe253e2588a31120661c297f8a9 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 13:21:26 +0100
Subject: [PATCH 17/23] block-segmentation: fix Border intersection (applies in
 absolute coords)

---
 .../cli/ocrd_anybaseocr_block_segmentation.py    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index d3a012e..4bf374f 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -151,7 +151,6 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
             border_coords = page.get_Border().get_Coords()
             border_points = polygon_from_points(border_coords.get_points())
             border = Polygon(border_points)
-#            page_image, page_xy = self.workspace.image_from_segment(page.get_Border(), page_image, page_xywh)
 
         img_array = ocrolib.pil2array(page_image)
         if len(img_array.shape) <= 2:
@@ -319,17 +318,20 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
             # one change here to resolve flipped coordinates
             region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x], [min_y, max_x]]
 
+            # convert to absolute coordinates
+            region_polygon = coordinates_for_segment(region_polygon, page_image, page_xywh)
+            # intersect with parent and plausibilize
             cut_region_polygon = Polygon(region_polygon)
             if border:
                 cut_region_polygon = border.intersection(cut_region_polygon)
-            if cut_region_polygon.is_empty or not cut_region_polygon.is_valid:
+            if cut_region_polygon.is_empty:
+                LOG.warning('region %d does not intersect page frame', i)
                 continue
-            cut_region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
+            if not cut_region_polygon.is_valid:
+                LOG.warning('region %d has invalid polygon', i)
+                continue
+            region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
                                                  list(cut_region_polygon.exterior.coords.xy[1]))][:-1]
-
-            # checking whether coordinates are flipped
-
-            region_polygon = coordinates_for_segment(cut_region_polygon, page_image, page_xywh)
             region_points = points_from_polygon(region_polygon)
             read_order = reading_order.index((min_y, min_x, max_y, max_x))
             region_args = {'custom': 'readingOrder {index:'+str(read_order)+';}',

From 0febe79604e66c3ab46edea9038d90a0b5e72413 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 13:23:15 +0100
Subject: [PATCH 18/23] block-segmentation: fix overwrite==false (continue by
 adding more)

---
 .../cli/ocrd_anybaseocr_block_segmentation.py    | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 4bf374f..0556519 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -117,10 +117,6 @@ def process(self):
                 mask_image, mask_xywh, mask_image_info = self.workspace.image_from_page(page, page_id, feature_selector='clipped', feature_filter='binarized,deskewed,cropped,non_text')
             except:
                 mask_image = None
-            # Display Warning If image segment results already exist or not in StructMap?
-            regions = page.get_TextRegion() + page.get_TableRegion()
-            if regions:
-                LOG.warning("Image already has text segments!")
 
             self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask_image)
 
@@ -138,25 +134,26 @@ def process(self):
     def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask):
         LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
         # check for existing text regions and whether to overwrite them
-        border = None
-        if page.get_TextRegion():
+        if page.get_TextRegion() or page.get_TableRegion():
             if self.parameter['overwrite']:
-                LOG.info('removing existing TextRegions in page "%s"', page_id)
+                LOG.info('removing existing text/table regions in page "%s"', page_id)
                 page.set_TextRegion([])
             else:
-                LOG.warning('keeping existing TextRegions in page "%s"', page_id)
-                return
+                LOG.warning('keeping existing text/table regions in page "%s"', page_id)
         # check if border exists
+        border = None
         if page.get_Border():
             border_coords = page.get_Border().get_Coords()
             border_points = polygon_from_points(border_coords.get_points())
             border = Polygon(border_points)
 
+        LOG.info('detecting regions on page "%s"', page_id)
         img_array = ocrolib.pil2array(page_image)
         if len(img_array.shape) <= 2:
             img_array = np.stack((img_array,)*3, axis=-1)
         results = mrcnn_model.detect([img_array], verbose=0)
         r = results[0]
+        LOG.info('found %d regions on page "%s"', len(r['rois']), page_id)
 
         th = self.parameter['th']
         # check for existing semgentation mask
@@ -352,6 +349,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
                 region_args['type_'] = CLASS_NAMES[class_id]
                 textregion = TextRegionType(**region_args)
                 page.add_TextRegion(textregion)
+            LOG.info('added %s region on page "%s"', CLASS_NAMES[class_id], page_id)
 
 
 @click.command()

From 3d54a19e4041b1116811917b7d89f530e9b6b5b3 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 13:23:37 +0100
Subject: [PATCH 19/23] block-segmentation: move model loading to setup()

---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 33 ++++++++++---------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 0556519..3621618 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -82,30 +82,33 @@ def __init__(self, *args, **kwargs):
         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
         kwargs['version'] = OCRD_TOOL['version']
         super(OcrdAnybaseocrBlockSegmenter, self).__init__(*args, **kwargs)
+        if hasattr(self, 'output_file_grp') and hasattr(self, 'parameter'):
+            # processing context
+            self.setup()
+
+    def setup(self):
+        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
         #self.reading_order = []
         self.order = 0
+        model_path = resource_filename(__name__, '../mrcnn')
+        model_weights = Path(self.resolve_resource(self.parameter['block_segmentation_weights']))
 
+        confidence = self.parameter['min_confidence']
+        config = InferenceConfig(confidence)
+        # TODO: allow selecting active class IDs
+        self.mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
+        self.mrcnn_model.load_weights(str(model_weights), by_name=True)
+    
     def process(self):
 
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
 
         LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
-
         if not tf.test.is_gpu_available():
             LOG.warning("Tensorflow cannot detect CUDA installation. Running without GPU will be slow.")
 
-        model_path = resource_filename(__name__, '../mrcnn')
-        model_weights = Path(self.resolve_resource(self.parameter['block_segmentation_weights']))
-
-        confidence = self.parameter['min_confidence']
-        config = InferenceConfig(confidence)
-        # TODO: allow selecting active class IDs
-        mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
-        mrcnn_model.load_weights(str(model_weights), by_name=True)
-
-        for (n, input_file) in enumerate(self.input_files):
-
+        for input_file in self.input_files:
             pcgts = page_from_file(self.workspace.download_file(input_file))
             self.add_metadata(pcgts)
             page = pcgts.get_Page()
@@ -118,7 +121,7 @@ def process(self):
             except:
                 mask_image = None
 
-            self._process_segment(page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask_image)
+            self._process_segment(page_image, page, page_xywh, page_id, input_file, mask_image)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -131,7 +134,7 @@ def process(self):
                 content=to_xml(pcgts).encode('utf-8')
             )
 
-    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, mask):
+    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mask):
         LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
         # check for existing text regions and whether to overwrite them
         if page.get_TextRegion() or page.get_TableRegion():
@@ -151,7 +154,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n,
         img_array = ocrolib.pil2array(page_image)
         if len(img_array.shape) <= 2:
             img_array = np.stack((img_array,)*3, axis=-1)
-        results = mrcnn_model.detect([img_array], verbose=0)
+        results = self.mrcnn_model.detect([img_array], verbose=0)
         r = results[0]
         LOG.info('found %d regions on page "%s"', len(r['rois']), page_id)
 

From 681b70f3eb3141cc4c4a962c5c439d825021d43a Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Feb 2021 13:53:58 +0100
Subject: [PATCH 20/23] block-segmentation: decode masks into polygons

---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 79 ++++++++++---------
 1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 3621618..e1c5d1a 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -253,17 +253,17 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
 
         # define reading order on basis of coordinates
         reading_order = []
-
         for i in range(len(r['rois'])):
             width, height, _ = img_array.shape
-            min_x = r['rois'][i][0]
-            min_y = r['rois'][i][1]
-            max_x = r['rois'][i][2]
-            max_y = r['rois'][i][3]
+            min_x, min_y, max_x, max_y = r['rois'][i]
+            class_id = r['class_ids'][i]
+            if class_id >= len(CLASS_NAMES):
+                raise Exception('Unexpected class id %d - model does not match' % class_id)
+            class_name = CLASS_NAMES[class_id]
 
-            if (min_y - 5) > width and r['class_ids'][i] == 2:
+            if (min_y - 5) > width and class_name == 'paragraph':
                 min_y -= 5
-            if (max_y + 10) < width and r['class_ids'][i] == 2:
+            if (max_y + 10) < width and class_name == 'paragraph':
                 min_y += 10
             reading_order.append((min_y, min_x, max_y, max_x))
 
@@ -282,41 +282,41 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
 
         # Creating Reading Order object in PageXML
         order_group = OrderedGroupType(caption="Regions reading order", id=page_id)
-
-        for i in range(len(r['rois'])):
-            min_x = r['rois'][i][0]
-            min_y = r['rois'][i][1]
-            max_x = r['rois'][i][2]
-            max_y = r['rois'][i][3]
-            if (min_y - 5) > width and r['class_ids'][i] == 2:
-                min_y -= 5
-            if (max_y + 10) < width and r['class_ids'][i] == 2:
-                min_y += 10
-
-            order_index = reading_order.index((min_y, min_x, max_y, max_x))
-            region_id = '%s_region%04d' % (page_id, i)
-            regionRefIndex = RegionRefIndexedType(index=order_index, regionRef=region_id)
-            order_group.add_RegionRefIndexed(regionRefIndex)
-
         reading_order_object = ReadingOrderType()
         reading_order_object.set_OrderedGroup(order_group)
         page.set_ReadingOrder(reading_order_object)
 
         for i in range(len(r['rois'])):
             width, height, _ = img_array.shape
-            min_x = r['rois'][i][0]
-            min_y = r['rois'][i][1]
-            max_x = r['rois'][i][2]
-            max_y = r['rois'][i][3]
+            min_x, min_y, max_x, max_y = r['rois'][i]
             class_id = r['class_ids'][i]
+            if class_id >= len(CLASS_NAMES):
+                raise Exception('Unexpected class id %d - model does not match' % class_id)
+            class_name = CLASS_NAMES[class_id]
 
-            if (min_y - 5) > width and class_id == 2:
+            if (min_y - 5) > width and class_name == 'paragraph':
                 min_y -= 5
-            if (max_y + 10) < width and class_id == 2:
+            if (max_y + 10) < width and class_name == 'paragraph':
                 min_y += 10
 
-            # one change here to resolve flipped coordinates
-            region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x], [min_y, max_x]]
+            # estimate glyph scale (roughly)
+            mask = r['masks'][:,:,i]
+            area = np.count_nonzero(mask)
+            scale = int(np.sqrt(area)//10)
+            scale = scale + (scale+1)%2 # odd
+
+            # dilate mask until we have a single outer contour
+            contours = [None, None]
+            for _ in range(10):
+                if len(contours) == 1:
+                    break
+                mask = cv2.dilate(mask.astype(np.uint8),
+                                  np.ones((scale,scale), np.uint8)) > 0
+                contours, _ = cv2.findContours(mask.astype(np.uint8),
+                                               cv2.RETR_EXTERNAL,
+                                               cv2.CHAIN_APPROX_SIMPLE)
+            region_polygon = contours[0][:,0,:] # already in x,y order
+            #region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x], [min_y, max_x]]
 
             # convert to absolute coordinates
             region_polygon = coordinates_for_segment(region_polygon, page_image, page_xywh)
@@ -335,24 +335,25 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
             region_points = points_from_polygon(region_polygon)
             read_order = reading_order.index((min_y, min_x, max_y, max_x))
             region_args = {'custom': 'readingOrder {index:'+str(read_order)+';}',
-                           'id': '%s_region%04d' % (page_id, i),
+                           'id': 'region%04d' % i,
                            'Coords': CoordsType(region_points)}
-            if class_id >= len(CLASS_NAMES):
-                raise Exception('Unexpected class id %d - model does not match' % class_id)
-            if CLASS_NAMES[class_id] == 'image':
+            if class_name == 'image':
                 image_region = ImageRegionType(**region_args)
                 page.add_ImageRegion(image_region)
-            elif CLASS_NAMES[class_id] == 'table':
+            elif class_name == 'table':
                 table_region = TableRegionType(**region_args)
                 page.add_TableRegion(table_region)
-            elif CLASS_NAMES[class_id] == 'graphics':
+            elif class_name == 'graphics':
                 graphic_region = GraphicRegionType(**region_args)
                 page.add_GraphicRegion(graphic_region)
             else:
-                region_args['type_'] = CLASS_NAMES[class_id]
+                region_args['type_'] = class_name
                 textregion = TextRegionType(**region_args)
                 page.add_TextRegion(textregion)
-            LOG.info('added %s region on page "%s"', CLASS_NAMES[class_id], page_id)
+            order_index = reading_order.index((min_y, min_x, max_y, max_x))
+            regionRefIndex = RegionRefIndexedType(index=order_index, regionRef=region_args['id'])
+            order_group.add_RegionRefIndexed(regionRefIndex)
+            LOG.info('added %s region on page "%s"', class_name, page_id)
 
 
 @click.command()

From a8137980c40e6f0200ee6c680bdf5f40764d1560 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 4 Feb 2021 05:05:33 +0100
Subject: [PATCH 21/23] =?UTF-8?q?block=20segmentation:=20post-processing,?=
 =?UTF-8?q?=20fix=20reading=20order=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- drop unused and dysfunctional code against overlaps
- drop wrong reading order algorithm
- improve mask post-processing (closing instead of dilation)
- make mask-polygon conversion optional
- add optional post-processing to reduce overlaps
  (bbox-only or mask-based):
  - non-maximum suppression across classes (min_iou_drop)
  - non-maximum merging across classes (min_iou_merge)
  - within-other suppression across classes (min_share_drop)
  - within-other merging across classes (min_share_merge)
- implement correct reading order algorithm
  (bbox-only or mask-based):
  - partial order constraints under lr-tb assumption
  - topological sort
- annotate confidence along with coordinate results
---
 .../cli/ocrd_anybaseocr_block_segmentation.py | 346 +++++++++++-------
 ocrd_anybaseocr/ocrd-tool.json                |  73 +++-
 2 files changed, 279 insertions(+), 140 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index e1c5d1a..1c5bbaf 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -39,10 +39,7 @@
 from ..constants import OCRD_TOOL
 from ..tensorflow_importer import tf
 
-
 TOOL = 'ocrd-anybaseocr-block-segmentation'
-FALLBACK_IMAGE_GRP = 'OCR-D-IMG-BLOCK-SEGMENT'
-
 CLASS_NAMES = ['BG',
                'page-number',
                'paragraph',
@@ -87,7 +84,7 @@ def __init__(self, *args, **kwargs):
             self.setup()
 
     def setup(self):
-        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
+        LOG = getLogger('processor.AnybaseocrBlockSegmenter')
         #self.reading_order = []
         self.order = 0
         model_path = resource_filename(__name__, '../mrcnn')
@@ -100,11 +97,11 @@ def setup(self):
         self.mrcnn_model.load_weights(str(model_weights), by_name=True)
     
     def process(self):
-
+        """Segment pages into regions using a Mask R-CNN model."""
         assert_file_grp_cardinality(self.input_file_grp, 1)
         assert_file_grp_cardinality(self.output_file_grp, 1)
 
-        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
+        LOG = getLogger('processor.AnybaseocrBlockSegmenter')
         if not tf.test.is_gpu_available():
             LOG.warning("Tensorflow cannot detect CUDA installation. Running without GPU will be slow.")
 
@@ -114,14 +111,22 @@ def process(self):
             page = pcgts.get_Page()
             page_id = input_file.pageId or input_file.ID
 
+            # todo rs: why not cropped?
             page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped,clipped,non_text')
             # try to load pixel masks
             try:
-                mask_image, mask_xywh, mask_image_info = self.workspace.image_from_page(page, page_id, feature_selector='clipped', feature_filter='binarized,deskewed,cropped,non_text')
+                # todo rs: this combination only works for tiseg with use_deeplr=true
+                mask_image, _, _ = self.workspace.image_from_page(page, page_id, feature_selector='clipped', feature_filter='binarized,deskewed,cropped,non_text')
             except:
                 mask_image = None
+            if page_image_info.resolution != 1:
+                dpi = page_image_info.resolution
+                if page_image_info.resolutionUnit == 'cm':
+                    dpi = round(dpi * 2.54)
+            else:
+                dpi = None
 
-            self._process_segment(page_image, page, page_xywh, page_id, input_file, mask_image)
+            self._process_segment(page_image, page, page_xywh, page_id, input_file, mask_image, dpi)
 
             file_id = make_file_id(input_file, self.output_file_grp)
             pcgts.set_pcGtsId(file_id)
@@ -134,8 +139,8 @@ def process(self):
                 content=to_xml(pcgts).encode('utf-8')
             )
 
-    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mask):
-        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
+    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mask, dpi):
+        LOG = getLogger('processor.AnybaseocrBlockSegmenter')
         # check for existing text regions and whether to overwrite them
         if page.get_TextRegion() or page.get_TableRegion():
             if self.parameter['overwrite']:
@@ -144,11 +149,11 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
             else:
                 LOG.warning('keeping existing text/table regions in page "%s"', page_id)
         # check if border exists
-        border = None
+        border_polygon = None
         if page.get_Border():
             border_coords = page.get_Border().get_Coords()
             border_points = polygon_from_points(border_coords.get_points())
-            border = Polygon(border_points)
+            border_polygon = Polygon(border_points)
 
         LOG.info('detecting regions on page "%s"', page_id)
         img_array = ocrolib.pil2array(page_image)
@@ -156,7 +161,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
             img_array = np.stack((img_array,)*3, axis=-1)
         results = self.mrcnn_model.detect([img_array], verbose=0)
         r = results[0]
-        LOG.info('found %d regions on page "%s"', len(r['rois']), page_id)
+        LOG.info('found %d candidates on page "%s"', len(r['rois']), page_id)
 
         th = self.parameter['th']
         # check for existing semgentation mask
@@ -168,11 +173,8 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
             # multiply all the bounding box part with 2
             for i in range(len(r['rois'])):
 
-                min_x = r['rois'][i][0]
-                min_y = r['rois'][i][1]
-                max_x = r['rois'][i][2]
-                max_y = r['rois'][i][3]
-                mask[min_x:max_x, min_y:max_y] *= i+2
+                min_y, min_x, max_y, max_x = r['rois'][i]
+                mask[min_y:max_y, min_x:max_x] *= i+2
 
             # check for left over pixels and add them to the bounding boxes
             pixel_added = True
@@ -181,8 +183,8 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
 
                 pixel_added = False
                 left_over = np.where(mask == 1)
-                for x, y in zip(left_over[0], left_over[1]):
-                    local_mask = mask[x-th:x+th, y-th:y+th]
+                for y, x in zip(left_over[0], left_over[1]):
+                    local_mask = mask[y-th:y+th, x-th:x+th]
                     candidates = np.where(local_mask > 1)
                     candidates = [k for k in zip(candidates[0], candidates[1])]
                     if len(candidates) > 0:
@@ -192,93 +194,189 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
                         index = local_mask[candidates[0]]-2
 
                         # add pixel to mask/bbox
-                        # x,y to bbox with index
-                        if x < r['rois'][index][0]:
-                            r['rois'][index][0] = x
+                        # y,x to bbox with index
+                        if y < r['rois'][index][0]:
+                            r['rois'][index][0] = y
 
-                        elif x > r['rois'][index][2]:
-                            r['rois'][index][2] = x
+                        elif y > r['rois'][index][2]:
+                            r['rois'][index][2] = y
 
-                        if y < r['rois'][index][1]:
-                            r['rois'][index][1] = y
+                        if x < r['rois'][index][1]:
+                            r['rois'][index][1] = x
 
-                        elif y > r['rois'][index][3]:
-                            r['rois'][index][3] = y
+                        elif x > r['rois'][index][3]:
+                            r['rois'][index][3] = x
 
                         # update the mask
-                        mask[x, y] = index + 2
-
-        # resolving overlapping problem
-        bbox_dict = {}  # to check any overlapping bbox
-        class_id_check = []
-
-        for i in range(len(r['rois'])):
-            min_x = r['rois'][i][0]
-            min_y = r['rois'][i][1]
-            max_x = r['rois'][i][2]
-            max_y = r['rois'][i][3]
-
-            region_bbox = [min_y, min_x, max_y, max_x]
-
-            for key in bbox_dict:
-                for bbox in bbox_dict[key]:
+                        mask[y, x] = index + 2
 
-                    # checking for ymax case with vertical overlapping
-                    # along with y, check both for xmax and xmin
-                    if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1] and
-                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
-                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
-                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
-                        r['class_ids'][i] != 5):
-
-                        r['rois'][i][2] = bbox[1] - 1
-
-                    # checking for ymin now
-                    # along with y, check both for xmax and xmin
-                    if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1] and
-                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
-                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
-                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
-                        r['class_ids'][i] != 5):
-
-                        r['rois'][i][0] = bbox[3] + 1
-
-            if r['class_ids'][i] not in class_id_check:
-                bbox_dict[r['class_ids'][i]] = []
-                class_id_check.append(r['class_ids'][i])
-
-            bbox_dict[r['class_ids'][i]].append(region_bbox)
-
-        # resolving overlapping problem code
-
-        # define reading order on basis of coordinates
-        reading_order = []
         for i in range(len(r['rois'])):
-            width, height, _ = img_array.shape
-            min_x, min_y, max_x, max_y = r['rois'][i]
             class_id = r['class_ids'][i]
             if class_id >= len(CLASS_NAMES):
                 raise Exception('Unexpected class id %d - model does not match' % class_id)
-            class_name = CLASS_NAMES[class_id]
 
-            if (min_y - 5) > width and class_name == 'paragraph':
-                min_y -= 5
-            if (max_y + 10) < width and class_name == 'paragraph':
-                min_y += 10
-            reading_order.append((min_y, min_x, max_y, max_x))
-
-        reading_order = sorted(reading_order, key=lambda reading_order: (reading_order[1], reading_order[0]))
-        for i in range(len(reading_order)):
-            min_y, min_x, max_y, max_x = reading_order[i]
-            min_y = 0
-            i_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]])
-            for j in range(i+1, len(reading_order)):
-                min_y, min_x, max_y, max_x = reading_order[j]
-                j_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]])
-                inter = i_poly.intersection(j_poly)
-                if inter:
-                    reading_order.insert(j+1, reading_order[i])
-                    del reading_order[i]
+        # find hull contours on masks
+        if self.parameter['use_masks']:
+            r.setdefault('polygons', list())
+            # estimate glyph scale (roughly)
+            scale = int(dpi / 6)
+            scale = scale + (scale+1)%2 # odd
+            for i in range(len(r['rois'])):
+                mask = r['masks'][:,:,i]
+                mask = cv2.dilate(mask.astype(np.uint8),
+                                  np.ones((scale,scale), np.uint8)) > 0
+                # close mask until we have a single outer contour
+                contours = None
+                for _ in range(10):
+                    mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE,
+                                            np.ones((scale,scale), np.uint8)) > 0
+                    contours, _ = cv2.findContours(mask.astype(np.uint8),
+                                                   cv2.RETR_EXTERNAL,
+                                                   cv2.CHAIN_APPROX_SIMPLE)
+                    if len(contours) == 1:
+                        break
+                r['polygons'].append(Polygon(contours[0][:,0,:])) # already in x,y order
+
+        # to reduce overlaps, apply IoU-based non-maximum suppression
+        # (and other post-processing against overlaps) across classes,
+        # not on the raw pixels, but on the smoothed hull polygons
+        LOG.info('post-processing detections on page "%s"', page_id)
+        worse = []
+        if self.parameter['post_process']:
+            active = True
+            def _merge_rois(i, j):
+                """merges i into j"""
+                nonlocal r, active
+                r['rois'][j][0] = min(r['rois'][i][0], r['rois'][j][0])
+                r['rois'][j][1] = min(r['rois'][i][1], r['rois'][j][1])
+                r['rois'][j][2] = max(r['rois'][i][2], r['rois'][j][2])
+                r['rois'][j][3] = max(r['rois'][i][3], r['rois'][j][3])
+                r['polygons'][j] = r['polygons'][i].union(r['polygons'][j])
+                #r['scores'][j] = max(r['scores'][i], r['scores'][i])
+                active = True
+            # find overlapping pairs
+            while active:
+                active = False
+                for i in range(len(r["class_ids"])):
+                    if i in worse:
+                        continue
+                    for j in range(i + 1, len(r['class_ids'])):
+                        if j in worse:
+                            continue
+                        iclass = r['class_ids'][i]
+                        jclass = r['class_ids'][j]
+                        iname = CLASS_NAMES[iclass]
+                        jname = CLASS_NAMES[jclass]
+                        if (iname == 'drop-capital') != (jname == 'drop-capital'):
+                            # ignore drop-capital overlapping with others
+                            continue
+                        # rs todo: lower priority for footnote?
+                        if (r['rois'][i][1] > r['rois'][j][3] or
+                            r['rois'][i][3] < r['rois'][j][1] or
+                            r['rois'][i][0] > r['rois'][j][2] or
+                            r['rois'][i][2] < r['rois'][j][0]):
+                            # no overlap (cut)
+                            continue
+                        iscore = r['scores'][i]
+                        jscore = r['scores'][j]
+                        if not self.parameter['use_masks']:
+                            LOG.debug("roi %d[%s] overlaps roi %d[%s] and %s (replacing)",
+                                      i, iname, j, jname,
+                                      "looses" if iscore < jscore else "wins")
+                            if iscore < jscore:
+                                worse.append(i)
+                                break
+                            else:
+                                worse.append(j)
+                                continue
+                        # compare masks
+                        ipoly = r['polygons'][i]
+                        jpoly = r['polygons'][j]
+                        isize = ipoly.area
+                        jsize = jpoly.area
+                        inter = ipoly.intersection(jpoly).area
+                        union = ipoly.union(jpoly).area
+                        # LOG.debug("%d/%d %dpx/%dpx shared %dpx overall %dpx",
+                        #           i, j, isize, jsize, inter, union)
+                        if inter / isize > self.parameter['min_share_drop']:
+                            LOG.debug("roi %d[%s] contains roi %d[%s] (replacing)",
+                                      j, jname, i, iname)
+                            worse.append(i)
+                            break
+                        elif inter / jsize > self.parameter['min_share_drop']:
+                            LOG.debug("roi %d[%s] contains roi %d[%s] (replacing)",
+                                      i, iname, j, jname)
+                            worse.append(j)
+                        elif inter / union > self.parameter['min_iou_drop']:
+                            LOG.debug("roi %d[%s] heavily overlaps roi %d[%s] and %s (replacing)",
+                                      i, iname, j, jname,
+                                      "looses" if iscore < jscore else "wins")
+                            if iscore < jscore:
+                                worse.append(i)
+                                break
+                            else:
+                                worse.append(j)
+                        elif inter / isize > self.parameter['min_share_merge']:
+                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
+                                      j, jname, i, iname)
+                            worse.append(i)
+                            _merge_rois(i, j)
+                            break
+                        elif inter / jsize > self.parameter['min_share_merge']:
+                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
+                                      i, iname, j, jname)
+                            worse.append(j)
+                            _merge_rois(j, i)
+                        elif inter / union > self.parameter['min_iou_merge']:
+                            LOG.debug("roi %d[%s] slightly overlaps roi %d[%s] and %s (merging)",
+                                      i, iname, j, jname,
+                                      "looses" if iscore < jscore else "wins")
+                            if iscore < jscore:
+                                worse.append(i)
+                                _merge_rois(i, j)
+                                break
+                            else:
+                                worse.append(j)
+                                _merge_rois(j, i)
+
+        # define reading order on basis of coordinates
+        partial_order = np.zeros((len(r['rois']), len(r['rois'])), np.uint8)
+        for i, (min_y_i, min_x_i, max_y_i, max_x_i) in enumerate(r['rois']):
+            for j, (min_y_j, min_x_j, max_y_j, max_x_j) in enumerate(r['rois']):
+                if min_x_i < max_x_j and max_x_i > min_x_j:
+                    # xoverlaps
+                    if min_y_i < min_y_j:
+                        partial_order[i, j] = 1
+                else:
+                    min_y = min(min_y_i, min_y_j)
+                    max_y = max(max_y_i, max_y_j)
+                    min_x = min(min_x_i, min_x_j)
+                    max_x = max(max_x_i, max_x_j)
+                    if next((False for (min_y_k, min_x_k, max_y_k, max_x_k) in r['rois']
+                             if (min_y_k < max_y and max_y_k > min_y and
+                                 min_x_k < max_x and max_x_k > min_x)),
+                            True):
+                        # no k in between
+                        if ((min_y_j + max_y_j)/2 < min_y_i and
+                            (min_y_i + max_y_i)/2 > max_y_j):
+                            # vertically unrelated
+                            partial_order[j, i] = 1
+                        elif max_x_i < min_x_j:
+                            partial_order[i, j] = 1
+        def _topsort(po):
+            visited = np.zeros(po.shape[0], np.bool)
+            result = list()
+            def _visit(k):
+                if visited[k]:
+                    return
+                visited[k] = True
+                for l in np.nonzero(po[:, k])[0]:
+                    _visit(l)
+                result.append(k)
+            for k in range(po.shape[0]):
+                _visit(k)
+            return result
+        reading_order = _topsort(partial_order)
 
         # Creating Reading Order object in PageXML
         order_group = OrderedGroupType(caption="Regions reading order", id=page_id)
@@ -288,55 +386,43 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
 
         for i in range(len(r['rois'])):
             width, height, _ = img_array.shape
-            min_x, min_y, max_x, max_y = r['rois'][i]
+            min_y, min_x, max_y, max_x = r['rois'][i]
+            score = r['scores'][i]
             class_id = r['class_ids'][i]
-            if class_id >= len(CLASS_NAMES):
-                raise Exception('Unexpected class id %d - model does not match' % class_id)
             class_name = CLASS_NAMES[class_id]
+            if i in worse:
+                LOG.debug("Ignoring instance %d[%s] overlapping better/larger neighbour",
+                          i, class_name)
+                continue
 
-            if (min_y - 5) > width and class_name == 'paragraph':
-                min_y -= 5
-            if (max_y + 10) < width and class_name == 'paragraph':
-                min_y += 10
-
-            # estimate glyph scale (roughly)
-            mask = r['masks'][:,:,i]
-            area = np.count_nonzero(mask)
-            scale = int(np.sqrt(area)//10)
-            scale = scale + (scale+1)%2 # odd
-
-            # dilate mask until we have a single outer contour
-            contours = [None, None]
-            for _ in range(10):
-                if len(contours) == 1:
-                    break
-                mask = cv2.dilate(mask.astype(np.uint8),
-                                  np.ones((scale,scale), np.uint8)) > 0
-                contours, _ = cv2.findContours(mask.astype(np.uint8),
-                                               cv2.RETR_EXTERNAL,
-                                               cv2.CHAIN_APPROX_SIMPLE)
-            region_polygon = contours[0][:,0,:] # already in x,y order
-            #region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x], [min_y, max_x]]
+            if self.parameter['use_masks']:
+                region_polygon = r['polygons'][i].exterior.coords[:-1]
+            else:  # bbox fallback; paragraphs are padded horizontally, clamped to the image columns
+                region_polygon = polygon_from_bbox(
+                    max(min_x - 5, 0) if class_name == 'paragraph' else min_x,
+                    min_y,
+                    min(max_x + 10, img_array.shape[1]) if class_name == 'paragraph' else max_x,
+                    max_y)
 
             # convert to absolute coordinates
             region_polygon = coordinates_for_segment(region_polygon, page_image, page_xywh)
             # intersect with parent and plausibilize
             cut_region_polygon = Polygon(region_polygon)
-            if border:
-                cut_region_polygon = border.intersection(cut_region_polygon)
+            if border_polygon:
+                cut_region_polygon = border_polygon.intersection(cut_region_polygon)
             if cut_region_polygon.is_empty:
                 LOG.warning('region %d does not intersect page frame', i)
                 continue
             if not cut_region_polygon.is_valid:
                 LOG.warning('region %d has invalid polygon', i)
                 continue
-            region_polygon = [j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
-                                                 list(cut_region_polygon.exterior.coords.xy[1]))][:-1]
-            region_points = points_from_polygon(region_polygon)
-            read_order = reading_order.index((min_y, min_x, max_y, max_x))
+            region_polygon = cut_region_polygon.exterior.coords[:-1]
+            region_coords = CoordsType(points_from_polygon(region_polygon),
+                                       conf=score)
+            read_order = reading_order.index(i)
             region_args = {'custom': 'readingOrder {index:'+str(read_order)+';}',
                            'id': 'region%04d' % i,
-                           'Coords': CoordsType(region_points)}
+                           'Coords': region_coords}
             if class_name == 'image':
                 image_region = ImageRegionType(**region_args)
                 page.add_ImageRegion(image_region)
@@ -350,7 +436,7 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
                 region_args['type_'] = class_name
                 textregion = TextRegionType(**region_args)
                 page.add_TextRegion(textregion)
-            order_index = reading_order.index((min_y, min_x, max_y, max_x))
+            order_index = reading_order.index(i)
             regionRefIndex = RegionRefIndexedType(index=order_index, regionRef=region_args['id'])
             order_group.add_RegionRefIndexed(regionRefIndex)
             LOG.info('added %s region on page "%s"', class_name, page_id)
diff --git a/ocrd_anybaseocr/ocrd-tool.json b/ocrd_anybaseocr/ocrd-tool.json
index 73680ef..62025c9 100755
--- a/ocrd_anybaseocr/ocrd-tool.json
+++ b/ocrd_anybaseocr/ocrd-tool.json
@@ -88,7 +88,7 @@
       "input_file_grp": ["OCR-D-IMG-CROP"],
       "output_file_grp": ["OCR-D-SEG-TISEG"],
       "categories": ["Layout analysis"],
-      "steps": ["layout/segmentation/text-image"],
+      "steps": ["layout/segmentation/text-nontext"],
       "description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.",
       "parameters": {
         "use_deeplr": {
@@ -141,7 +141,7 @@
       "input_file_grp": ["OCR-D-IMG-CROP"],
       "output_file_grp": ["OCR-D-SEG-LAYOUT"],
       "categories": ["Layout analysis"],
-      "steps": ["layout/segmentation/text-image"],
+      "steps": ["layout/analysis"],
       "description": "Generates a table-of-content like document structure of the whole document.",
       "parameters": {
         "batch_size":         {"type": "number", "format": "integer", "default": 4, "description": "Batch size for generating test images"},
@@ -152,16 +152,69 @@
     "ocrd-anybaseocr-block-segmentation": {
       "executable": "ocrd-anybaseocr-block-segmentation",
       "input_file_grp": ["OCR-D-IMG"],
-      "output_file_grp": ["OCR-D-BLOCK-SEGMENT"],
+      "output_file_grp": ["OCR-D-SEG-BLOCK"],
       "categories": ["Layout analysis"],
-      "steps": ["layout/segmentation/text-image"],
-      "description": "Segments and classifies document segments in a single page and outputs the the region polygons and classes.",
+      "steps": ["layout/segmentation/region"],
+      "description": "Segments and classifies regions in each single page and annotates the region polygons and classes.",
       "parameters": {     
-        "block_segmentation_weights": { "type": "string","default":"block_segmentation_weights.h5",  "required": false, "description": "Path to model weights"},
-        "operation_level": {"type": "string", "enum": ["page"], "default": "page","description": "PAGE XML hierarchy level to operate on"},
-        "overwrite":   {"type": "boolean", "default": false, "description": "check whether to overwrite existing text lines"},
-        "th"       :   {"type": "integer", "default": 15, "description": "num of pixels to include in the area region"},
-        "DETECTION_MIN_CONFIDENCE"       :   {"type": "number", "default": 0.9, "description": "Confidence value for a model to detect bounding box"}
+        "block_segmentation_weights": {
+	  "type": "string",
+          "format":"uri",
+          "content-type": "application/x-hdf;subtype=bag",
+          "cacheable": true,
+	  "default":"block_segmentation_weights.h5",
+	  "description": "Path to model weights"
+	},
+        "overwrite": {
+	  "type": "boolean",
+	  "default": false,
+	  "description": "whether to delete existing text lines prior to segmentation"
+	},
+        "th": {
+	  "type": "integer",
+	  "default": 15,
+	  "description": "num of pixels to include in the area region (when applying text/non-text mask from tiseg)"
+	},
+        "post_process": {
+	  "type": "boolean",
+	  "default": true,
+	  "description": "whether to apply non-maximum suppression (across classes) on the detections"
+	},
+        "use_masks": {
+	  "type": "boolean",
+	  "default": true,
+	  "description": "whether to segment from the mask as polygon instead of just the bbox"
+	},
+        "min_confidence": {
+	  "type": "number",
+	  "format": "float",
+	  "default": 0.9,
+	  "description": "Confidence threshold for region detections"
+	},
+	"min_share_drop": {
+	  "type": "number",
+	  "format": "float",
+	  "default": 0.9,
+	  "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to suppress smaller prediction"
+	},
+	"min_share_merge": {
+	  "type": "number",
+	  "format": "float",
+	  "default": 0.8,
+	  "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to merge smaller prediction"
+	},
+	"min_iou_drop": {
+	  "type": "number",
+	  "format": "float",
+	  "default": 0.8,
+	  "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to suppress prediction scoring worse"
+	},
+	"min_iou_merge": {
+	  "type": "number",
+	  "format": "float",
+	  "default": 0.2,
+	  "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to merge prediction scoring worse"
+	}
       }       
     }
   }

From 8c3db3721cf183461edac300cd3949be46614695 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Thu, 4 Feb 2021 11:39:53 +0100
Subject: [PATCH 22/23] block segmentation: restrict active classes (default
 suppresses footnote/header etc)

---
 .../cli/ocrd_anybaseocr_block_segmentation.py |  6 ++-
 ocrd_anybaseocr/mrcnn/model.py                | 40 ++++++++++++++-----
 ocrd_anybaseocr/ocrd-tool.json                |  9 +++++
 3 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
index 1c5bbaf..6558269 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_block_segmentation.py
@@ -92,7 +92,6 @@ def setup(self):
 
         confidence = self.parameter['min_confidence']
         config = InferenceConfig(confidence)
-        # TODO: allow selecting active class IDs
         self.mrcnn_model = model.MaskRCNN(mode="inference", model_dir=str(model_path), config=config)
         self.mrcnn_model.load_weights(str(model_weights), by_name=True)
     
@@ -159,7 +158,10 @@ def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mas
         img_array = ocrolib.pil2array(page_image)
         if len(img_array.shape) <= 2:
             img_array = np.stack((img_array,)*3, axis=-1)
-        results = self.mrcnn_model.detect([img_array], verbose=0)
+        # convert to incidence matrix
+        class_ids = np.array([[1 if category in self.parameter['active_classes'] else 0
+                               for category in CLASS_NAMES]], dtype=np.int32)
+        results = self.mrcnn_model.detect([img_array], verbose=0, active_class_ids=class_ids)
         r = results[0]
         LOG.info('found %d candidates on page "%s"', len(r['rois']), page_id)
 
diff --git a/ocrd_anybaseocr/mrcnn/model.py b/ocrd_anybaseocr/mrcnn/model.py
index f1ed968..e0b3aec 100644
--- a/ocrd_anybaseocr/mrcnn/model.py
+++ b/ocrd_anybaseocr/mrcnn/model.py
@@ -685,7 +685,7 @@ def compute_mask(self, inputs, mask=None):
 #  Detection Layer
 ############################################################
 
-def refine_detections_graph(rois, probs, deltas, window, config):
+def refine_detections_graph(rois, probs, deltas, window, active_class_ids, config):
     """Refine classified proposals and filter overlaps and return final
     detections.
 
@@ -696,10 +696,16 @@ def refine_detections_graph(rois, probs, deltas, window, config):
                 bounding box deltas.
         window: (y1, x1, y2, x2) in normalized coordinates. The part of the image
             that contains the image excluding the padding.
+        active_class_ids: [num_classes]. Has a value of 1 for classes
+            that are allowed in the dataset of the image, and 0 for classes
+            that are not allowed in the dataset.
 
     Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where
         coordinates are normalized.
     """
+    # Suppress scores for inactive classes
+    probs = tf.where(tf.cast(K.tile(K.expand_dims(active_class_ids, 0), (probs.shape[0],1)), tf.bool),
+                     x=probs, y=K.zeros_like(probs))
     # Class IDs per ROI
     class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
     # Class probability of the top class of each ROI
@@ -809,11 +815,12 @@ def call(self, inputs):
         m = parse_image_meta_graph(image_meta)
         image_shape = m['image_shape'][0]
         window = norm_boxes_graph(m['window'], image_shape[:2])
+        active_class_ids = m['active_class_ids']
 
         # Run detection refinement graph on each item in the batch
         detections_batch = utils.batch_slice(
-            [rois, mrcnn_class, mrcnn_bbox, window],
-            lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
+            [rois, mrcnn_class, mrcnn_bbox, window, active_class_ids],
+            lambda r, p, d, w, c: refine_detections_graph(r, p, d, w, c, self.config),
             self.config.IMAGES_PER_GPU)
 
         # Reshape output
@@ -1275,7 +1282,7 @@ def hook(images, augmenter, parents, default):
     # Active classes
     # Different datasets have different classes, so track the
     # classes supported in the dataset of this image.
-    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
+    active_class_ids = np.zeros([config.NUM_CLASSES], dtype=np.int32)
     source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
     active_class_ids[source_class_ids] = 1
 
@@ -2379,11 +2386,13 @@ def train(self, train_dataset, val_dataset, learning_rate, epochs, layers,
         )
         self.epoch = max(self.epoch, epochs)
 
-    def mold_inputs(self, images):
+    def mold_inputs(self, images, active_class_ids=None):
         """Takes a list of images and modifies them to the format expected
         as an input to the neural network.
         images: List of image matrices [height,width,depth]. Images can have
             different sizes.
+        active_class_ids: List of class_ids allowed for the given images. Or
+            boolean matrix [images, classes].
 
         Returns 3 Numpy matrices:
         molded_images: [N, h, w, 3]. Images resized and normalized.
@@ -2394,7 +2403,18 @@ def mold_inputs(self, images):
         molded_images = []
         image_metas = []
         windows = []
-        for image in images:
+        if isinstance(active_class_ids, np.ndarray):
+            assert active_class_ids.shape == (len(images), self.config.NUM_CLASSES), \
+                "active_class_ids dimensions must match number of images and classes"
+            active_classes = active_class_ids
+        elif active_class_ids:
+            active_classes = np.zeros([self.config.NUM_CLASSES], dtype=np.int32)
+            active_classes[active_class_ids] = 1
+            active_classes = np.tile(active_classes, (len(images), 1))
+        else:
+            active_classes = np.ones([self.config.NUM_CLASSES], dtype=np.int32)
+            active_classes = np.tile(active_classes, (len(images), 1))
+        for i, image in enumerate(images):
             # Resize image
             # TODO: move resizing to mold_image()
             molded_image, window, scale, padding, crop = utils.resize_image(
@@ -2407,7 +2427,7 @@ def mold_inputs(self, images):
             # Build image_meta (per-image row of the normalized active-class matrix)
             image_meta = compose_image_meta(
                 0, image.shape, molded_image.shape, window, scale,
-                np.zeros([self.config.NUM_CLASSES], dtype=np.int32))
+                active_classes[i])
             # Append
             molded_images.append(molded_image)
             windows.append(window)
@@ -2483,10 +2503,12 @@ def unmold_detections(self, detections, mrcnn_mask, original_image_shape,
 
         return boxes, class_ids, scores, full_masks
 
-    def detect(self, images, verbose=0):
+    def detect(self, images, verbose=0, active_class_ids=None):
         """Runs the detection pipeline.
 
         images: List of images, potentially of different sizes.
+        active_class_ids: List of class_ids allowed for the given images. Or
+                          Boolean matrix [images, classes].
 
         Returns a list of dicts, one dict per image. The dict contains:
         rois: [N, (y1, x1, y2, x2)] detection bounding boxes
@@ -2504,7 +2526,7 @@ def detect(self, images, verbose=0):
                 log("image", image)
 
         # Mold inputs to format expected by the neural network
-        molded_images, image_metas, windows = self.mold_inputs(images)
+        molded_images, image_metas, windows = self.mold_inputs(images, active_class_ids)
 
         # Validate image sizes
         # All images in a batch MUST be of the same size
diff --git a/ocrd_anybaseocr/ocrd-tool.json b/ocrd_anybaseocr/ocrd-tool.json
index 62025c9..1112e25 100755
--- a/ocrd_anybaseocr/ocrd-tool.json
+++ b/ocrd_anybaseocr/ocrd-tool.json
@@ -175,6 +175,15 @@
 	  "default": 15,
 	  "description": "num of pixels to include in the area region (when applying text/non-text mask from tiseg)"
 	},
+	"active_classes": {
+	  "type": "array",
+	  "items": {
+	    "type": "string",
+	    "enum": ["page-number", "paragraph", "catch-word", "heading", "drop-capital", "signature-mark", "header", "marginalia", "footnote", "footnote-continued", "caption", "endnote", "footer", "keynote", "image", "table", "graphics"]
+	  },
+	  "default": ["page-number", "paragraph", "catch-word", "heading", "drop-capital", "signature-mark", "marginalia", "caption"],
+	  "description": "Restrict types of regions to be detected."
+	},
         "post_process": {
 	  "type": "boolean",
 	  "default": true,

From 06e75efeff34a2bf289d40258bc1fad04260aaf9 Mon Sep 17 00:00:00 2001
From: Konstantin Baierer <unixprog@gmail.com>
Date: Wed, 19 May 2021 13:35:25 +0200
Subject: [PATCH 23/23] tiseg: import keras from tensorflow not directly,
 OCR-D/ocrd_all#256

---
 ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
index 4b2d044..1e340e2 100755
--- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
+++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
@@ -20,6 +20,7 @@
 import numpy as np
 import shapely
 import ocrolib
+from ..tensorflow_importer import keras
 from keras.models import load_model
 #from keras_segmentation.models.unet import resnet50_unet
 from ocrd import Processor