Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix tiseg #79

Merged
merged 23 commits into from
May 19, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
57fd1e7
tiseg: fix typo
bertsky Feb 1, 2021
bab56a6
tiseg: remove trailing whitespace
bertsky Feb 1, 2021
624b32e
tiseg: unused parameters
bertsky Feb 1, 2021
99c457b
tiseg (legacy): do not enforce deskewed/cropped
bertsky Feb 1, 2021
62a9765
tiseg (legacy): fix image pageId
bertsky Feb 1, 2021
21a2cd9
tiseg: clean imports and import order
bertsky Feb 1, 2021
82a0055
tiseg (ML): load during init/setup instead of process
bertsky Feb 1, 2021
eb6c98f
tiseg (ML): clean unused function
bertsky Feb 1, 2021
96ec2ee
tiseg (legacy): fix image vs text part
bertsky Feb 1, 2021
9105973
tiseg (legacy): fix image vs background
bertsky Feb 1, 2021
25fc8e1
tiseg: show class counts
bertsky Feb 1, 2021
665a8dd
block-segmentation: resolve_resource already exits verbosely
bertsky Feb 3, 2021
266756c
block-segmentation: proper class ID/name mapping
bertsky Feb 3, 2021
a956f63
block-segmentation: fix Border intersection
bertsky Feb 3, 2021
5a4d874
block-segmentation: fix TF logger init
bertsky Feb 3, 2021
e641e31
block-segmentation: remove buggy/useless AlternativeImage creation
bertsky Feb 3, 2021
046735d
block-segmentation: fix Border intersection (applies in absolute coords)
bertsky Feb 3, 2021
0febe79
block-segmentation: fix overwrite==false (continue by adding more)
bertsky Feb 3, 2021
3d54a19
block-segmentation: move model loading to setup()
bertsky Feb 3, 2021
681b70f
block-segmentation: decode masks into polygons
bertsky Feb 3, 2021
a813798
block segmentation: post-processing, fix reading order…
bertsky Feb 4, 2021
8c3db37
block segmentation: restrict active classes (default suppresses footn…
bertsky Feb 4, 2021
06e75ef
tiseg: import keras from tensorflow not directly, OCR-D/ocrd_all#256
kba May 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 94 additions & 112 deletions ocrd_anybaseocr/cli/ocrd_anybaseocr_tiseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,34 @@
# URL - https://www.dfki.de/fileadmin/user_upload/import/9512_ICDAR2017_anyOCR.pdf


from scipy import ones, zeros, array, where, shape, ndimage, logical_or, logical_and
import copy
from pylab import unique
import ocrolib
import json
from PIL import Image
import sys
import os
from pathlib import Path
import sys
import math
import click
from PIL import Image
from scipy import ndimage
import numpy as np
import shapely
import cv2
import math
from ..constants import OCRD_TOOL
from pathlib import Path
import ocrolib
from keras.models import load_model
#from keras_segmentation.models.unet import resnet50_unet
from ocrd import Processor
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml, AlternativeImageType
from ocrd_utils import (
getLogger,
concat_padded,
getLogger,
concat_padded,
MIMETYPE_PAGE,
coordinates_for_segment,
points_from_polygon,
make_file_id,
assert_file_grp_cardinality,
)
import click
)
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor

from keras.models import load_model
#from keras_segmentation.models.unet import resnet50_unet

from ocrd_models.ocrd_page import to_xml, AlternativeImageType
from ..constants import OCRD_TOOL

TOOL = 'ocrd-anybaseocr-tiseg'

Expand All @@ -49,56 +45,45 @@ def __init__(self, *args, **kwargs):
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
kwargs['version'] = OCRD_TOOL['version']
super(OcrdAnybaseocrTiseg, self).__init__(*args, **kwargs)
if hasattr(self, 'output_file_grp') and hasattr(self, 'parameter'):
# processing context
self.setup()

def crop_image(self, image_path, crop_region):
img = Image.open(image_path)
cropped = img.crop(crop_region)
return cropped
def setup(self):
LOG = getLogger('OcrdAnybaseocrTiseg')
self.model = None
if self.parameter['use_deeplr']:

model_weights = self.resolve_resource(self.parameter['seg_weights'])
#model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
#model.load_weights(model_weights)
self.model = load_model(model_weights)
LOG.info('Loaded segmentation model')

def process(self):
LOG = getLogger('OcrdAnybaseocrTiseg')

assert_file_grp_cardinality(self.input_file_grp, 1)
assert_file_grp_cardinality(self.output_file_grp, 1)
oplevel = self.parameter['operation_level']

model = None
if self.parameter['use_deeplr']:

model_weights = self.resolve_resource(self.parameter['seg_weights'])

if not Path(model_weights).is_file():
LOG.error("""\
Segementation model weights file was not found at '%s'. Make sure the `seg_weights` parameter
points to the local model weights path.
""" % model_weights)
sys.exit(1)

#model = resnet50_unet(n_classes=self.parameter['classes'], input_height=self.parameter['height'], input_width=self.parameter['width'])
#model.load_weights(model_weights)
model = load_model(model_weights)
LOG.info('Segmentation Model loaded')

for (n, input_file) in enumerate(self.input_files):
for input_file in self.input_files:
page_id = input_file.pageId or input_file.ID

pcgts = page_from_file(self.workspace.download_file(input_file))
self.add_metadata(pcgts)

page = pcgts.get_Page()
LOG.info("INPUT FILE %s", input_file.pageId or input_file.ID)

if self.parameter['use_deeplr']:
page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_filter='binarized,deskewed,cropped')
kwargs = {'feature_filter': 'binarized,deskewed,cropped'}
else:
page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id, feature_selector='binarized,deskewed,cropped')

if oplevel == 'page':
self._process_segment(page_image, page, page_xywh, page_id, input_file, n, model)
else:
LOG.warning('Operation level %s, but should be "page".', oplevel)
break

# _should_ also be deskewed and cropped, but no need to enforce that here
kwargs = {'feature_selector': 'binarized'}
page_image, page_coords, page_image_info = self.workspace.image_from_page(
page, page_id, **kwargs)

self._process_segment(page, page_image, page_coords, page_id, input_file)

file_id = make_file_id(input_file, self.output_file_grp)
pcgts.set_pcGtsId(file_id)
Expand All @@ -110,44 +95,43 @@ def process(self):
local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
content=to_xml(pcgts).encode('utf-8'),
)
def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, model):

def _process_segment(self, page, page_image, page_coords, page_id, input_file):
LOG = getLogger('OcrdAnybaseocrTiseg')
if model:

if self.model:

I = ocrolib.pil2array(page_image.resize((800, 1024), Image.ANTIALIAS))
I = np.array(I)[np.newaxis, :, :, :]
LOG.info('I shape %s', I.shape)
if len(I.shape)<3:
print('Wrong input shape. Image should have 3 channel')

# get prediction
#out = model.predict_segmentation(
#out = self.model.predict_segmentation(
# inp=I,
# out_fname="/tmp/out.png"
#)
out = model.predict(I)
out = self.model.predict(I)
out = out.reshape((2048, 1600, 3)).argmax(axis=2)

text_part = np.ones(out.shape)
text_part = 255 * np.ones(out.shape, 'B')
text_part[np.where(out==1)] = 0

image_part = np.ones(out.shape)
LOG.info('text: %d%', 100 * (1 - np.count_nonzero(text_part) / np.prod(out.shape)))

image_part = 255 * np.ones(out.shape, 'B')
image_part[np.where(out==2)] = 0

image_part = array(255*(image_part), 'B')
image_part = ocrolib.array2pil(image_part)
LOG.info('image: %d%', 100 * (1 - np.count_nonzero(image_part) / np.prod(out.shape)))

text_part = array(255*(text_part), 'B')
image_part = ocrolib.array2pil(image_part)
text_part = ocrolib.array2pil(text_part)

text_part = text_part.resize(page_image.size, Image.BICUBIC)

image_part = image_part.resize(page_image.size, Image.BICUBIC)

text_part = text_part.resize(page_image.size, Image.BICUBIC)

else:
I = ocrolib.pil2array(page_image)

if len(I.shape) > 2:
I = np.mean(I, 2)
I = 1-I/I.max()
Expand All @@ -160,92 +144,91 @@ def _process_segment(self,page_image, page, page_xywh, page_id, input_file, n, m
Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

# Dilation of Iseedfill
mask = ones((3, 3))
mask = np.ones((3, 3))
Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

# Expand Iseedfill so that it has the same size as I
Iseedfill = self.expansion(Iseedfill, (rows, cols))

# Write Text and Non-Text images
image_part = array((1-I*Iseedfill), dtype=int)
text_part = array((1-I*(1-Iseedfill)), dtype=int)
image_part = np.array(255*(1-I*Iseedfill), dtype='B')
text_part = np.array(255*(1-I*(1-Iseedfill)), dtype='B')
LOG.info('text: %d%', 100 * (1 - np.count_nonzero(text_part) / np.prod(I.shape)))
LOG.info('image: %d%', 100 * (1 - np.count_nonzero(image_part) / np.prod(I.shape)))

image_part = ocrolib.array2pil(image_part)
text_part = ocrolib.array2pil(text_part)

bin_array = array(255*(text_part>ocrolib.midrange(img_part)),'B')
text_part = ocrolib.array2pil(bin_array)

bin_array = array(255*(text_part>ocrolib.midrange(text_part)),'B')
image_part = ocrolib.array2pil(bin_array)


file_id = make_file_id(input_file, self.output_file_grp)
file_path = self.workspace.save_image_file(image_part,
file_id+"_img",
page_id=page_id,
page_id=input_file.pageId,
file_grp=self.output_file_grp,
)
page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']+',non_text'))

page_xywh['features'] += ',clipped'
)
page.add_AlternativeImage(AlternativeImageType(
filename=file_path, comments=page_coords['features'] + ',non_text'))

file_path = self.workspace.save_image_file(text_part,
file_id+"_txt",
page_id=page_id,
page_id=input_file.pageId,
file_grp=self.output_file_grp,
)
page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']))

)
page.add_AlternativeImage(AlternativeImageType(
filename=file_path, comments=page_coords['features'] + ',clipped'))

def pixMorphSequence_mask_seed_fill_holes(self, I):
Imask = self.reduction_T_1(I)
Imask = self.reduction_T_1(Imask)
Imask = ndimage.binary_fill_holes(Imask)
Iseed = self.reduction_T_4(Imask)
Iseed = self.reduction_T_3(Iseed)
mask = array(ones((5, 5)), dtype=int)
mask = np.array(np.ones((5, 5)), dtype=int)
Iseed = ndimage.binary_opening(Iseed, mask)
Iseed = self.expansion(Iseed, Imask.shape)
return Imask, Iseed

def pixSeedfillBinary(self, Imask, Iseed):
Iseedfill = copy.deepcopy(Iseed)
s = ones((3, 3))
s = np.ones((3, 3))
Ijmask, k = ndimage.label(Imask, s)
Ijmask2 = Ijmask * Iseedfill
A = list(unique(Ijmask2))
A = list(np.unique(Ijmask2))
A.remove(0)
for i in range(0, len(A)):
x, y = where(Ijmask == A[i])
x, y = np.where(Ijmask == A[i])
Iseedfill[x, y] = 1
return Iseedfill

def reduction_T_1(self, I):
A = logical_or(I[0:-1:2, :], I[1::2, :])
A = logical_or(A[:, 0:-1:2], A[:, 1::2])
A = np.logical_or(I[0:-1:2, :], I[1::2, :])
A = np.logical_or(A[:, 0:-1:2], A[:, 1::2])
return A

def reduction_T_2(self, I):
A = logical_or(I[0:-1:2, :], I[1::2, :])
A = logical_and(A[:, 0:-1:2], A[:, 1::2])
B = logical_and(I[0:-1:2, :], I[1::2, :])
B = logical_or(B[:, 0:-1:2], B[:, 1::2])
C = logical_or(A, B)
A = np.logical_or(I[0:-1:2, :], I[1::2, :])
A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
B = np.logical_and(I[0:-1:2, :], I[1::2, :])
B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
C = np.logical_or(A, B)
return C

def reduction_T_3(self, I):
A = logical_or(I[0:-1:2, :], I[1::2, :])
A = logical_and(A[:, 0:-1:2], A[:, 1::2])
B = logical_and(I[0:-1:2, :], I[1::2, :])
B = logical_or(B[:, 0:-1:2], B[:, 1::2])
C = logical_and(A, B)
A = np.logical_or(I[0:-1:2, :], I[1::2, :])
A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
B = np.logical_and(I[0:-1:2, :], I[1::2, :])
B = np.logical_or(B[:, 0:-1:2], B[:, 1::2])
C = np.logical_and(A, B)
return C

def reduction_T_4(self, I):
A = logical_and(I[0:-1:2, :], I[1::2, :])
A = logical_and(A[:, 0:-1:2], A[:, 1::2])
A = np.logical_and(I[0:-1:2, :], I[1::2, :])
A = np.logical_and(A[:, 0:-1:2], A[:, 1::2])
return A

def expansion(self, I, rows_cols):
r, c = I.shape
rows, cols = rows_cols
A = zeros((rows, cols))
A = np.zeros((rows, cols))
A[0:4*r:4, 0:4*c:4] = I
A[1:4*r:4, :] = A[0:4*r:4, :]
A[2:4*r:4, :] = A[0:4*r:4, :]
Expand All @@ -254,7 +237,7 @@ def expansion(self, I, rows_cols):
A[:, 2:4*c:4] = A[:, 0:4*c:4]
A[:, 3:4*c:4] = A[:, 0:4*c:4]
return A

def alpha_shape(self, coords, alpha):
import shapely.geometry as geometry
from shapely.ops import cascaded_union, polygonize
Expand Down Expand Up @@ -283,7 +266,7 @@ def add_edge(edges, edge_points, coords, i, j):
return
edges.add( (i, j) )
edge_points.append(coords[ [i, j] ])

tri = Delaunay(coords)
edges = set()
edge_points = []
Expand Down Expand Up @@ -313,7 +296,6 @@ def add_edge(edges, edge_points, coords, i, j):
triangles = list(polygonize(m))
return cascaded_union(triangles), edge_points


@click.command()
@ocrd_cli_options
def cli(*args, **kwargs):
Expand Down
19 changes: 13 additions & 6 deletions ocrd_anybaseocr/ocrd-tool.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,19 @@
"steps": ["layout/segmentation/text-image"],
"description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.",
"parameters": {
"use_deeplr": {"type":"boolean", "default":true, "description": "use deep learning model"},
"seg_weights": {"type":"string", "default":"seg_model.hdf5", "description":"path to weights file", "required":false},
"classes": {"type":"integer", "default":3, "description":"number of classes" },
"width" : {"type":"integer", "default":1024, "description":"input image height"},
"height" : {"type":"integer", "default":800, "description":"input image width"},
"operation_level": {"type": "string", "enum": ["page","region", "line"], "default": "page","description": "PAGE XML hierarchy level to operate on"}
"use_deeplr": {
"type":"boolean",
"default":true,
"description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology)."
},
"seg_weights": {
"type":"string",
"format":"uri",
"content-type": "application/x-hdf;subtype=bag",
"cacheable": true,
"default":"seg_model.hdf5",
"description":"Path to weights file for deep learning model when use_deeplr is true."
}
}
},
"ocrd-anybaseocr-textline": {
Expand Down