Skip to content

Commit

Permalink
Merge pull request #43 from bertsky/repair-fix-coords
Browse files Browse the repository at this point in the history
repair: valid polygons
  • Loading branch information
bertsky authored Sep 14, 2020
2 parents 7596e9c + 24d26cd commit aef6517
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 239 deletions.
2 changes: 2 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ ignored-modules=cv2

[MESSAGES CONTROL]
disable =
super-with-arguments,
trailing-whitespace,
missing-docstring,
no-self-use,
superfluous-parens,
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

## [0.1.1] - 2020-09-14

Changed:

* repair: traverse all text regions recursively

Fixed:

* repair: be robust against invalid input polygons
* repair: be careful to make valid output polygons

## [0.1.0] - 2020-08-21

Changed:
Expand Down
16 changes: 1 addition & 15 deletions ocrd_segment/extract_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
polygon_from_points,
MIME_TO_EXT
)
-from ocrd_models.ocrd_page import (
-LabelsType, LabelType,
-MetadataItemType
-)
from ocrd_modelfactory import page_from_file
from ocrd import Processor

Expand Down Expand Up @@ -75,18 +71,8 @@ def process(self):
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
+self.add_metadata(pcgts)
page = pcgts.get_Page()
-metadata = pcgts.get_Metadata() # ensured by from_file()
-metadata.add_MetadataItem(
-MetadataItemType(type_="processingStep",
-name=self.ocrd_tool['steps'][0],
-value=TOOL,
-Labels=[LabelsType(
-externalModel="ocrd-tool",
-externalId="parameters",
-Label=[LabelType(type_=name,
-value=self.parameter[name])
-for name in self.parameter.keys()])]))
page_image, page_coords, page_image_info = self.workspace.image_from_page(
page, page_id,
transparency=self.parameter['transparency'])
Expand Down
16 changes: 1 addition & 15 deletions ocrd_segment/extract_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
xywh_from_polygon,
MIME_TO_EXT
)
-from ocrd_models.ocrd_page import (
-LabelsType, LabelType,
-MetadataItemType
-)
from ocrd_modelfactory import page_from_file
from ocrd import Processor

Expand Down Expand Up @@ -168,19 +164,9 @@ def process(self):
num_page_id = int(page_id.strip(page_id.strip("0123456789")))
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
+self.add_metadata(pcgts)
page = pcgts.get_Page()
ptype = page.get_type()
-metadata = pcgts.get_Metadata() # ensured by from_file()
-metadata.add_MetadataItem(
-MetadataItemType(type_="processingStep",
-name=self.ocrd_tool['steps'][0],
-value=TOOL,
-Labels=[LabelsType(
-externalModel="ocrd-tool",
-externalId="parameters",
-Label=[LabelType(type_=name,
-value=self.parameter[name])
-for name in self.parameter])]))
page_image, page_coords, page_image_info = self.workspace.image_from_page(
page, page_id,
feature_filter='binarized',
Expand Down
16 changes: 1 addition & 15 deletions ocrd_segment/extract_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
polygon_from_points,
MIME_TO_EXT
)
-from ocrd_models.ocrd_page import (
-LabelsType, LabelType,
-MetadataItemType
-)
from ocrd_modelfactory import page_from_file
from ocrd import Processor

Expand Down Expand Up @@ -72,18 +68,8 @@ def process(self):
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
+self.add_metadata(pcgts)
page = pcgts.get_Page()
-metadata = pcgts.get_Metadata() # ensured by from_file()
-metadata.add_MetadataItem(
-MetadataItemType(type_="processingStep",
-name=self.ocrd_tool['steps'][0],
-value=TOOL,
-Labels=[LabelsType(
-externalModel="ocrd-tool",
-externalId="parameters",
-Label=[LabelType(type_=name,
-value=self.parameter[name])
-for name in self.parameter])]))
page_image, page_coords, page_image_info = self.workspace.image_from_page(
page, page_id,
transparency=self.parameter['transparency'])
Expand Down
16 changes: 1 addition & 15 deletions ocrd_segment/import_coco_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
# pragma pylint: disable=unused-import
# (region types will be referenced indirectly via globals())
from ocrd_models.ocrd_page import (
-MetadataItemType,
-LabelsType, LabelType,
CoordsType,
TextRegionType,
ImageRegionType,
Expand Down Expand Up @@ -138,21 +136,9 @@ def process(self):
num_page_id = int(page_id.strip(page_id.strip("0123456789")))
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
+self.add_metadata(pcgts)
page = pcgts.get_Page()

-# add metadata about this operation and its runtime parameters:
-metadata = pcgts.get_Metadata() # ensured by from_file()
-metadata.add_MetadataItem(
-MetadataItemType(type_="processingStep",
-name=self.ocrd_tool['steps'][0],
-value=TOOL,
-Labels=[LabelsType(
-externalModel="ocrd-tool",
-externalId="parameters",
-Label=[LabelType(type_=name,
-value=self.parameter[name])
-for name in self.parameter.keys()])]))
-
# find COCO image
if page.imageFilename in images_by_filename:
image = images_by_filename[page.imageFilename]
Expand Down
16 changes: 1 addition & 15 deletions ocrd_segment/import_image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
# pragma pylint: disable=unused-import
# (region types will be referenced indirectly via globals())
from ocrd_models.ocrd_page import (
-MetadataItemType,
-LabelsType, LabelType,
CoordsType,
TextRegionType,
ImageRegionType,
Expand Down Expand Up @@ -89,21 +87,9 @@ def process(self):
input_file, segmentation_file = ift
LOG.info("processing page %s", input_file.pageId)
pcgts = page_from_file(self.workspace.download_file(input_file))
+self.add_metadata(pcgts)
page = pcgts.get_Page()

-# add metadata about this operation and its runtime parameters:
-metadata = pcgts.get_Metadata() # ensured by from_file()
-metadata.add_MetadataItem(
-MetadataItemType(type_="processingStep",
-name=self.ocrd_tool['steps'][0],
-value=TOOL,
-Labels=[LabelsType(
-externalModel="ocrd-tool",
-externalId="parameters",
-Label=[LabelType(type_=name,
-value=self.parameter[name])
-for name in self.parameter.keys()])]))
-
# import mask image
segmentation_filename = self.workspace.download_file(segmentation_file).local_filename
with pushd_popd(self.workspace.directory):
Expand Down
2 changes: 1 addition & 1 deletion ocrd_segment/ocrd-tool.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
-"version": "0.1.0",
+"version": "0.1.1",
"git_url": "https://github.com/OCR-D/ocrd_segment",
"tools": {
"ocrd-segment-repair": {
Expand Down
Loading

0 comments on commit aef6517

Please sign in to comment.