Skip to content

Commit

Permalink
Move global flags to the start of regular expressions (#349)
Browse files Browse the repository at this point in the history
* Revert "Move global flags to the start of regular expressions (fix for issue #348)"

This reverts commit 13b544b.

* Move global flags to the start of regular expressions

Fix regex replacements for models where global flags were put at the end
of the pattern strings. These patterns are invalid as of Python 3.11.
  • Loading branch information
mikegerber authored Oct 24, 2023
1 parent 9c8fb00 commit 6b1412b
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion calamari_ocr/ocr/text_processing/text_regularizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def default_text_regularizer_params(params=TextProcessorParams(), groups=["simpl

def replacement(old, new, regex=False):
    """Append one replacement rule to ``params.replacements``.

    Args:
        old: Text (or regular expression, when ``regex`` is true) to search for.
        new: Text that replaces each match of ``old``.
        regex: Whether ``old`` is a regular expression rather than a literal.
    """
    r = params.replacements.add()
    # Store the pattern verbatim. The earlier conditional assignment that moved
    # a trailing "(?u)" to the front was overwritten on the very next line, so
    # it had no effect; that normalization is done by
    # TextRegularizer._fix_regex_flags instead.
    r.old = old
    r.new = new
    r.regex = regex

Expand Down Expand Up @@ -343,6 +343,14 @@ class TextRegularizer(TextProcessor):
def __init__(self, params=default_text_regularizer_params()):
    """Set up the regularizer with the given replacement parameters.

    Args:
        params: Parameter object holding the ``replacements`` to apply.
            The default is evaluated once, when this method is defined,
            so every instance created without an explicit ``params``
            shares that same object.
    """
    super().__init__()
    self.params = params
    # Rewrite patterns whose global flag sits at the end before any
    # replacement is applied; this edits ``params`` in place.
    self._fix_regex_flags()

def _fix_regex_flags(self):
    """Move a trailing ``(?u)`` flag to the front of regex replacement patterns.

    Older models stored the global flag at the end of the pattern string;
    such patterns are invalid as of Python 3.11. Each affected entry of
    ``self.params.replacements`` is rewritten in place. Non-regex
    replacements and patterns without the trailing flag are left untouched.
    """
    flag = "(?u)"
    for rule in self.params.replacements:
        if not rule.regex:
            continue
        pattern = rule.old
        if pattern.endswith(flag):
            # Strip the flag from the end and put it in front instead.
            rule.old = flag + pattern[: -len(flag)]

def _apply_single(self, txt):
for replacement in self.params.replacements:
Expand Down

0 comments on commit 6b1412b

Please sign in to comment.