diff --git a/.circleci/config.yml b/.circleci/config.yml index 556d52388f..d5cf242cce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,7 +16,7 @@ jobs: steps: - checkout - run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos - - run: make install + - run: make install - run: make deps-test test benchmark test-python36: diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index d52633911f..c3c24c0db6 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -35,4 +35,4 @@ jobs: run: | docker push ghcr.io/ocrd/core:latest docker push ghcr.io/ocrd/core-cuda:latest - + diff --git a/CHANGELOG.md b/CHANGELOG.md index ff98421492..56b1d93300 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1053,7 +1053,7 @@ Added: * Workspace validation will check cardinality of images per file is 1, #243, OCR-D/spec#132 Changed: - + * bashlib will no longer warn about "non-conformant" file group names, #365 * Invalid `file:/` URL will now raise exceptions, #373 * image_from_*: increase tolerance for size mismatch after rotation to 2px, #371 diff --git a/Makefile b/Makefile index 19b072df08..5268d9a4ed 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ LOG_LEVEL = INFO PYTHONIOENCODING=utf8 TESTDIR = tests -SPHINX_APIDOC = +SPHINX_APIDOC = BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd @@ -52,7 +52,7 @@ DOCKER_TAG = ocrd/core DOCKER_BASE_IMAGE = ubuntu:20.04 # Additional arguments to docker build. Default: '$(DOCKER_ARGS)' -DOCKER_ARGS = +DOCKER_ARGS = # pip install command. Default: $(PIP_INSTALL) PIP_INSTALL = $(PIP) install diff --git a/ocrd/ocrd/lib.bash b/ocrd/ocrd/lib.bash index a34263637c..eb2190028b 100644 --- a/ocrd/ocrd/lib.bash +++ b/ocrd/ocrd/lib.bash @@ -3,14 +3,14 @@ exit 1 ## ### `ocrd__raise` -## +## ## Raise an error and exit. ocrd__raise () { echo >&2 "ERROR: $1"; exit 127 } ## ### `ocrd__log` -## +## ## Delegate logging to `ocrd log` ocrd__log () { local log_level="${ocrd__argv[log_level]:-}" @@ -23,7 +23,7 @@ ocrd__log () { ## ### `ocrd__minversion` -## +## ## Ensure minimum version # ht https://stackoverflow.com/posts/4025065 ocrd__minversion () { @@ -53,28 +53,28 @@ ocrd__minversion () { } ## ### `ocrd__dumpjson` -## +## ## Output ocrd-tool.json. -## +## ## Requires `$OCRD_TOOL_JSON` and `$OCRD_TOOL_NAME` to be set: -## +## ## ```sh ## export OCRD_TOOL_JSON=/path/to/ocrd-tool.json ## export OCRD_TOOL_NAME=ocrd-foo-bar ## ``` -## +## ocrd__dumpjson () { ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" dump } -## +## ## Output file resource content. ## ocrd__show_resource () { ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" show-resource "$1" } -## +## ## Output file resources names. ## ocrd__list_resources () { @@ -82,9 +82,9 @@ ocrd__list_resources () { } ## ### `ocrd__usage` -## +## ## Print usage -## +## ocrd__usage () { ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" help @@ -92,9 +92,9 @@ ocrd__usage () { } ## ### `ocrd__parse_argv` -## +## ## Expects an associative array ("hash"/"dict") `ocrd__argv` to be defined: -## +## ## ```sh ## declare -A ocrd__argv=() ## ``` diff --git a/ocrd/ocrd/processor/base.py b/ocrd/ocrd/processor/base.py index 7f6f5c271b..6da31860c5 100644 --- a/ocrd/ocrd/processor/base.py +++ b/ocrd/ocrd/processor/base.py @@ -43,7 +43,7 @@ class Processor(): for run-time data processing. That is, it executes a single workflow step, or a combination of workflow steps, on the workspace (represented by local METS). It reads input files for all or requested physical pages of the input fileGrp(s), - and writes output files for them into the output fileGrp(s). It may take + and writes output files for them into the output fileGrp(s). It may take a number of optional or mandatory parameters. """ @@ -163,12 +163,12 @@ def verify(self): def process(self): """ - Process the :py:attr:`workspace` + Process the :py:attr:`workspace` from the given :py:attr:`input_file_grp` to the given :py:attr:`output_file_grp` for the given :py:attr:`page_id` under the given :py:attr:`parameter`. - + (This contains the main functionality and needs to be overridden by subclasses.) """ raise Exception("Must be implemented") @@ -279,7 +279,7 @@ def input_files(self): - Otherwise raise an error (complaining that only PAGE-XML warrants having multiple images for a single page) Algorithm _ - + Returns: A list of :py:class:`ocrd_models.ocrd_file.OcrdFile` objects. """ diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index 3450b51a0b..26f81e70f1 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -107,9 +107,9 @@ def run_processor( mem_usage = memory_usage(proc=processor.process, # only run process once max_iterations=1, - interval=.1, timeout=None, timestamps=True, + interval=.1, timeout=None, timestamps=True, # include sub-processes - multiprocess=True, include_children=True, + multiprocess=True, include_children=True, # get proportional set size instead of RSS backend=backend) mem_usage_values = [mem for mem, _ in mem_usage] @@ -198,7 +198,7 @@ def run_cli( def generate_processor_help(ocrd_tool, processor_instance=None): """Generate a string describing the full CLI of this processor including params. - + Args: ocrd_tool (dict): this processor's ``tools`` section of the module's ``ocrd-tool.json`` processor_instance (object, optional): the processor implementation @@ -281,7 +281,7 @@ def wrap(s): # Taken from https://github.com/OCR-D/core/pull/884 @freeze_args -@lru_cache(maxsize=environ.get('OCRD_MAX_PROCESSOR_CACHE', 128)) +@lru_cache(maxsize=environ.get('OCRD_MAX_PROCESSOR_CACHE', 128)) def get_cached_processor(parameter: dict, processor_class): """ Call this function to get back an instance of a processor. diff --git a/ocrd/ocrd/resolver.py b/ocrd/ocrd/resolver.py index 5eddf0970c..19f009d27b 100644 --- a/ocrd/ocrd/resolver.py +++ b/ocrd/ocrd/resolver.py @@ -123,7 +123,7 @@ def workspace_from_url(self, mets_url, dst_dir=None, clobber_mets=False, mets_ba download (boolean, False): Whether to also download all the files referenced by the METS src_baseurl (string, None): Base URL for resolving relative file locations - Download (clone) :py:attr:`mets_url` to ``mets.xml`` in :py:attr:`dst_dir`, unless + Download (clone) :py:attr:`mets_url` to ``mets.xml`` in :py:attr:`dst_dir`, unless the former is already local and the latter is ``none`` or already identical to its directory name. Returns: diff --git a/ocrd/ocrd/resource_manager.py b/ocrd/ocrd/resource_manager.py index 84fbc6b01e..d5c3659750 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/ocrd/ocrd/resource_manager.py @@ -168,9 +168,9 @@ def list_installed(self, executable=None): resdict = resdict_list[0] elif str(res_filename.parent) == moduledir: resdict = { - 'name': res_name, - 'url': str(res_filename), - 'description': 'Found at module', + 'name': res_name, + 'url': str(res_filename), + 'description': 'Found at module', 'type': res_type, 'size': res_size } diff --git a/ocrd/ocrd/task_sequence.py b/ocrd/ocrd/task_sequence.py index dcf9c86bc4..a20bb69042 100644 --- a/ocrd/ocrd/task_sequence.py +++ b/ocrd/ocrd/task_sequence.py @@ -108,7 +108,7 @@ def validate_tasks(tasks, workspace, page_id=None, overwrite=False): # TODO disable output_file_grps checks once CLI parameter 'overwrite' is implemented # XXX Thu Jan 16 20:14:17 CET 2020 still not sufficiently clever. # if len(prev_output_file_grps) != len(set(prev_output_file_grps)): - # report.add_error("Output file group specified multiple times: %s" % + # report.add_error("Output file group specified multiple times: %s" % # [grp for grp, count in Counter(prev_output_file_grps).items() if count >= 2]) prev_output_file_grps += task.output_file_grps if not report.is_valid: diff --git a/ocrd_models/README.md b/ocrd_models/README.md index 57b0cbe7c7..8f1bc67f91 100644 --- a/ocrd_models/README.md +++ b/ocrd_models/README.md @@ -36,7 +36,7 @@ Let's say you want to add a method `get_FirstTextRegion` on the `pc:Page` elemen Would add the method `exportChildren` from a file `exportChildren_PageType.py`. - > **Note**: + > **Note**: > The method name in the file must match the method name passed to > `_add_method`. This is *not* checked automatically, so double-check manually! diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index dddf3f556f..fd027d7e5c 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -143,7 +143,7 @@ def _clear_caches(self): self._file_cache = None self._page_cache = None self._fptr_cache = None - + def refresh_caches(self): if self._cache_flag: # Cache for the files (mets:file) - two nested dictionaries @@ -164,11 +164,11 @@ def refresh_caches(self): # The inner dictionary's Key: 'fptr.FILEID' # The inner dictionary's Value: a 'fptr' object at some memory location self._fptr_cache = {} - + # Note, if the empty_mets() function is used to instantiate OcrdMets # Then the cache is empty even after this operation self._fill_caches() - + @property def unique_identifier(self): """ @@ -179,7 +179,7 @@ def unique_identifier(self): found = self._tree.getroot().find('.//mods:identifier[@type="%s"]' % t, NS) if found is not None: return found.text - + @unique_identifier.setter def unique_identifier(self, purl): """ @@ -295,7 +295,7 @@ def find_files(self, ID=None, fileGrp=None, pageId=None, mimetype=None, url=None mimetype = re.compile(mimetype[REGEX_PREFIX_LEN:]) if url and url.startswith(REGEX_PREFIX): url = re.compile(url[REGEX_PREFIX_LEN:]) - + candidates = [] if self._cache_flag: if fileGrp: @@ -307,7 +307,7 @@ def find_files(self, ID=None, fileGrp=None, pageId=None, mimetype=None, url=None candidates = [el_file for id_to_file in self._file_cache.values() for el_file in id_to_file.values()] else: candidates = self._tree.getroot().xpath('//mets:file', namespaces=NS) - + for cand in candidates: if ID: if isinstance(ID, str): @@ -364,11 +364,11 @@ def add_file_group(self, fileGrp): if el_fileGrp is None: el_fileGrp = ET.SubElement(el_fileSec, TAG_METS_FILEGRP) el_fileGrp.set('USE', fileGrp) - + if self._cache_flag: # Assign an empty dictionary that will hold the files of the added fileGrp self._file_cache[fileGrp] = {} - + return el_fileGrp def rename_file_group(self, old, new): @@ -379,7 +379,7 @@ def rename_file_group(self, old, new): if el_fileGrp is None: raise FileNotFoundError("No such fileGrp '%s'" % old) el_fileGrp.set('USE', new) - + if self._cache_flag: self._file_cache[new] = self._file_cache.pop(old) @@ -427,7 +427,7 @@ def remove_file_group(self, USE, recursive=False, force=False): if self._cache_flag: # Note: Since the files inside the group are removed - # with the 'remove_one_file' method above, + # with the 'remove_one_file' method above, # we should not take care of that again. # We just remove the fileGrp. del self._file_cache[el_fileGrp.get('USE')] @@ -566,7 +566,7 @@ def physical_pages(self): """ if self._cache_flag: return list(self._page_cache.keys()) - + return self._tree.getroot().xpath( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/@ID', namespaces=NS) @@ -579,7 +579,7 @@ def get_physical_pages(self, for_fileIds=None): if for_fileIds is None: return self.physical_pages ret = [None] * len(for_fileIds) - + if self._cache_flag: for pageId in self._fptr_cache.keys(): for fptr in self._fptr_cache[pageId].keys(): @@ -632,14 +632,14 @@ def set_physical_page_for_file(self, pageId, ocrd_file, order=None, orderlabel=N if el_seqdiv is None: el_seqdiv = ET.SubElement(el_structmap, TAG_METS_DIV) el_seqdiv.set('TYPE', 'physSequence') - + el_pagediv = None if self._cache_flag: if pageId in self._page_cache: el_pagediv = self._page_cache[pageId] else: el_pagediv = el_seqdiv.find('mets:div[@ID="%s"]' % pageId, NS) - + if el_pagediv is None: el_pagediv = ET.SubElement(el_seqdiv, TAG_METS_DIV) el_pagediv.set('TYPE', 'page') @@ -651,10 +651,10 @@ def set_physical_page_for_file(self, pageId, ocrd_file, order=None, orderlabel=N if self._cache_flag: # Create a new entry in the page cache self._page_cache[pageId] = el_pagediv - # Create a new entry in the fptr cache and + # Create a new entry in the fptr cache and # assign an empty dictionary to hold the fileids self._fptr_cache[pageId] = {} - + el_fptr = ET.SubElement(el_pagediv, TAG_METS_FPTR) el_fptr.set('FILEID', ocrd_file.ID) @@ -715,7 +715,7 @@ def remove_physical_page_fptr(self, fileId): if self._cache_flag: for page_id in self._fptr_cache.keys(): if fileId in self._fptr_cache[page_id].keys(): - mets_fptrs.append(self._fptr_cache[page_id][fileId]) + mets_fptrs.append(self._fptr_cache[page_id][fileId]) else: mets_fptrs = self._tree.getroot().xpath( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/mets:fptr[@FILEID="%s"]' % fileId, namespaces=NS) diff --git a/ocrd_models/ocrd_models/ocrd_page_generateds.py b/ocrd_models/ocrd_models/ocrd_page_generateds.py index d95ea4b321..888f5968a6 100644 --- a/ocrd_models/ocrd_models/ocrd_page_generateds.py +++ b/ocrd_models/ocrd_models/ocrd_page_generateds.py @@ -1234,14 +1234,14 @@ def id(self): def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=True, glyph=True): """ Get all the ``pc:AlternativeImage/@filename`` paths referenced in the PAGE-XML document. - + Arguments: page (boolean): Get images on ``pc:Page`` level region (boolean): Get images on ``pc:*Region`` level line (boolean): Get images on ``pc:TextLine`` level word (boolean): Get images on ``pc:Word`` level glyph (boolean): Get images on ``pc:Glyph`` level - + Returns: a list of image filename strings """ @@ -1278,7 +1278,7 @@ def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=T ret += doc.xpath('//page:Word/page:AlternativeImage/@filename', namespaces=NAMESPACES) if glyph: ret += doc.xpath('//page:Glyph/page:AlternativeImage/@filename', namespaces=NAMESPACES) - + return ret def prune_ReadingOrder(self): """ @@ -3120,7 +3120,7 @@ def id(self): # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring def _region_class(self, x): # pylint: disable=unused-argument return x.__class__.__name__.replace('RegionType', '') - + def _get_recursive_regions(self, regions, level, classes=None): from .constants import PAGE_REGION_TYPES # pylint: disable=relative-beyond-top-level,import-outside-toplevel if level == 1: @@ -3146,7 +3146,7 @@ def _get_recursive_regions(self, regions, level, classes=None): ret.append(r) ret += self._get_recursive_regions(more, level - 1 if level else 0, classes) return self._get_recursive_regions(ret, 1, classes) - + def _get_recursive_reading_order(self, rogroup): if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable elements = rogroup.get_AllIndexed() @@ -3158,12 +3158,12 @@ def _get_recursive_reading_order(self, rogroup): if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable regionrefs.extend(self._get_recursive_reading_order(elem)) return regionrefs - + def get_AllRegions(self, classes=None, order='document', depth=0): """ Get all the ``*Region`` elements, or only those provided by `classes`. Return in document order, unless `order` is ``reading-order``. - + Arguments: classes (list): Classes of regions that shall be returned, \ e.g. ``['Text', 'Image']`` @@ -3174,7 +3174,7 @@ def get_AllRegions(self, classes=None, order='document', depth=0): omitted (``reading-order-only``) depth (int): Recursive depth to look for regions at, set to `0` for \ all regions at any depth. Default: 0 - + Returns: a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \ :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \ @@ -3184,7 +3184,7 @@ def get_AllRegions(self, classes=None, order='document', depth=0): :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \ :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \ and/or :py:class:`CustomRegionType` - + For example, to get all text anywhere on the page in reading order, use: :: '\\n'.join(line.get_TextEquiv()[0].Unicode @@ -3218,14 +3218,14 @@ def get_AllRegions(self, classes=None, order='document', depth=0): def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, glyph=True): """ Get all the ``pc:AlternativeImage`` in a document - + Arguments: page (boolean): Get images on ``pc:Page`` level region (boolean): Get images on ``pc:*Region`` level line (boolean): Get images on ``pc:TextLine`` level word (boolean): Get images on ``pc:Word`` level glyph (boolean): Get images on ``pc:Glyph`` level - + Returns: a list of :py:class:`AlternativeImageType` """ @@ -3245,11 +3245,11 @@ def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, if glyph: ret += this_glyph.get_AlternativeImage() return ret - + def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -3289,7 +3289,7 @@ def set_Border(self, Border): def get_AllTextLines(self, region_order='document', respect_textline_order=True): """ Return all the TextLine in the document - + Arguments: region_order ("document"|"reading-order"|"reading-order-only"): Whether to \ return regions sorted by document order (``document``, default) or by \ @@ -3297,7 +3297,7 @@ def get_AllTextLines(self, region_order='document', respect_textline_order=True) returned list (``reading-order``) or regions not in the reading order \ omitted (``reading-order-only``) respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute - + Returns: a list of :py:class:`TextLineType` """ @@ -3311,7 +3311,7 @@ def get_AllTextLines(self, region_order='document', respect_textline_order=True) lo = reg.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom' ret += lines if lo in ['top-to-bottom', 'left-to-right'] else list(reversed(lines)) return ret - + def set_orientation(self, orientation): """ Set deskewing angle to given `orientation` number. @@ -3981,7 +3981,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -4473,7 +4473,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -4917,7 +4917,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ @@ -6234,12 +6234,12 @@ def __hash__(self): def get_AllIndexed(self, classes=None, index_sort=True): """ Get all indexed children sorted by their ``@index``. - + Arguments: classes (list): Type of children (sans ``Indexed``) to return. \ Default: ``['RegionRef', 'OrderedGroup', 'UnorderedGroup']`` index_sort (boolean): Whether to sort by ``@index`` - + Returns: a list of :py:class:`RegionRefIndexedType`, \ :py:class:`OrderedGroupIndexedType`, and \ @@ -6259,13 +6259,13 @@ def clear_AllIndexed(self): self.set_OrderedGroupIndexed([]) self.set_UnorderedGroupIndexed([]) return ret - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): @@ -6304,7 +6304,7 @@ def sort_AllIndexed(self, validate_uniqueness=True): elif isinstance(element, UnorderedGroupIndexedType): # pylint: disable=undefined-variable self.add_UnorderedGroupIndexed(element) return self.get_AllIndexed() - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments namespaceprefix_ = 'pc:' @@ -6706,7 +6706,7 @@ def get_UnorderedGroupChildren(self): """ # TODO: should not change order return self.get_RegionRef() + self.get_OrderedGroup() + self.get_UnorderedGroup() - + # end class UnorderedGroupIndexedType @@ -7166,12 +7166,12 @@ def __hash__(self): def get_AllIndexed(self, classes=None, index_sort=True): """ Get all indexed children sorted by their ``@index``. - + Arguments: classes (list): Type of children (sans ``Indexed``) to return. \ Default: ``['RegionRef', 'OrderedGroup', 'UnorderedGroup']`` index_sort (boolean): Whether to sort by ``@index`` - + Returns: a list of :py:class:`RegionRefIndexedType`, \ :py:class:`OrderedGroupIndexedType`, and \ @@ -7191,13 +7191,13 @@ def clear_AllIndexed(self): self.set_OrderedGroupIndexed([]) self.set_UnorderedGroupIndexed([]) return ret - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): @@ -7236,7 +7236,7 @@ def sort_AllIndexed(self, validate_uniqueness=True): elif isinstance(element, UnorderedGroupIndexedType): # pylint: disable=undefined-variable self.add_UnorderedGroupIndexed(element) return self.get_AllIndexed() - + # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments namespaceprefix_ = 'pc:' @@ -7620,7 +7620,7 @@ def get_UnorderedGroupChildren(self): """ # TODO: should not change order return self.get_RegionRef() + self.get_OrderedGroup() + self.get_UnorderedGroup() - + # end class UnorderedGroupType @@ -9710,7 +9710,7 @@ def __hash__(self): def invalidate_AlternativeImage(self, feature_selector=None): """ Remove derived images from this segment (due to changed coordinates). - + If `feature_selector` is not none, remove only images with matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``. """ diff --git a/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py b/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py index 594d664277..c1ad330f91 100644 --- a/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py +++ b/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py @@ -3,7 +3,7 @@ def extend_AllIndexed(self, elements, validate_continuity=False): """ Add all elements in list `elements`, respecting ``@index`` order. With `validate_continuity`, check that all new elements come after all old elements - (or raise an exception). + (or raise an exception). Otherwise, ensure this condition silently (by increasing ``@index`` accordingly). """ if not isinstance(elements, list): diff --git a/ocrd_utils/ocrd_logging.conf b/ocrd_utils/ocrd_logging.conf index 3383b4f54d..1b9404a3ce 100644 --- a/ocrd_utils/ocrd_logging.conf +++ b/ocrd_utils/ocrd_logging.conf @@ -5,7 +5,7 @@ # into your CWD, HOME or /etc. These directories are searched # in said order, and the first find wins. When no config file # is found, the default logging configuration applies (cf. ocrd.logging.py). -# +# # mandatory loggers section # configure loggers with corresponding keys "root", "" # each logger requires a corresponding configuration section below @@ -43,8 +43,8 @@ handlers=consoleHandler # as separate configuration sections like below # # example logger "ocrd_workspace" uses fileHandler and overrides -# default log level "INFO" with custom level "DEBUG" -# "qualname" must match the logger label used in the corresponding +# default log level "INFO" with custom level "DEBUG" +# "qualname" must match the logger label used in the corresponding # ocrd module # see in the module-of-interest (moi) # diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 1eb92cff9f..147a2a3e59 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -8,11 +8,11 @@ levels below page (i.e. region, line, word, glyph) between relative coordinates w.r.t. a corresponding image and absolute coordinates w.r.t. the top-level image. This includes rotation and offset correction, based on affine transformations. - (Used by :py:class:`ocrd.workspace.Workspace` methods - :py:meth:`ocrd.workspace.Workspace.image_from_page` and + (Used by :py:class:`ocrd.workspace.Workspace` methods + :py:meth:`ocrd.workspace.Workspace.image_from_page` and :py:meth:`ocrd.workspace.Workspace.image_from_segment`.) -* :py:func:`rotate_coordinates`, +* :py:func:`rotate_coordinates`, :py:func:`shift_coordinates`, :py:func:`transpose_coordinates`, :py:func:`transform_coordinates` @@ -22,7 +22,7 @@ used to pass down the coordinate system along with images (both invariably sharing the same operations context) when traversing the element hierarchy top to bottom. (Used by :py:class:`ocrd.workspace.Workspace` methods - :py:meth:`ocrd.workspace.Workspace.image_from_page` and + :py:meth:`ocrd.workspace.Workspace.image_from_page` and :py:meth:`ocrd.workspace.Workspace.image_from_segment`.) * :py:func:`rotate_image`, diff --git a/ocrd_utils/ocrd_utils/image.py b/ocrd_utils/ocrd_utils/image.py index 48fb7a0539..2433ead475 100644 --- a/ocrd_utils/ocrd_utils/image.py +++ b/ocrd_utils/ocrd_utils/image.py @@ -41,12 +41,12 @@ def adjust_canvas_to_rotation(size, angle): """Calculate the enlarged image size after rotation. - + Given a numpy array ``size`` of an original canvas (width and height), and a rotation angle in degrees counter-clockwise ``angle``, calculate the new size which is necessary to encompass the full image after rotation. - + Return a numpy array of the enlarged width and height. """ angle = np.deg2rad(angle) @@ -58,11 +58,11 @@ def adjust_canvas_to_rotation(size, angle): def adjust_canvas_to_transposition(size, method): """Calculate the flipped image size after transposition. - + Given a numpy array ``size`` of an original canvas (width and height), and a transposition mode ``method`` (see ``transpose_image``), calculate the new size after transposition. - + Return a numpy array of the enlarged width and height. """ if method in [Image.ROTATE_90, @@ -159,7 +159,7 @@ def coordinates_for_segment(polygon, parent_image, parent_coords): - ``parent_coords``, its corresponding affine transformation, ...calculate the absolute coordinates within the page. - + That is, apply the given transform inversely to ``polygon`` The transform encodes (recursively): @@ -209,7 +209,7 @@ def rotate_coordinates(transform, angle, orig=np.array([0, 0])): by pure rotation, and subsequent translation back. However, since rotation necessarily increases the bounding box, and thus image size, do not translate back the same amount, but to the enlarged offset.) - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd_utils.coords.rotate_coordinates') @@ -295,7 +295,7 @@ def shift_coordinates(transform, offset): ``offset`` of the translation vector, calculate the affine coordinate transform corresponding to the composition of both transformations. - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd_utils.coords.shift_coordinates') @@ -312,7 +312,7 @@ def scale_coordinates(transform, factors): ``factors`` of the scaling factors, calculate the affine coordinate transform corresponding to the composition of both transformations. - + Return a numpy array of the resulting affine transformation matrix. """ LOG = getLogger('ocrd_utils.coords.scale_coordinates') @@ -438,7 +438,7 @@ def transpose_image(image, method): columns become rows (but counted from the bottom), i.e. all pixels get mirrored at the opposite diagonal; width becomes height and vice versa - + Return a new PIL.Image. """ LOG = getLogger('ocrd_utils.transpose_image') @@ -497,7 +497,7 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): Images which already have an alpha channel will have it shrunk from the polygon mask (i.e. everything outside the polygon will be transparent, in addition to existing transparent pixels). - + Return a new PIL.Image. """ if fill == 'none' or fill is None: diff --git a/ocrd_utils/ocrd_utils/os.py b/ocrd_utils/ocrd_utils/os.py index 478e019bc1..9bf13a0332 100644 --- a/ocrd_utils/ocrd_utils/os.py +++ b/ocrd_utils/ocrd_utils/os.py @@ -151,7 +151,7 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None): # code and data; `is_resource()` only singles out # files over directories; but we want data files only # todo: more code and cache exclusion patterns! - ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json', + ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json', 'environment.pickle', 'resource_list.yml', 'lib.bash']): continue candidates.append(resource) diff --git a/ocrd_validators/ocrd_validators/page.xsd b/ocrd_validators/ocrd_validators/page.xsd index edeac039d1..f096f34825 100644 --- a/ocrd_validators/ocrd_validators/page.xsd +++ b/ocrd_validators/ocrd_validators/page.xsd @@ -545,7 +545,7 @@ - The secondary script used in the text line + The secondary script used in the text line @@ -629,7 +629,7 @@ - The secondary script used in the word + The secondary script used in the word diff --git a/ocrd_validators/ocrd_validators/page_validator.py b/ocrd_validators/ocrd_validators/page_validator.py index d8e768eefe..bac26de139 100644 --- a/ocrd_validators/ocrd_validators/page_validator.py +++ b/ocrd_validators/ocrd_validators/page_validator.py @@ -178,7 +178,7 @@ def compare_without_whitespace(a, b): def page_get_reading_order(ro, rogroup): """Add all elements from the given reading order group to the given dictionary. - + Given a dict ``ro`` from layout element IDs to ReadingOrder element objects, and an object ``rogroup`` with additional ReadingOrder element objects, add all references to the dict, traversing the group recursively. diff --git a/ocrd_validators/ocrd_validators/xlink.xsd b/ocrd_validators/ocrd_validators/xlink.xsd index f55eb6dae1..8283fe6697 100644 --- a/ocrd_validators/ocrd_validators/xlink.xsd +++ b/ocrd_validators/ocrd_validators/xlink.xsd @@ -1,75 +1,75 @@ - + - + - - - - - + + + + + - - - - + + + + - - - + + + - - - - - - - + + + + + + + - - - + + + - - - - - + + + + + - - - - - - - + + + + + + + - - - - + + + + - + - + diff --git a/tests/model/mets_bench_extreme.py b/tests/model/mets_bench_extreme.py index 63b30e31db..799e6f610e 100644 --- a/tests/model/mets_bench_extreme.py +++ b/tests/model/mets_bench_extreme.py @@ -50,7 +50,7 @@ def benchmark_find_files(number_of_pages, mets): benchmark_find_files_filegrp(number_of_pages, mets) benchmark_find_files_fileid(number_of_pages, mets) benchmark_find_files_physical_page(number_of_pages, mets) - # This is not really useful to measure. + # This is not really useful to measure. # We iterate all files in both cached and non-cached in the same routine # When no specific search parameters are provided # benchmark_find_files_all(number_of_pages, mets) @@ -94,7 +94,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s50(benchmark): @benchmark - def ret(): + def ret(): global mets_50 benchmark_find_files(50, mets_50) del mets_50 @@ -130,7 +130,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s500(benchmark): @benchmark - def ret(): + def ret(): global mets_500 benchmark_find_files(500, mets_500) del mets_500 @@ -168,7 +168,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s1000(benchmark): @benchmark - def ret(): + def ret(): global mets_1000 benchmark_find_files(1000, mets_1000) del mets_1000 @@ -205,7 +205,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s2000(benchmark): @benchmark - def ret(): + def ret(): global mets_2000 benchmark_find_files(2000, mets_2000) del mets_2000 @@ -242,7 +242,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s5000(benchmark): @benchmark - def ret(): + def ret(): global mets_5000 benchmark_find_files(5000, mets_5000) del mets_5000 diff --git a/tests/model/mets_bench_extreme_additional.py b/tests/model/mets_bench_extreme_additional.py index e699454e2b..67802a2da5 100644 --- a/tests/model/mets_bench_extreme_additional.py +++ b/tests/model/mets_bench_extreme_additional.py @@ -49,7 +49,7 @@ def benchmark_find_files(number_of_pages, mets): benchmark_find_files_filegrp(number_of_pages, mets) benchmark_find_files_fileid(number_of_pages, mets) benchmark_find_files_physical_page(number_of_pages, mets) - # This is not really useful to measure. + # This is not really useful to measure. # We iterate all files in both cached and non-cached in the same routine # When no specific search parameters are provided # benchmark_find_files_all(number_of_pages, mets) @@ -88,7 +88,7 @@ def result(): @mark.benchmark(group="search", max_time=0.1, min_rounds=1, disable_gc=False, warmup=False) def test_s500(benchmark): @benchmark - def ret(): + def ret(): global mets_500 benchmark_find_files(500, mets_500) del mets_500 diff --git a/tests/model/test_agent.py b/tests/model/test_agent.py index 57c741655e..6494b0c8db 100644 --- a/tests/model/test_agent.py +++ b/tests/model/test_agent.py @@ -35,7 +35,7 @@ def test_init_othertype(): def test_set_name(): ag = OcrdAgent(name='foobar') assert ag.name == 'foobar' - ag.name = 'barfoo' + ag.name = 'barfoo' assert ag.name == 'barfoo' diff --git a/tests/model/test_ocrd_mets.py b/tests/model/test_ocrd_mets.py index ad72a6ebe9..58dc1c0be9 100644 --- a/tests/model/test_ocrd_mets.py +++ b/tests/model/test_ocrd_mets.py @@ -159,7 +159,7 @@ def test_add_file_id_already_exists(sbb_sample_01): assert len(list(sbb_sample_01.find_files(ID='best-id-ever'))) == 1 if sbb_sample_01._cache_flag else 2 if sbb_sample_01._cache_flag: - # Does not work with caching + # Does not work with caching with pytest.raises(FileExistsError) as val_err: sbb_sample_01.add_file('OUTPUT', ID='best-id-ever', mimetype="beep/boop", force=True) else: diff --git a/tests/model/test_ocrd_mets_bench.py b/tests/model/test_ocrd_mets_bench.py index ace6387336..3acc29b2b3 100644 --- a/tests/model/test_ocrd_mets_bench.py +++ b/tests/model/test_ocrd_mets_bench.py @@ -65,7 +65,7 @@ def benchmark_find_files_fileid(number_of_pages, mets): assert_len(1, mets, dict(ID='FULL_0001_TIF')) # Worst case - does not exist assert_len(0, mets, dict(ID='FULL_0001_TIF-NOTEXISTS')) - + def benchmark_find_files_physical_page(number_of_pages, mets): # Best case - first physical page assert_len(1, mets, dict(pageId='PHYS_0001')) @@ -117,28 +117,28 @@ def result(): @mark.benchmark(group="search") def test_s5(benchmark): @benchmark - def ret(): + def ret(): global mets_5 benchmark_find_files(5, mets_5) @mark.benchmark(group="search") def test_s10(benchmark): @benchmark - def ret(): + def ret(): global mets_10 benchmark_find_files(10, mets_10) @mark.benchmark(group="search") def test_s20(benchmark): @benchmark - def ret(): + def ret(): global mets_20 benchmark_find_files(20, mets_20) @mark.benchmark(group="search") def test_s50(benchmark): @benchmark - def ret(): + def ret(): global mets_50 benchmark_find_files(50, mets_50) @@ -196,24 +196,24 @@ def ret(): @mark.benchmark(group="search") def test_s10_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_10 benchmark_find_files(10, mets_c_10) @mark.benchmark(group="search") def test_s20_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_20 benchmark_find_files(20, mets_c_20) @mark.benchmark(group="search") def test_s50_c(benchmark): @benchmark - def ret(): + def ret(): global mets_c_50 benchmark_find_files(50, mets_c_50) - + del mets_c_5 del mets_c_10 del mets_c_20 @@ -221,7 +221,7 @@ def ret(): def manual_t(): mets = _build_mets(2, cache_flag=False) - mets_cached = _build_mets(2, cache_flag=True) + mets_cached = _build_mets(2, cache_flag=True) # print("METS>--------------------------------------------------------------------") # print(mets) @@ -233,11 +233,11 @@ def manual_t(): benchmark_find_files(2, mets) print("-----Cached-Bench-------------------------------------------------------------") benchmark_find_files(2, mets_cached) - + print("-----Regular------------------------------------------------------------------") print("len=%d" % len(mets.find_all_files(fileGrp='SEG-REG'))) print(mets.find_all_files(fileGrp='SEG-REG')) - + print("-----Cached-------------------------------------------------------------------") print("len=%d" % len(mets_cached.find_all_files(fileGrp='SEG-REG'))) print(mets_cached.find_all_files(fileGrp='SEG-REG'))