From e9c6d07386f154b5a2761c09c78d281bfb18f8b7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 10:14:22 +0100 Subject: [PATCH] Processor.resolve_resource: support on-demand download of URL values --- ocrd/ocrd/processor/base.py | 24 ++++++++++++++++++++++-- ocrd/ocrd/resource_manager.py | 5 ++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/ocrd/ocrd/processor/base.py b/ocrd/ocrd/processor/base.py index 5e7ab6e9bb..06f228b5db 100644 --- a/ocrd/ocrd/processor/base.py +++ b/ocrd/ocrd/processor/base.py @@ -25,12 +25,14 @@ getLogger, initLogging, list_resource_candidates, + nth_url_segment, pushd_popd, list_all_resources, get_processor_resource_types ) from ocrd_validators import ParameterValidator from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType +from ocrd.resource_manager import OcrdResourceManager # XXX imports must remain for backwards-compatibilty from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import @@ -217,9 +219,27 @@ def resolve_resource(self, val): if ret: log.debug("Resolved %s to absolute path %s" % (val, ret[0])) return ret[0] - log.error("Could not find resource '%s' for executable '%s'. Try 'ocrd resmgr download %s %s' to download this resource.", + elif (val.startswith('http://') or val.startswith('https://')): + resmgr = OcrdResourceManager() + reslist = resmgr.find_resources(executable, url=val) + if reslist: + _, resdict = reslist[0] + log.info("Found registered resource for %s: '%s' (%s)." % (executable, val, resdict)) + else: + resdict = {} + log.info("Not a registered resource for %s: '%s'." % (executable, val)) + return str(resmgr.download( + executable, + val, + basedir = resmgr.location_to_resource_dir('data'), + name=resdict.get('name', nth_url_segment(val)), + path_in_archive=resdict.get('path_in_archive', '.'), + resource_type=resdict.get('type', 'file') + )) + else: + log.error("Could not find resource '%s' for executable '%s'. Try 'ocrd resmgr download %s %s' to download this resource or use a URL for the parameter value.", val, executable, executable, val) - sys.exit(1) + sys.exit(1) def list_all_resources(self): """ diff --git a/ocrd/ocrd/resource_manager.py b/ocrd/ocrd/resource_manager.py index 06832e0fd2..6d87ac5202 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/ocrd/ocrd/resource_manager.py @@ -10,7 +10,7 @@ from yaml import safe_load, safe_dump from ocrd_validators import OcrdResourceListValidator -from ocrd_utils import getLogger +from ocrd_utils import getLogger, nth_url_segment from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT @@ -235,8 +235,7 @@ def download( log = getLogger('ocrd.resource_manager.download') destdir = Path(basedir) if no_subdir else Path(basedir, executable) if not name: - url_parsed = urlparse(url) - name = Path(unquote(url_parsed.path)).name + name = nth_url_segment(url) fpath = Path(destdir, name) is_url = url.startswith('https://') or url.startswith('http://') if fpath.exists() and not overwrite: