Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Processor.resolve_resource: support on-demand download of URL values #799

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions ocrd/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@
getLogger,
initLogging,
list_resource_candidates,
nth_url_segment,
pushd_popd,
list_all_resources,
get_processor_resource_types
)
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType
from ocrd.resource_manager import OcrdResourceManager

# XXX imports must remain for backwards-compatibility
from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import
Expand Down Expand Up @@ -217,9 +219,27 @@ def resolve_resource(self, val):
if ret:
log.debug("Resolved %s to absolute path %s" % (val, ret[0]))
return ret[0]
log.error("Could not find resource '%s' for executable '%s'. Try 'ocrd resmgr download %s %s' to download this resource.",
elif (val.startswith('http://') or val.startswith('https://')):
resmgr = OcrdResourceManager()
reslist = resmgr.find_resources(executable, url=val)
if reslist:
_, resdict = reslist[0]
log.info("Found registered resource for %s: '%s' (%s)." % (executable, val, resdict))
else:
resdict = {}
log.info("Not a registered resource for %s: '%s'." % (executable, val))
return str(resmgr.download(
executable,
val,
basedir = resmgr.location_to_resource_dir('data'),
name=resdict.get('name', nth_url_segment(val)),
path_in_archive=resdict.get('path_in_archive', '.'),
resource_type=resdict.get('type', 'file')
))
else:
log.error("Could not find resource '%s' for executable '%s'. Try 'ocrd resmgr download %s %s' to download this resource or use a URL for the parameter value.",
val, executable, executable, val)
sys.exit(1)
sys.exit(1)

def list_all_resources(self):
"""
Expand Down
5 changes: 2 additions & 3 deletions ocrd/ocrd/resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from yaml import safe_load, safe_dump

from ocrd_validators import OcrdResourceListValidator
from ocrd_utils import getLogger
from ocrd_utils import getLogger, nth_url_segment
from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT

Expand Down Expand Up @@ -235,8 +235,7 @@ def download(
log = getLogger('ocrd.resource_manager.download')
destdir = Path(basedir) if no_subdir else Path(basedir, executable)
if not name:
url_parsed = urlparse(url)
name = Path(unquote(url_parsed.path)).name
name = nth_url_segment(url)
fpath = Path(destdir, name)
is_url = url.startswith('https://') or url.startswith('http://')
if fpath.exists() and not overwrite:
Expand Down