From 3a953fc8621614d31ebcff8ca1f5a8bda6c0889a Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Thu, 8 Sep 2022 15:02:02 +0200 Subject: [PATCH 1/4] add required but removed metadata code to metalad This commit adds the metadata code to datalad-metalad that was removed from datalad-core, but is required in datalad-metalad for different reasons: 1. Base class definitions 2. Extractor implementations --- datalad_metalad/extract.py | 2 +- datalad_metalad/extractors/base.py | 52 +++++++ datalad_metalad/extractors/legacy/__init__.py | 9 ++ datalad_metalad/extractors/legacy/annex.py | 76 +++++++++ datalad_metalad/extractors/legacy/audio.py | 97 ++++++++++++ datalad_metalad/extractors/legacy/datacite.py | 101 ++++++++++++ .../extractors/legacy/datalad_core.py | 136 ++++++++++++++++ .../extractors/legacy/datalad_rfc822.py | 106 +++++++++++++ .../extractors/legacy/definitions.py | 147 ++++++++++++++++++ datalad_metalad/extractors/legacy/exif.py | 80 ++++++++++ .../legacy/frictionless_datapackage.py | 85 ++++++++++ datalad_metalad/extractors/legacy/image.py | 103 ++++++++++++ .../extractors/legacy/tests/__init__.py | 9 ++ .../extractors/legacy/tests/test_audio.py | 78 ++++++++++ .../extractors/legacy/tests/test_base.py | 85 ++++++++++ .../legacy/tests/test_datacite_xml.py | 102 ++++++++++++ .../extractors/legacy/tests/test_exif.py | 98 ++++++++++++ .../tests/test_frictionless_datapackage.py | 71 +++++++++ .../extractors/legacy/tests/test_image.py | 61 ++++++++ .../extractors/legacy/tests/test_rfc822.py | 73 +++++++++ .../extractors/legacy/tests/test_xmp.py | 68 ++++++++ datalad_metalad/extractors/legacy/xmp.py | 119 ++++++++++++++ datalad_metalad/indexers/base.py | 52 +++++++ datalad_metalad/indexers/jsonld.py | 2 +- datalad_metalad/indexers/studyminimeta.py | 2 +- setup.py | 9 ++ 26 files changed, 1820 insertions(+), 3 deletions(-) create mode 100644 datalad_metalad/extractors/legacy/__init__.py create mode 100644 datalad_metalad/extractors/legacy/annex.py create mode 
100644 datalad_metalad/extractors/legacy/audio.py create mode 100644 datalad_metalad/extractors/legacy/datacite.py create mode 100644 datalad_metalad/extractors/legacy/datalad_core.py create mode 100644 datalad_metalad/extractors/legacy/datalad_rfc822.py create mode 100644 datalad_metalad/extractors/legacy/definitions.py create mode 100644 datalad_metalad/extractors/legacy/exif.py create mode 100644 datalad_metalad/extractors/legacy/frictionless_datapackage.py create mode 100644 datalad_metalad/extractors/legacy/image.py create mode 100644 datalad_metalad/extractors/legacy/tests/__init__.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_audio.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_base.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_datacite_xml.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_exif.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_image.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_rfc822.py create mode 100644 datalad_metalad/extractors/legacy/tests/test_xmp.py create mode 100644 datalad_metalad/extractors/legacy/xmp.py create mode 100644 datalad_metalad/indexers/base.py diff --git a/datalad_metalad/extract.py b/datalad_metalad/extract.py index 965e9d8a..784ec42d 100644 --- a/datalad_metalad/extract.py +++ b/datalad_metalad/extract.py @@ -39,11 +39,11 @@ from datalad.interface.base import Interface from datalad.interface.base import build_doc from datalad.interface.utils import eval_results -from datalad.metadata.extractors.base import BaseMetadataExtractor from datalad.support.annexrepo import AnnexRepo from datalad.ui import ui from .extractors.base import ( + BaseMetadataExtractor, DataOutputCategory, DatasetMetadataExtractor, FileInfo, diff --git a/datalad_metalad/extractors/base.py b/datalad_metalad/extractors/base.py index 
5148ad55..1dce9fc2 100644 --- a/datalad_metalad/extractors/base.py +++ b/datalad_metalad/extractors/base.py @@ -42,6 +42,58 @@ class ExtractorResult: immediate_data: Optional[Dict[str, Any]] = None +# Legacy extractor base from datalad-core +class BaseMetadataExtractor(object): + + NEEDS_CONTENT = True # majority of the extractors need data content + + def __init__(self, ds, paths): + """ + Parameters + ---------- + ds : dataset instance + Dataset to extract metadata from. + paths : list + Paths to investigate when extracting content metadata + """ + + self.ds = ds + self.paths = paths + + def get_metadata(self, dataset=True, content=True): + """ + Returns + ------- + dict or None, dict or None + Dataset metadata dict, dictionary of filepath regexes with metadata, + dicts, each return value could be None if there is no such metadata + """ + # default implementation + return \ + self._get_dataset_metadata() if dataset else None, \ + ((k, v) for k, v in self._get_content_metadata()) if content else None + + def _get_dataset_metadata(self): + """ + Returns + ------- + dict + keys and values are arbitrary + """ + raise NotImplementedError + + def _get_content_metadata(self): + """Get ALL metadata for all dataset content. + + Possibly limited to the paths given to the extractor. + + Returns + ------- + generator((location, metadata_dict)) + """ + raise NotImplementedError + + class DataOutputCategory(enum.Enum): """ Describe how extractors output metadata. 
diff --git a/datalad_metalad/extractors/legacy/__init__.py b/datalad_metalad/extractors/legacy/__init__.py new file mode 100644 index 00000000..a397a3bc --- /dev/null +++ b/datalad_metalad/extractors/legacy/__init__.py @@ -0,0 +1,9 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Legacy metadata extractors""" diff --git a/datalad_metalad/extractors/legacy/annex.py b/datalad_metalad/extractors/legacy/annex.py new file mode 100644 index 00000000..6594c3cf --- /dev/null +++ b/datalad_metalad/extractors/legacy/annex.py @@ -0,0 +1,76 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Metadata extractor for Git-annex metadata""" + +from ..base import BaseMetadataExtractor + +import logging +lgr = logging.getLogger('datalad.metadata.extractors.annexmeta') +from datalad.log import log_progress + +from datalad.support.annexrepo import AnnexRepo +# use main version as core version + +# this must stay, despite being a seemingly unused import, each extractor defines a version +from .definitions import version as vocabulary_version + + +class MetadataExtractor(BaseMetadataExtractor): + + NEEDS_CONTENT = False + + def _get_dataset_metadata(self): + return {} + + def _get_content_metadata(self): + log_progress( + lgr.info, + 'extractorannex', + 'Start annex metadata extraction from %s', self.ds, + total=len(self.paths), + label='Annex metadata extraction', + unit=' Files', + ) + repo = self.ds.repo # OPT: .repo could be relatively expensive + if not isinstance(repo, AnnexRepo): + log_progress( + lgr.info, + 'extractorannex', + 'Finished annex metadata extraction from %s', self.ds + ) + return + + valid_paths = None + if self.paths and sum(len(i) for i in self.paths) > 500000: + valid_paths = set(self.paths) + for file, meta in repo.get_metadata( + self.paths if self.paths and valid_paths is None else '.'): + if file.startswith('.datalad') or valid_paths and file not in valid_paths: + # do not report on our own internal annexed files (e.g. 
metadata blobs) + continue + log_progress( + lgr.info, + 'extractorannex', + 'Extracted annex metadata from %s', file, + update=1, + increment=True) + meta = {k: v[0] if isinstance(v, list) and len(v) == 1 else v + for k, v in meta.items()} + key = repo.get_file_annexinfo(file).get('key') + if key: + meta['key'] = key + yield (file, meta) + # we need to make sure that batch processes are terminated + # otherwise they might cause trouble on windows + repo.precommit() + log_progress( + lgr.info, + 'extractorannex', + 'Finished annex metadata extraction from %s', self.ds + ) diff --git a/datalad_metalad/extractors/legacy/audio.py b/datalad_metalad/extractors/legacy/audio.py new file mode 100644 index 00000000..5a8db324 --- /dev/null +++ b/datalad_metalad/extractors/legacy/audio.py @@ -0,0 +1,97 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Audio metadata extractor""" +from __future__ import absolute_import + +from os.path import join as opj +import logging +lgr = logging.getLogger('datalad.metadata.extractors.audio') +from datalad.log import log_progress + +from mutagen import File as audiofile + +from .definitions import vocabulary_id +from ..base import BaseMetadataExtractor + + +# how properties reported by mutagen map onto our vocabulary +vocab_map = { + 'album': 'music:album', + 'artist': 'music:artist', + 'channels': 'music:channels', + 'composer': 'music:Composer', + 'copyright': 'dcterms:rights', + 'genre': 'music:Genre', + 'length': 'duration(s)', + 'sample_rate': 'music:sample_rate', + 'title': 'name', +} + + +class MetadataExtractor(BaseMetadataExtractor): + + _unique_exclude = {'bitrate'} + + def get_metadata(self, dataset, content): + if not content: + return {}, [] + log_progress( + lgr.info, + 'extractoraudio', + 'Start audio metadata extraction from %s', self.ds, + total=len(self.paths), + label='audio metadata extraction', + unit=' Files', + ) + contentmeta = [] + for f in self.paths: + absfp = opj(self.ds.path, f) + log_progress( + lgr.info, + 'extractoraudio', + 'Extract audio metadata from %s', absfp, + update=1, + increment=True) + info = audiofile(absfp, easy=True) + if info is None: + continue + meta = {vocab_map.get(k, k): info[k][0] + if isinstance(info[k], list) and len(info[k]) == 1 else info[k] + for k in info} + if hasattr(info, 'mime') and len(info.mime): + meta['format'] = 'mime:{}'.format(info.mime[0]) + for k in ('length', 'channels', 'bitrate', 'sample_rate'): + if hasattr(info.info, k): + val = getattr(info.info, k) + if k == 'length': + # duration comes in seconds, cap at millisecond level + val = round(val, 3) + meta[vocab_map.get(k, k)] = val + contentmeta.append((f, meta)) + + log_progress( + lgr.info, + 'extractoraudio', + 'Finished audio metadata extraction from %s', 
self.ds + ) + return { + '@context': { + 'music': { + '@id': 'http://purl.org/ontology/mo/', + 'description': 'Music Ontology with main concepts and properties for describing music', + 'type': vocabulary_id, + }, + 'duration(s)': { + "@id": 'time:Duration', + "unit": "uo:0000010", + 'unit_label': 'second', + }, + }, + }, \ + contentmeta diff --git a/datalad_metalad/extractors/legacy/datacite.py b/datalad_metalad/extractors/legacy/datacite.py new file mode 100644 index 00000000..2cfadc96 --- /dev/null +++ b/datalad_metalad/extractors/legacy/datacite.py @@ -0,0 +1,101 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Extractor for datacite xml records, currently for CRCNS datasets +""" + +import re +import os.path as op +from collections import OrderedDict +import logging +lgr = logging.getLogger('datalad.metadata.extractors.datacite') + +try: + import xml.etree.cElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET + +from ..base import BaseMetadataExtractor + + +def _merge(iterable): + """Merge multiple items into a single one separating with a newline""" + return "\n".join(iterable) + + +def _unwrap(text): + """Basic unwrapping of text separated by newlines""" + return re.sub(r'\n\s*', ' ', text) + + +def _process_tree(tree, nstag): + """Process XML tree for a record and return a dictionary for our standard + """ + rec = OrderedDict() + for key, tag_, getall, trans1_, transall_ in [ + ('author', 'creatorName', True, None, None), + ('name', "title[@titleType='AlternativeTitle']", False, None, None), + # actually it seems we have no title but "ShortDescription"!!! 
TODO + #('title', "title", False, _unwrap, None), + ('shortdescription', "title", False, _unwrap, None), + ('description', 'description', True, _unwrap, _merge), + ('version', 'version', False, None, None), + ('sameas', "identifier[@identifierType='DOI']", False, None, None), + # conflicts with our notion for having a "type" to be internal and to demarkate a Dataset + # here might include the field e.g. Dataset/Neurophysiology, so skipping for now + # ('type', "resourceType[@resourceTypeGeneral='Dataset']", False, None, None), + ('citation', "relatedIdentifier", True, None, None), + ('tag', "subject", True, None, None), + ('formats', "format", True, None, None), + ]: + trans1 = trans1_ or (lambda x: x) + text = lambda x: trans1(x.text.strip()) + tag = nstag(tag_) + try: + if getall: + value = list(map(text, tree.findall(tag))) + else: + value = text(tree.find(tag)) + except AttributeError: + continue + if not value or value == ['']: + continue + if transall_: + value = transall_(value) + rec[key] = value + return rec + + +class MetadataExtractor(BaseMetadataExtractor): + def _get_dataset_metadata(self): + canonical = op.join(self.ds.path, '.datalad', 'meta.datacite.xml') + + # look for the first matching filename and go with it + fname = [canonical] if op.lexists(canonical) else \ + [op.join(self.ds.path, f) for f in self.paths + if op.basename(f) == 'meta.datacite.xml'] + if not fname or not op.lexists(fname[0]): + return {} + fname = fname[0] + # those namespaces are a b.ch + # TODO: avoid reading file twice + namespaces = dict([ + node for _, node in ET.iterparse( + open(fname), events=('start-ns',) + ) + ]) + ns = namespaces[''] + + def nstag(tag): + return './/{%s}%s' % (ns, tag) + + tree = ET.ElementTree(file=fname) + return _process_tree(tree, nstag) + + def _get_content_metadata(self): + return [] # no content metadata provided diff --git a/datalad_metalad/extractors/legacy/datalad_core.py b/datalad_metalad/extractors/legacy/datalad_core.py new file mode 
100644 index 00000000..add56d11 --- /dev/null +++ b/datalad_metalad/extractors/legacy/datalad_core.py @@ -0,0 +1,136 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Metadata extractor for DataLad's own core storage""" + +from ..base import BaseMetadataExtractor + +import logging +lgr = logging.getLogger('datalad.metadata.extractors.datalad_core') +from datalad.log import log_progress + +from os.path import join as opj +from os.path import exists + +from datalad.consts import ( + DATASET_METADATA_FILE, + DATALAD_DOTDIR, + WEB_SPECIAL_REMOTE_UUID, +) +from datalad.support.json_py import load as jsonload +from datalad.support.annexrepo import AnnexRepo +from datalad.coreapi import subdatasets +# use main version as core version + +# this must stay, despite being a seemingly unused import, each extractor defines a version +from .definitions import version as vocabulary_version + + +class MetadataExtractor(BaseMetadataExtractor): + + NEEDS_CONTENT = False + + _unique_exclude = {"url"} + + def _get_dataset_metadata(self): + """ + Returns + ------- + dict + keys are homogenized datalad metadata keys, values are arbitrary + """ + fpath = opj(self.ds.path, DATASET_METADATA_FILE) + obj = {} + if exists(fpath): + obj = jsonload(fpath, fixup=True) + if 'definition' in obj: + obj['@context'] = obj['definition'] + del obj['definition'] + obj['@id'] = self.ds.id + subdsinfo = [{ + # this version would change anytime we aggregate metadata, let's not + # do this for now + #'version': sds['revision'], + 'type': sds['type'], + 'name': sds['gitmodule_name'], + } + for sds in subdatasets( + dataset=self.ds, + recursive=False, + 
return_type='generator', + result_renderer='disabled', + on_failure='ignore') + ] + if subdsinfo: + obj['haspart'] = subdsinfo + superds = self.ds.get_superdataset(registered_only=True, topmost=False) + if superds: + obj['ispartof'] = { + '@id': superds.id, + 'type': 'dataset', + } + + return obj + + def _get_content_metadata(self): + """Get ALL metadata for all dataset content. + + Returns + ------- + generator((location, metadata_dict)) + """ + log_progress( + lgr.info, + 'extractordataladcore', + 'Start core metadata extraction from %s', self.ds, + total=len(self.paths), + label='Core metadata extraction', + unit=' Files', + ) + if not isinstance(self.ds.repo, AnnexRepo): + for p in self.paths: + # this extractor does give a response for ANY file as it serves + # an an indicator of file presence (i.e. a file list) in the + # content metadata, even if we know nothing but the filename + # about a file + yield (p, dict()) + log_progress( + lgr.info, + 'extractordataladcore', + 'Finished core metadata extraction from %s', self.ds + ) + return + valid_paths = None + if self.paths and sum(len(i) for i in self.paths) > 500000: + valid_paths = set(self.paths) + # Availability information + for file, whereis in self.ds.repo.whereis( + self.paths if self.paths and valid_paths is None else '.', + output='full').items(): + if file.startswith(DATALAD_DOTDIR) or valid_paths and file not in valid_paths: + # do not report on our own internal annexed files (e.g. 
metadata blobs) + continue + log_progress( + lgr.info, + 'extractordataladcore', + 'Extracted core metadata from %s', file, + update=1, + increment=True) + # pull out proper (public) URLs + # TODO possibly extend with special remote info later on + meta = {'url': whereis[remote].get('urls', []) + for remote in whereis + # "web" remote + if remote == WEB_SPECIAL_REMOTE_UUID and + whereis[remote].get('urls', None)} + yield (file, meta) + log_progress( + lgr.info, + 'extractordataladcore', + 'Finished core metadata extraction from %s', self.ds + ) diff --git a/datalad_metalad/extractors/legacy/datalad_rfc822.py b/datalad_metalad/extractors/legacy/datalad_rfc822.py new file mode 100644 index 00000000..4997965c --- /dev/null +++ b/datalad_metalad/extractors/legacy/datalad_rfc822.py @@ -0,0 +1,106 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Extractor for RFC822-based metadata specifications + +This is inspired by (and very similar to) Debian's package metadata format. +The main difference is that information spread across multiple files in Debian +packages, is concentrated in one file. + +The main advantage of this format is that it is proven to be hand-editable, +i.e. can be composed from scratch, by hand, in an editor -- with a good +chance of producing syntax-compliant content with the first attempt. 
+""" + +import logging +lgr = logging.getLogger('datalad.metadata.extractors.datalad_rfc822') +from os.path import exists +import email +import email.parser # necessary on Python 2.7.6 (trusty) +from os.path import join as opj +from datalad.interface.base import dedent_docstring + +from ..base import BaseMetadataExtractor + + +def _split_list_field(content): + return [i.strip() for i in content.split(',') if i.strip()] + + +def _beautify_multiline_field(content): + content = dedent_docstring(content) + lines = content.split('\n') + title = '' + if len(lines): + title = lines[0] + if len(lines) > 1: + content = '' + for l in lines[1:]: + l = l.strip() + content = '{}{}{}'.format( + content, + ' ' if len(content) and l != '.' and content[-1] != '\n' else '', + l if l != '.' else '\n') + return title, content + + +class MetadataExtractor(BaseMetadataExtractor): + _metadata_compliance = "http://docs.datalad.org/metadata.html#v0-1" + _core_metadata_filename = opj('.datalad', 'meta.rfc822') + + _key2stdkey = { + 'name': 'name', + 'license': 'license', + 'author': 'author', + 'maintainer': 'maintainer', + 'audience': 'audience', + 'homepage': 'homepage', + 'version': 'version', + 'funding': 'fundedby', + 'issue-tracker': 'issuetracker', + 'cite-as': 'citation', + 'doi': 'sameas', + 'description': None, + } + + def _get_dataset_metadata(self): + meta = {} + if not exists(opj(self.ds.path, self._core_metadata_filename)): + return meta + spec = email.parser.Parser().parse( + open(opj(self.ds.path, self._core_metadata_filename)), + headersonly=True) + + for term in self._key2stdkey: + if term not in spec: + continue + hkey = self._key2stdkey[term] + content = spec[term] + if term == 'description': + short, long = _beautify_multiline_field(content) + meta['shortdescription'] = short + meta['description'] = long + elif term == 'license': + # TODO if title looks like a URL, use it as @id + label, desc = _beautify_multiline_field(content) + if label: + meta[hkey] = [label, desc] 
+ else: + meta[hkey] = desc + elif term in ('maintainer', 'author'): + meta[hkey] = _split_list_field(content) + elif term == 'doi': + meta[hkey] = 'http://dx.doi.org/{}'.format(content) + else: + meta[hkey] = content + + meta['conformsto'] = self._metadata_compliance + return meta + + def _get_content_metadata(self): + return [] diff --git a/datalad_metalad/extractors/legacy/definitions.py b/datalad_metalad/extractors/legacy/definitions.py new file mode 100644 index 00000000..67880e1a --- /dev/null +++ b/datalad_metalad/extractors/legacy/definitions.py @@ -0,0 +1,147 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Metadata definitions""" + +# identifiers that defines an ontology as a whole +vocabulary_id = 'http://purl.org/dc/dcam/VocabularyEncodingScheme' + +# this is the canonical version string of DataLad's current metadata scheme +version = '2.0' + +# for maximum compatibility with git-annex' metadata setup, _keys_ in this +# dictionary should be all lower-case, and be limited to alphanumerics, plus +# '_', '-', and '.' 
-- except for JSON-LD keywords (which start with '@' and +# will be ignored in the context of git-annex metadata +common_defs = { + # ontologies/external vocabularies + "schema": { + 'def': "http://schema.org/", + 'descr': 'base vocabulary', + 'type': vocabulary_id}, + "dcterms": { + 'def': "http://purl.org/dc/terms/", + 'descr': 'DCMI metadata terms', + 'type': vocabulary_id}, + "dctype": { + 'def': "http://purl.org/dc/dcmitype/", + 'descr': 'DCMI Type Vocabulary', + 'type': vocabulary_id}, + "doap": { + 'def': "http://usefulinc.com/ns/doap#", + 'descr': 'vocabulary for the description of a project', + 'type': vocabulary_id}, + "foaf": { + 'def': "http://xmlns.com/foaf/spec/#term_", + 'descr': 'vocabulary for describing (social) networks', + 'type': vocabulary_id}, + "idqa": { + 'def': "http://purl.obolibrary.org/obo/ID_", + 'descr': 'vocabulary for Image and Data Quality Assessment for scientific data management', + 'type': vocabulary_id}, + "mime": { + 'def': "https://www.iana.org/assignments/media-types/", + 'descr': 'IANA media types, see https://www.iana.org/assignments/media-types/media-types.xhtml', + 'type': vocabulary_id}, + "pato": { + 'def': "http://purl.obolibrary.org/obo/PATO_", + 'descr': 'Vocabulary of phenotypic qualities', + 'type': vocabulary_id}, + "time": { + 'def': 'https://www.w3.org/TR/owl-time/#', + 'descr': 'ontology of temporal concepts', + 'type': vocabulary_id}, + "uo": { + 'def': "http://purl.obolibrary.org/obo/UO_", + 'descr': "Units of Measurement Ontology", + 'type': vocabulary_id}, + # individually defined terms + "author": { + 'def': "schema:author", + 'descr': 'author of some resource'}, + "audience": { + 'def': "doap:audience", + 'descr': 'target audience description'}, + "citation": { + 'def': "schema:citation", + 'descr': 'reference to another creative work, such as a scholarly article'}, + "contributors": { + 'def': "schema:contributor", + 'descr': 'secondary author of a resource'}, + "comment": { + 'def': 
'http://purl.obolibrary.org/obo/NCIT_C25393', + 'descr': 'A written explanation, or observation'}, + "description": { + 'def': "schema:description", + 'descr': 'description of a resource'}, + 'format': { + 'def': 'http://purl.org/dc/elements/1.1/format', + 'descr': 'file format, physical medium, or dimensions of the resource.'}, + "homepage": { + 'def': "doap:homepage", + 'descr': 'homepage associated with a resource'}, + "issuetracker": { + 'def': "doap:bug-database", + 'descr': 'location of an issue tracker for a resource'}, + "tag": { + 'def': "schema:keywords", + 'descr': 'tag or keyword (often multiple) for a resource'}, + "license": { + 'def': "http://www.w3.org/1999/xhtml/vocab#license", + 'descr': 'licence or usage terms for a resource'}, + "location": { + 'def': "schema:location", + 'descr': 'location where a resource is available'}, + "maintainer": { + 'def': "doap:maintainer", + 'descr': 'maintainer of a resource'}, + "name": { + 'def': "schema:name", + 'descr': 'name of a resource'}, + "shortdescription": { + 'def': "doap:shortdesc", + 'descr': 'short description or summary or title of a resource'}, + # wondering why there is no title, eh? + # MIH: we have name and short description + # adding title seems superfluous + #"title": "dcterms:title", + "subject": { + 'def': 'dcterms:subject', + 'descr': 'topic of a resource, best practice is to use a controlled vocabulary'}, + # TODO why not JSON-LD @type instead, an annexed file is type 'file' anyways + "type": { + 'def': "dcterms:type", + 'descr': 'type or category of a resource (e.g. 
file, dataset)'}, + "unit": { + 'def': 'uo:0000000', + 'descr': 'standardized quantity of a physical quality'}, + "version": { + 'def': "doap:Version", + 'descr': 'version of a resource'}, + "conformsto": { + 'def': "dcterms:conformsTo", + 'descr': 'reference to a standard to which the described resource conforms'}, + "fundedby": { + 'def': "foaf:fundedBy", + 'descr': 'reference to an entity that provided funding for a resource'}, + "haspart": { + 'def': "dcterms:hasPart", + 'descr': 'related resource that is physically/logically included in a resource'}, + "ispartof": { + 'def': "dcterms:isPartOf", + 'descr': 'related resource in which a resource is physically/logically included'}, + "isversionof": { + 'def': "dcterms:isVersionOf", + 'descr': 'related resource of which the a resource is a version, edition, or adaptation'}, + "modified": { + 'def': "dcterms:modified", + 'descr': 'date on which the resource was changed'}, + "sameas": { + 'def': "schema:sameAs", + 'descr': "URL of a web page that unambiguously indicates a resource's identity"}, +} diff --git a/datalad_metalad/extractors/legacy/exif.py b/datalad_metalad/extractors/legacy/exif.py new file mode 100644 index 00000000..dc846668 --- /dev/null +++ b/datalad_metalad/extractors/legacy/exif.py @@ -0,0 +1,80 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""EXIF metadata extractor""" + +from os.path import join as opj +import logging +lgr = logging.getLogger('datalad.metadata.extractors.exif') +from datalad.log import log_progress + +from exifread import process_file + +from .definitions import vocabulary_id +from ..base import BaseMetadataExtractor + + +def _return_as_appropriate_dtype(val): + # TODO we could make an attempt to detect and convert + # lists/arrays -- but it would be costly and have little gain + # as we will in most cases convert back to string very quickly + try: + return float(val) + except: + try: + return int(val) + except: + return val + + +class MetadataExtractor(BaseMetadataExtractor): + def get_metadata(self, dataset, content): + if not content: + return {}, [] + log_progress( + lgr.info, + 'extractorexif', + 'Start EXIF metadata extraction from %s', self.ds, + total=len(self.paths), + label='EXIF metadata extraction', + unit=' Files', + ) + contentmeta = [] + for f in self.paths: + absfp = opj(self.ds.path, f) + log_progress( + lgr.info, + 'extractorexif', + 'Extract EXIF metadata from %s', absfp, + update=1, + increment=True) + # TODO we might want to do some more elaborate extraction in the future + # but for now plain EXIF, no maker extensions, no thumbnails + info = process_file(open(opj(self.ds.path, f), 'rb'), details=False) + if not info: + # got nothing, likely nothing there + continue + meta = {k.split()[-1]: _return_as_appropriate_dtype(info[k].printable) + for k in info} + contentmeta.append((f, meta)) + + log_progress( + lgr.info, + 'extractorexif', + 'Finished EXIF metadata extraction from %s', self.ds + ) + return { + '@context': { + 'exif': { + '@id': 'http://www.w3.org/2003/12/exif/ns/', + 'description': 'Vocabulary to describe an Exif format picture data', + 'type': vocabulary_id, + }, + }, + }, \ + contentmeta diff --git a/datalad_metalad/extractors/legacy/frictionless_datapackage.py 
b/datalad_metalad/extractors/legacy/frictionless_datapackage.py new file mode 100644 index 00000000..a89ffbbd --- /dev/null +++ b/datalad_metalad/extractors/legacy/frictionless_datapackage.py @@ -0,0 +1,85 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Extractor for friction-less data packages +(http://specs.frictionlessdata.io/data-packages) +""" + +import logging +lgr = logging.getLogger('datalad.metadata.extractors.frictionless_datapackage') +from os.path import join as opj, exists +from datalad.support.json_py import load as jsonload + +from ..base import BaseMetadataExtractor + + +def _compact_author(obj): + if isinstance(obj, dict): + bits = [] + if 'name' in obj: + bits.append(obj['name']) + if 'email' in obj: + bits.append('<{}>'.format(obj['email'])) + if 'web' in obj: + bits.append('({})'.format(obj['web'])) + return ' '.join(bits) + else: + return obj + + +def _compact_license(obj): + if isinstance(obj, dict): + # With obj itself if no url or type + obj = obj.get('path', obj.get('type', obj)) + if isinstance(obj, dict) and len(obj) == 1: + # didn't get lucky with compacting, try one more + obj = obj.popitem()[1] + return obj + else: + return obj + + +class MetadataExtractor(BaseMetadataExtractor): + metadatasrc_fname = 'datapackage.json' + + _key2stdkey = { + 'name': 'name', + 'title': 'shortdescription', + 'description': 'description', + 'keywords': 'tag', + 'version': 'version', + 'homepage': 'homepage', + } + + def _get_dataset_metadata(self): + meta = {} + metadata_path = opj(self.ds.path, self.metadatasrc_fname) + if not exists(metadata_path): + return meta + foreign = jsonload(metadata_path) + + for term 
in self._key2stdkey: + if term in foreign: + meta[self._key2stdkey[term]] = foreign[term] + if 'author' in foreign: + meta['author'] = _compact_author(foreign['author']) + if 'contributors' in foreign: + meta['contributors'] = [_compact_author(c) + for c in foreign['contributors']] + # two license terms were supported at some point + if 'license' in foreign: + meta['license'] = _compact_license(foreign['license']) + if 'licenses' in foreign: + meta['license'] = [_compact_license(l) for l in foreign['licenses']] + + meta['conformsto'] = 'http://specs.frictionlessdata.io/data-packages' + + return meta + + def _get_content_metadata(self): + return [] diff --git a/datalad_metalad/extractors/legacy/image.py b/datalad_metalad/extractors/legacy/image.py new file mode 100644 index 00000000..241a7a00 --- /dev/null +++ b/datalad_metalad/extractors/legacy/image.py @@ -0,0 +1,103 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""generic image metadata extractor""" + +from os.path import join as opj +import logging +lgr = logging.getLogger('datalad.metadata.extractors.image') +from datalad.log import log_progress + +from PIL import Image +from datalad.support.exceptions import CapturedException + +from ..base import BaseMetadataExtractor + + +vocabulary = { + "spatial_resolution(dpi)": { + '@id': "idqa:0000162", + 'unit': "uo:0000240", # DPI + 'unit_label': 'dpi', + 'description': "spatial resolution in dot-per-inch"}, + "color_mode": { + '@id': 'idqa:0000160', + 'description': 'color resolution/mode'}, +} + + +mode_map = { + '1': '1-bit pixels, black and white, stored with one pixel per byte', + 'L': '8-bit pixels, black and white', + 'P': '8-bit pixels, mapped to any other mode using a color palette', + 'RGB': '3x8-bit pixels, true color', + 'RGBA': '4x8-bit pixels, true color with transparency mask', + 'CMYK': '4x8-bit pixels, color separation', + 'YCbCr': '3x8-bit pixels, color video format', + 'LAB': '3x8-bit pixels, the L*a*b color space', + 'HSV': '3x8-bit pixels, Hue, Saturation, Value color space', + 'I': '32-bit signed integer pixels', + 'F': '32-bit floating point pixels', +} + + +class MetadataExtractor(BaseMetadataExtractor): + + _extractors = { + 'format': lambda x: x.format_description, + 'dcterms:SizeOrDuration': lambda x: x.size, + 'spatial_resolution(dpi)': lambda x: x.info.get('dpi', ''), + 'color_mode': lambda x: mode_map.get(x.mode, ''), + } + + def get_metadata(self, dataset, content): + if not content: + return {}, [] + contentmeta = [] + log_progress( + lgr.info, + 'extractorimage', + 'Start image metadata extraction from %s', self.ds, + total=len(self.paths), + label='image metadata extraction', + unit=' Files', + ) + for f in self.paths: + absfp = opj(self.ds.path, f) + log_progress( + lgr.info, + 'extractorimage', + 'Extract image metadata from %s', absfp, + update=1, + 
increment=True) + try: + img = Image.open(absfp) + except Exception as e: + lgr.debug("Image metadata extractor failed to load %s: %s", + absfp, CapturedException(e)) + continue + meta = { + 'type': 'dctype:Image', + } + + # run all extractors + meta.update({k: v(img) for k, v in self._extractors.items()}) + # filter useless fields (empty strings and NaNs) + meta = {k: v for k, v in meta.items() + if not (hasattr(v, '__len__') and not len(v))} + contentmeta.append((f, meta)) + + log_progress( + lgr.info, + 'extractorimage', + 'Finished image metadata extraction from %s', self.ds + ) + return { + '@context': vocabulary, + }, \ + contentmeta diff --git a/datalad_metalad/extractors/legacy/tests/__init__.py b/datalad_metalad/extractors/legacy/tests/__init__.py new file mode 100644 index 00000000..06724a6e --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/__init__.py @@ -0,0 +1,9 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Tests for legacy metadata extractors""" diff --git a/datalad_metalad/extractors/legacy/tests/test_audio.py b/datalad_metalad/extractors/legacy/tests/test_audio.py new file mode 100644 index 00000000..0c7b1284 --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_audio.py @@ -0,0 +1,78 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test audio extractor""" + +from datalad.tests.utils_pytest import ( + SkipTest, + assert_in, + assert_not_in, + assert_repo_status, + assert_result_count, + assert_status, + eq_, + with_tempfile, +) + +try: + import mutagen +except ImportError: + raise SkipTest + +from os.path import dirname +from os.path import join as opj +from shutil import copy + +from datalad.api import Dataset + +target = { + "format": "mime:audio/mp3", + "duration(s)": 1.0, + "name": "dltracktitle", + "music:album": "dlalbumtitle", + "music:artist": "dlartist", + "music:channels": 1, + "music:sample_rate": 44100, + "music:Genre": "dlgenre", + "date": "", + "tracknumber": "dltracknumber", +} + + +@with_tempfile(mkdir=True) +def test_audio(path=None): + ds = Dataset(path).create() + ds.config.add('datalad.metadata.nativetype', 'audio', scope='branch') + copy( + opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'), + path) + ds.save() + assert_repo_status(ds.path) + res = ds.aggregate_metadata() + assert_status('ok', res) + res = ds.metadata('audio.mp3') + assert_result_count(res, 1) + + # from this extractor + meta = res[0]['metadata']['audio'] + for k, v in target.items(): + eq_(meta[k], v) + + assert_in('@context', meta) + + uniques = ds.metadata( + reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties'] + # test file has it, but uniques have it blanked out, because the extractor considers it worthless + # for discovering whole datasets + assert_in('bitrate', meta) + eq_(uniques['audio']['bitrate'], None) + + # 'date' field carries not value, hence gets exclude from the unique report + assert_in('date', meta) + assert(not meta['date']) + assert_not_in('date', uniques['audio']) diff --git a/datalad_metalad/extractors/legacy/tests/test_base.py b/datalad_metalad/extractors/legacy/tests/test_base.py new file mode 100644 index 00000000..6293dc6d --- 
/dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_base.py @@ -0,0 +1,85 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test all extractors at a basic level""" + +from inspect import isgenerator + +from datalad.api import Dataset +from datalad.support.entrypoints import iter_entrypoints +from datalad.tests.utils_pytest import ( + SkipTest, + assert_equal, + assert_repo_status, + known_failure_githubci_win, + with_tree, +) + + +@with_tree(tree={'file.dat': ''}) +def check_api(annex, path): + ds = Dataset(path).create(force=True, annex=annex) + ds.save() + assert_repo_status(ds.path) + + processed_extractors, skipped_extractors = [], [] + for ename, emod, eload in iter_entrypoints('datalad.metadata.extractors'): + # we need to be able to query for metadata, even if there is none + # from any extractor + try: + extractor_cls = eload() + except Exception as exc: + exc_ = str(exc) + skipped_extractors += [exc_] + continue + extractor = extractor_cls( + ds, paths=['file.dat']) + meta = extractor.get_metadata( + dataset=True, + content=True) + # we also get something for the dataset and something for the content + # even if any of the two is empty + assert_equal(len(meta), 2) + dsmeta, contentmeta = meta + assert (isinstance(dsmeta, dict)) + assert hasattr(contentmeta, '__len__') or isgenerator(contentmeta) + # verify that generator does not blow and has an entry for our + # precious file + cm = dict(contentmeta) + # datalad_core does provide some (not really) information about our + # precious file + if ename == 'datalad_core': + assert 'file.dat' in cm + elif ename == 'annex': + if annex: + # 
verify correct key, which is the same for all files of 0 size + assert_equal( + cm['file.dat']['key'], + 'MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.dat' + ) + else: + # no metadata on that file + assert not cm + processed_extractors.append(ename) + assert "datalad_core" in processed_extractors, \ + "Should have managed to find at least the core extractor extractor" + if skipped_extractors: + raise SkipTest( + "Not fully tested/succeeded since some extractors failed" + " to load:\n%s" % ("\n".join(skipped_extractors))) + + +@known_failure_githubci_win +def test_api_git(): + # should tollerate both pure git and annex repos + check_api(False) + + +@known_failure_githubci_win +def test_api_annex(): + check_api(True) diff --git a/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py b/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py new file mode 100644 index 00000000..318c3ae0 --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py @@ -0,0 +1,102 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test datacite metadata extractor """ + +from simplejson import dumps + +from datalad.api import create +from datalad.tests.utils_pytest import ( + assert_equal, + with_tree, +) + +from ..datacite import MetadataExtractor + + +xml_content = """\ + + + 10.6080/K0QN64NG + + + Last1, First1 + + + Last2, First2 + + + + Main + title + CRCNS.org xxx-1 + + CRCNS.org + 2011 + + Neuroscience + fMRI + + eng + Dataset/Neurophysiology + + 10 GB + + + application/matlab + NIFTY + + 1.0 + + + Some long + description. 
+ + + + 10.1016/j.cub.2011.08.031 + + +""" + + +@with_tree(tree={'.datalad': {'meta.datacite.xml': xml_content}}) +@with_tree(tree={'elsewhere': {'meta.datacite.xml': xml_content}}) +def test_get_metadata(path1=None, path2=None): + for p in (path1, path2): + print('PATH') + ds = create(p, force=True) + ds.save() + meta = MetadataExtractor( + ds, + _get_metadatarelevant_paths(ds, []))._get_dataset_metadata() + assert_equal( + dumps(meta, sort_keys=True, indent=2), + """\ +{ + "author": [ + "Last1, First1", + "Last2, First2" + ], + "citation": [ + "10.1016/j.cub.2011.08.031" + ], + "description": "Some long description.", + "formats": [ + "application/matlab", + "NIFTY" + ], + "name": "CRCNS.org xxx-1", + "sameas": "10.6080/K0QN64NG", + "shortdescription": "Main title", + "tag": [ + "Neuroscience", + "fMRI" + ], + "version": "1.0" +}""") diff --git a/datalad_metalad/extractors/legacy/tests/test_exif.py b/datalad_metalad/extractors/legacy/tests/test_exif.py new file mode 100644 index 00000000..ac00a52d --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_exif.py @@ -0,0 +1,98 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test EXIF extractor""" + +from datalad.tests.utils_pytest import ( + SkipTest, + assert_in, + assert_repo_status, + assert_result_count, + assert_status, + eq_, + with_tempfile, +) + +try: + import exifread +except ImportError: + raise SkipTest + +from os.path import dirname +from os.path import join as opj +from shutil import copy + +from datalad.api import Dataset + +target = { + "InteroperabilityVersion": "[48, 49, 48, 48]", + "ExifVersion": 221.0, + "FocalLengthIn35mmFilm": 38.0, + "CompressedBitsPerPixel": 5.0, + "GainControl": "None", + "Compression": "JPEG (old-style)", + "PrintIM": "[80, 114, 105, 110, 116, 73, 77, 0, 48, 51, 48, 48, 0, 0, 0, 5, 0, 1, 0, 22, 0, 22, 0, 2, 1, 0, 0, 0, 1, 0, 5, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 16, 131, 0, 0, 0]", + "Make": "CASIO COMPUTER CO.,LTD.", + "Sharpness": "Normal", + "Contrast": "Normal", + "ColorSpace": "sRGB", + "ExposureMode": "Auto Exposure", + "ExposureBiasValue": 0.0, + "ExifImageWidth": 4.0, + "ComponentsConfiguration": "YCbCr", + "DateTimeOriginal": "2011:03:13 16:36:02", + "MaxApertureValue": "14/5", + "DateTime": "2017:10:08 10:21:03", + "InteroperabilityOffset": 30412.0, + "InteroperabilityIndex": "R98", + "FileSource": "Digital Camera", + "ResolutionUnit": "Pixels/Inch", + "FNumber": "27/10", + "ExposureProgram": "Program Normal", + "DigitalZoomRatio": "0/0", + "LightSource": "Unknown", + "ExifImageLength": 3.0, + "FlashPixVersion": 100.0, + "CustomRendered": "Normal", + "Flash": "Flash fired, auto mode", + "WhiteBalance": "Auto", + "Orientation": "Horizontal (normal)", + "ExposureTime": "1/60", + "Software": "GIMP 2.8.20", + "Model": "EX-S600", + "FocalLength": "31/5", + "SceneCaptureType": "Standard", + "ExifOffset": 272.0, + "Saturation": "Normal", + "YCbCrPositioning": "Centered", + "DateTimeDigitized": "2011:03:13 16:36:02", + "XResolution": 72.0, + "YResolution": 72.0, + "MeteringMode": "Pattern", +} + + 
+@with_tempfile(mkdir=True) +def test_exif(path=None): + ds = Dataset(path).create() + ds.config.add('datalad.metadata.nativetype', 'exif', scope='branch') + copy( + opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'), + path) + ds.save() + assert_repo_status(ds.path) + res = ds.aggregate_metadata() + assert_status('ok', res) + res = ds.metadata('exif.jpg') + assert_result_count(res, 1) + # from this extractor + meta = res[0]['metadata']['exif'] + for k, v in target.items(): + eq_(meta[k], v) + + assert_in('@context', meta) diff --git a/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py b/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py new file mode 100644 index 00000000..8d04a9ac --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py @@ -0,0 +1,71 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test frictionless datapackage metadata extractor """ + +from simplejson import dumps + +from datalad.api import Dataset +from datalad.tests.utils_pytest import ( + assert_equal, + with_tree, +) + +from ..frictionless_datapackage import MetadataExtractor + + +# bits from examples and the specs +@with_tree(tree={'datapackage.json': """ +{ + "name": "cpi", + "title": "Annual Consumer Price Index (CPI)", + "description": "Annual Consumer Price Index (CPI) for most countries in the world. 
Reference year is 2005.", + "license" : { + "type": "odc-pddl", + "path": "http://opendatacommons.org/licenses/pddl/" + }, + "keywords": [ "CPI", "World", "Consumer Price Index", "Annual Data", "The World Bank" ], + "version": "2.0.0", + "last_updated": "2014-09-22", + "contributors": [ + { + "name": "Joe Bloggs", + "email": "joe@example.com", + "web": "http://www.example.com" + } + ], + "author": "Jane Doe " +} +"""}) +def test_get_metadata(path=None): + + ds = Dataset(path).create(force=True) + p = MetadataExtractor(ds, []) + meta = p._get_dataset_metadata() + assert_equal( + dumps(meta, sort_keys=True, indent=2), + """\ +{ + "author": "Jane Doe ", + "conformsto": "http://specs.frictionlessdata.io/data-packages", + "contributors": [ + "Joe Bloggs (http://www.example.com)" + ], + "description": "Annual Consumer Price Index (CPI) for most countries in the world. Reference year is 2005.", + "license": "http://opendatacommons.org/licenses/pddl/", + "name": "cpi", + "shortdescription": "Annual Consumer Price Index (CPI)", + "tag": [ + "CPI", + "World", + "Consumer Price Index", + "Annual Data", + "The World Bank" + ], + "version": "2.0.0" +}""") diff --git a/datalad_metalad/extractors/legacy/tests/test_image.py b/datalad_metalad/extractors/legacy/tests/test_image.py new file mode 100644 index 00000000..ac3586c1 --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_image.py @@ -0,0 +1,61 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test image extractor""" + +from datalad.tests.utils_pytest import ( + SkipTest, + assert_in, + assert_repo_status, + assert_result_count, + assert_status, + eq_, + with_tempfile, +) + +try: + from PIL import Image +except ImportError as exc: + raise SkipTest( + "No PIL module available or it cannot be imported") from exc + +from os.path import dirname +from os.path import join as opj +from shutil import copy + +from datalad.api import Dataset + +target = { + "dcterms:SizeOrDuration": [4, 3], + "color_mode": "3x8-bit pixels, true color", + "type": "dctype:Image", + "spatial_resolution(dpi)": [72, 72], + "format": "JPEG (ISO 10918)" +} + + +@with_tempfile(mkdir=True) +def test_image(path=None): + ds = Dataset(path).create() + ds.config.add('datalad.metadata.nativetype', 'image', scope='branch') + copy( + opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'), + path) + ds.save() + assert_repo_status(ds.path) + res = ds.aggregate_metadata() + assert_status('ok', res) + res = ds.metadata('exif.jpg') + assert_result_count(res, 1) + + # from this extractor + meta = res[0]['metadata']['image'] + for k, v in target.items(): + eq_(meta[k], v) + + assert_in('@context', meta) diff --git a/datalad_metalad/extractors/legacy/tests/test_rfc822.py b/datalad_metalad/extractors/legacy/tests/test_rfc822.py new file mode 100644 index 00000000..ecbaa648 --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_rfc822.py @@ -0,0 +1,73 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test BIDS metadata extractor """ + +from simplejson import dumps + +from datalad.distribution.dataset import Dataset +from datalad.tests.utils_pytest import ( + assert_equal, + with_tree, +) + +from ..datalad_rfc822 import MetadataExtractor + + +@with_tree(tree={'.datalad': {'meta.rfc822': """\ +Name: studyforrest_phase2 +Version: 1.0.0-rc3 +Description: Basic summary + A text with arbitrary length and content that can span multiple + . + paragraphs (this is a new one) +License: CC0-1.0 + The person who associated a work with this deed has dedicated the work to the + public domain by waiving all of his or her rights to the work worldwide under + copyright law, including all related and neighboring rights, to the extent + allowed by law. + . + You can copy, modify, distribute and perform the work, even for commercial + purposes, all without asking permission. +Maintainer: Mike One , + Anna Two , +Homepage: http://studyforrest.org +Funding: BMBFGQ1411, NSF 1429999 +Issue-Tracker: https://github.com/psychoinformatics-de/studyforrest-data-phase2/issues +Cite-As: Cool (2016) +DOI: 10.5281/zenodo.48421 + +"""}}) +def test_get_metadata(path=None): + + ds = Dataset(path).create(force=True) + ds.save() + meta = MetadataExtractor(ds, [])._get_dataset_metadata() + assert_equal( + dumps(meta, sort_keys=True, indent=2), + """\ +{ + "citation": "Cool (2016)", + "conformsto": "http://docs.datalad.org/metadata.html#v0-1", + "description": "A text with arbitrary length and content that can span multiple\\nparagraphs (this is a new one)", + "fundedby": "BMBFGQ1411, NSF 1429999", + "homepage": "http://studyforrest.org", + "issuetracker": "https://github.com/psychoinformatics-de/studyforrest-data-phase2/issues", + "license": [ + "CC0-1.0", + "The person who associated a work with this deed has dedicated the work to the public domain by waiving all of his or her rights to the work worldwide under 
copyright law, including all related and neighboring rights, to the extent allowed by law.\\nYou can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission." + ], + "maintainer": [ + "Mike One ", + "Anna Two " + ], + "name": "studyforrest_phase2", + "sameas": "http://dx.doi.org/10.5281/zenodo.48421", + "shortdescription": "Basic summary", + "version": "1.0.0-rc3" +}""") diff --git a/datalad_metalad/extractors/legacy/tests/test_xmp.py b/datalad_metalad/extractors/legacy/tests/test_xmp.py new file mode 100644 index 00000000..aaf11b05 --- /dev/null +++ b/datalad_metalad/extractors/legacy/tests/test_xmp.py @@ -0,0 +1,68 @@ +# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Test XMP extractor""" + +import pytest + +from datalad.tests.utils_pytest import ( + assert_in, + assert_repo_status, + assert_result_count, + assert_status, + eq_, + skip_if_no_module, + with_tempfile, +) + +try: + import libxmp +except Exception as e: + pytestmark = pytest.mark.skip(reason=f"Module 'libxmp' failed to load: {e}") + +from os.path import dirname +from os.path import join as opj +from shutil import copy + +from datalad.api import Dataset + +target = { + 'dc:creator': 'Michael Hanke', + 'dc:description': 'dlsubject', + 'dc:description': 'x-default', + 'dc:title': 'dltitle', + 'dc:title': 'x-default', + 'pdfaid:part': '1', + 'pdfaid:conformance': 'A', + 'pdf:Keywords': 'dlkeyword1 dlkeyword2', + 'pdf:Producer': 'LibreOffice 5.2', + 'xmp:CreateDate': '2017-10-08T10:27:06+02:00', + 'xmp:CreatorTool': 'Writer', +} + + +@with_tempfile(mkdir=True) +def test_xmp(path=None): + ds = Dataset(path).create() + ds.config.add('datalad.metadata.nativetype', 'xmp', scope='branch') + copy( + opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'xmp.pdf'), + path) + ds.save() + assert_repo_status(ds.path) + res = ds.aggregate_metadata() + assert_status('ok', res) + res = ds.metadata('xmp.pdf') + assert_result_count(res, 1) + + # from this extractor + meta = res[0]['metadata']['xmp'] + for k, v in target.items(): + eq_(meta[k], v) + + assert_in('@context', meta) diff --git a/datalad_metalad/extractors/legacy/xmp.py b/datalad_metalad/extractors/legacy/xmp.py new file mode 100644 index 00000000..ca2187f3 --- /dev/null +++ b/datalad_metalad/extractors/legacy/xmp.py @@ -0,0 +1,119 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license 
terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +""" Extensible Metadata Platform (XMP) metadata extractor + +https://en.wikipedia.org/wiki/Extensible_Metadata_Platform +""" + +import re +from os.path import join as opj +import logging +lgr = logging.getLogger('datalad.metadata.extractors.xmp') +from datalad.log import log_progress + +from libxmp.utils import file_to_dict +from datalad.utils import ensure_unicode + +from .definitions import vocabulary_id +from ..base import BaseMetadataExtractor + + +xmp_field_re = re.compile(r'^([^\[\]]+)(\[\d+\]|)(/?.*|)') + + +class MetadataExtractor(BaseMetadataExtractor): + def get_metadata(self, dataset, content): + if not content: + return {}, [] + context = {} + contentmeta = [] + + # which files to look for + fname_match_regex = self.ds.config.get( + 'datalad.metadata.xmp.fname-match', + '.*(jpg|jpeg|pdf|gif|tiff|tif|ps|eps|png|mp3|mp4|avi|wav)$') + fname_match_regex = re.compile(fname_match_regex) + + log_progress( + lgr.info, + 'extractorxmp', + 'Start XMP metadata extraction from %s', self.ds, + total=len(self.paths), + label='XMP metadata extraction', + unit=' Files', + ) + for f in self.paths: + log_progress( + lgr.info, + 'extractorxmp', + 'Extract XMP metadata from %s', f, + update=1, + increment=True) + # run basic file name filter for performance reasons + # it is OK to let false-positives through + if fname_match_regex.match(f, re.IGNORECASE) is None: + continue + absfp = opj(self.ds.path, f) + info = file_to_dict(absfp) + if not info: + # got nothing, likely nothing there + # TODO check if this is an XMP sidecar file, parse that, and assign metadata + # to the base file + continue + # update vocabulary + vocab = {info[ns][0][0].split(':')[0]: {'@id': ns, 'type': vocabulary_id} for ns in info} + # TODO this is dirty and assumed that XMP is internally consistent with the + # definitions across all files -- which it likely isn't + context.update(vocab) + # now pull out 
actual metadata + # cannot do simple dict comprehension, because we need to beautify things a little + + meta = {} + for ns in info: + for key, val, props in info[ns]: + if not val: + # skip everything empty + continue + if key.count('[') > 1: + # this is a nested array + # MIH: I do not think it is worth going here + continue + if props['VALUE_IS_ARRAY']: + # we'll catch the actual array values later + continue + # normalize value + val = ensure_unicode(val) + # non-breaking space + val = val.replace(u"\xa0", ' ') + + field, idx, qual = xmp_field_re.match(key).groups() + normkey = u'{}{}'.format(field, qual) + if '/' in key: + normkey = u'{0}<{1}>'.format(*normkey.split('/')) + if idx: + # array + arr = meta.get(normkey, []) + arr.append(val) + meta[normkey] = arr + else: + meta[normkey] = val + # compact + meta = {k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in meta.items()} + + contentmeta.append((f, meta)) + + log_progress( + lgr.info, + 'extractorxmp', + 'Finished XMP metadata extraction from %s', self.ds + ) + return { + '@context': context, + }, \ + contentmeta diff --git a/datalad_metalad/indexers/base.py b/datalad_metalad/indexers/base.py new file mode 100644 index 00000000..1a0989a4 --- /dev/null +++ b/datalad_metalad/indexers/base.py @@ -0,0 +1,52 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. 
+# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +""" Metadata indexer base class """ +import abc +from typing import ( + Any, + Dict, + List, + Union, +) + + +class MetadataIndexer(metaclass=abc.ABCMeta): + """ Defines the indexer interface """ + def __init__(self, metadata_format_name: str): + """ + Create a metadata indexer + + The format name is passed to the constructor to allow + a single indexer to process multiple extractor results + """ + self.metadata_format_name = metadata_format_name + + @abc.abstractmethod + def create_index(self, metadata: Union[Dict, List]) -> Dict[str, Any]: + """ + Create an index from metadata. + + The input is a list or dictionary that contains metadata + in the format identified by metadata_format_name. + + The output should be a set of key-value pairs that represent + the information stored in `metadata´. + + Parameters + ---------- + metadata : Dict or List + Metadata created by an extractor. + + Returns + ------- + dict: + key-value pairs representing the information in metadata. 
+ values can be literals or lists of literals + """ + raise NotImplementedError diff --git a/datalad_metalad/indexers/jsonld.py b/datalad_metalad/indexers/jsonld.py index 7e3a981c..9453e6ae 100644 --- a/datalad_metalad/indexers/jsonld.py +++ b/datalad_metalad/indexers/jsonld.py @@ -7,7 +7,7 @@ cast ) -from datalad.metadata.indexers.base import MetadataIndexer +from .base import MetadataIndexer class IndexerJsonLdTags: diff --git a/datalad_metalad/indexers/studyminimeta.py b/datalad_metalad/indexers/studyminimeta.py index fe9e2eae..bd426045 100644 --- a/datalad_metalad/indexers/studyminimeta.py +++ b/datalad_metalad/indexers/studyminimeta.py @@ -6,7 +6,7 @@ Union, ) -from datalad.metadata.indexers.base import MetadataIndexer +from .base import MetadataIndexer from .jsonld import ( IndexerSchemaOrgProperties, diff --git a/setup.py b/setup.py index 1d28359f..8ba19998 100755 --- a/setup.py +++ b/setup.py @@ -59,6 +59,15 @@ 'metalad_studyminimeta=datalad_metalad.extractors.studyminimeta.main:StudyMiniMetaExtractor', 'external_dataset=datalad_metalad.extractors.external_dataset:ExternalDatasetExtractor', 'external_file=datalad_metalad.extractors.external_file:ExternalFileExtractor', + 'annex=datalad_metalad.extractors.legacy.annex:MetadataExtractor', + 'audio=datalad_metalad.extractors.legacy.audio:MetadataExtractor', + 'datacite=datalad_metalad.extractors.legacy.datacite:MetadataExtractor', + 'datalad_core=datalad_metalad.extractors.legacy.datalad_core:MetadataExtractor', + 'datalad_rfc822=datalad_metalad.extractors.legacy.datalad_rfc822:MetadataExtractor', + 'exif=datalad_metalad.extractors.legacy.exif:MetadataExtractor', + 'frictionless_datapackage=datalad_metalad.extractors.legacy.frictionless_datapackage:MetadataExtractor', + 'image=datalad_metalad.extractors.legacy.image:MetadataExtractor', + 'xmp=datalad_metalad.extractors.legacy.xmp:MetadataExtractor', ], 'datalad.metadata.indexers': [ 
'metalad_studyminimeta=datalad_metalad.indexers.studyminimeta:StudyMiniMetaIndexer', From 492c2289053911d7b62670d41553439d8e711f04 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Thu, 8 Sep 2022 21:51:17 +0200 Subject: [PATCH 2/4] salvage extractors removed from datalad-core This commit adds the extractors that were removed from datalad core in the datalad PR #7014 (https://github.com/datalad/datalad/pull/7014) including their tests to datalad-metalad. This is done to keep them available with only `datalad` and `datalad-metalad` installed. --- datalad_metalad/extractors/base.py | 56 +--------- datalad_metalad/extractors/legacy/__init__.py | 8 ++ datalad_metalad/extractors/legacy/annex.py | 2 +- datalad_metalad/extractors/legacy/audio.py | 2 +- datalad_metalad/extractors/legacy/datacite.py | 2 +- .../extractors/legacy/datalad_core.py | 14 +-- .../extractors/legacy/datalad_rfc822.py | 2 +- datalad_metalad/extractors/legacy/exif.py | 2 +- .../legacy/frictionless_datapackage.py | 2 +- datalad_metalad/extractors/legacy/image.py | 2 +- .../extractors/legacy/tests/data/audio.mp3 | Bin 0 -> 5702 bytes .../extractors/legacy/tests/data/exif.jpg | Bin 0 -> 33891 bytes .../legacy/tests/data/nifti1.nii.gz | Bin 0 -> 15920 bytes .../extractors/legacy/tests/data/xmp.pdf | Bin 0 -> 19350 bytes .../extractors/legacy/tests/test_audio.py | 30 ++---- .../extractors/legacy/tests/test_base.py | 53 +-------- .../legacy/tests/test_datacite_xml.py | 101 +++++++++--------- .../extractors/legacy/tests/test_exif.py | 17 ++- .../tests/test_frictionless_datapackage.py | 17 +-- .../extractors/legacy/tests/test_image.py | 21 ++-- .../extractors/legacy/tests/test_rfc822.py | 9 +- .../extractors/legacy/tests/test_xmp.py | 15 ++- datalad_metalad/extractors/legacy/xmp.py | 2 +- setup.py | 22 ++-- 24 files changed, 132 insertions(+), 247 deletions(-) create mode 100644 datalad_metalad/extractors/legacy/tests/data/audio.mp3 create mode 100644 datalad_metalad/extractors/legacy/tests/data/exif.jpg 
create mode 100644 datalad_metalad/extractors/legacy/tests/data/nifti1.nii.gz create mode 100644 datalad_metalad/extractors/legacy/tests/data/xmp.pdf diff --git a/datalad_metalad/extractors/base.py b/datalad_metalad/extractors/base.py index 1dce9fc2..8addb97d 100644 --- a/datalad_metalad/extractors/base.py +++ b/datalad_metalad/extractors/base.py @@ -42,58 +42,6 @@ class ExtractorResult: immediate_data: Optional[Dict[str, Any]] = None -# Legacy extractor base from datalad-core -class BaseMetadataExtractor(object): - - NEEDS_CONTENT = True # majority of the extractors need data content - - def __init__(self, ds, paths): - """ - Parameters - ---------- - ds : dataset instance - Dataset to extract metadata from. - paths : list - Paths to investigate when extracting content metadata - """ - - self.ds = ds - self.paths = paths - - def get_metadata(self, dataset=True, content=True): - """ - Returns - ------- - dict or None, dict or None - Dataset metadata dict, dictionary of filepath regexes with metadata, - dicts, each return value could be None if there is no such metadata - """ - # default implementation - return \ - self._get_dataset_metadata() if dataset else None, \ - ((k, v) for k, v in self._get_content_metadata()) if content else None - - def _get_dataset_metadata(self): - """ - Returns - ------- - dict - keys and values are arbitrary - """ - raise NotImplementedError - - def _get_content_metadata(self): - """Get ALL metadata for all dataset content. - - Possibly limited to the paths given to the extractor. - - Returns - ------- - generator((location, metadata_dict)) - """ - raise NotImplementedError - - class DataOutputCategory(enum.Enum): """ Describe how extractors output metadata. 
@@ -388,7 +336,7 @@ def get_state(self, dataset): # XXX this is the legacy-legacy interface, keep around for a bit more and then # remove -class BaseMetadataExtractor(metaclass=abc.ABCMeta): +class BaseMetadataExtractor: NEEDS_CONTENT = True # majority of the extractors need data content @@ -418,7 +366,6 @@ def get_metadata(self, dataset=True, content=True): self._get_dataset_metadata() if dataset else None, \ ((k, v) for k, v in self._get_content_metadata()) if content else None - @abc.abstractmethod def _get_dataset_metadata(self): """ Returns @@ -428,7 +375,6 @@ def _get_dataset_metadata(self): """ raise NotImplementedError - @abc.abstractmethod def _get_content_metadata(self): """Get ALL metadata for all dataset content. diff --git a/datalad_metalad/extractors/legacy/__init__.py b/datalad_metalad/extractors/legacy/__init__.py index a397a3bc..a5b83b17 100644 --- a/datalad_metalad/extractors/legacy/__init__.py +++ b/datalad_metalad/extractors/legacy/__init__.py @@ -7,3 +7,11 @@ # # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Legacy metadata extractors""" + +from os.path import join + +from datalad.consts import DATALAD_DOTDIR + +METADATA_DIR = join(DATALAD_DOTDIR, 'metadata') +DATASET_METADATA_FILE = join(METADATA_DIR, 'dataset.json') +DATASET_CONFIG_FILE = join(DATALAD_DOTDIR, 'config') diff --git a/datalad_metalad/extractors/legacy/annex.py b/datalad_metalad/extractors/legacy/annex.py index 6594c3cf..785a2aec 100644 --- a/datalad_metalad/extractors/legacy/annex.py +++ b/datalad_metalad/extractors/legacy/annex.py @@ -21,7 +21,7 @@ from .definitions import version as vocabulary_version -class MetadataExtractor(BaseMetadataExtractor): +class AnnexMetadataExtractor(BaseMetadataExtractor): NEEDS_CONTENT = False diff --git a/datalad_metalad/extractors/legacy/audio.py b/datalad_metalad/extractors/legacy/audio.py index 5a8db324..bf21573f 100644 --- a/datalad_metalad/extractors/legacy/audio.py +++ 
b/datalad_metalad/extractors/legacy/audio.py @@ -34,7 +34,7 @@ } -class MetadataExtractor(BaseMetadataExtractor): +class AudioMetadataExtractor(BaseMetadataExtractor): _unique_exclude = {'bitrate'} diff --git a/datalad_metalad/extractors/legacy/datacite.py b/datalad_metalad/extractors/legacy/datacite.py index 2cfadc96..1e55fa7a 100644 --- a/datalad_metalad/extractors/legacy/datacite.py +++ b/datalad_metalad/extractors/legacy/datacite.py @@ -71,7 +71,7 @@ def _process_tree(tree, nstag): return rec -class MetadataExtractor(BaseMetadataExtractor): +class DataciteMetadataExtractor(BaseMetadataExtractor): def _get_dataset_metadata(self): canonical = op.join(self.ds.path, '.datalad', 'meta.datacite.xml') diff --git a/datalad_metalad/extractors/legacy/datalad_core.py b/datalad_metalad/extractors/legacy/datalad_core.py index add56d11..4aa9175f 100644 --- a/datalad_metalad/extractors/legacy/datalad_core.py +++ b/datalad_metalad/extractors/legacy/datalad_core.py @@ -17,21 +17,23 @@ from os.path import join as opj from os.path import exists -from datalad.consts import ( - DATASET_METADATA_FILE, - DATALAD_DOTDIR, - WEB_SPECIAL_REMOTE_UUID, -) +from datalad.consts import WEB_SPECIAL_REMOTE_UUID from datalad.support.json_py import load as jsonload from datalad.support.annexrepo import AnnexRepo from datalad.coreapi import subdatasets + +from . 
import ( + DATASET_METADATA_FILE, + DATALAD_DOTDIR, +) + # use main version as core version # this must stay, despite being a seemingly unused import, each extractor defines a version from .definitions import version as vocabulary_version -class MetadataExtractor(BaseMetadataExtractor): +class DataladCoreMetadataExtractor(BaseMetadataExtractor): NEEDS_CONTENT = False diff --git a/datalad_metalad/extractors/legacy/datalad_rfc822.py b/datalad_metalad/extractors/legacy/datalad_rfc822.py index 4997965c..8ae3cb83 100644 --- a/datalad_metalad/extractors/legacy/datalad_rfc822.py +++ b/datalad_metalad/extractors/legacy/datalad_rfc822.py @@ -49,7 +49,7 @@ def _beautify_multiline_field(content): return title, content -class MetadataExtractor(BaseMetadataExtractor): +class DataladRFC822MetadataExtractor(BaseMetadataExtractor): _metadata_compliance = "http://docs.datalad.org/metadata.html#v0-1" _core_metadata_filename = opj('.datalad', 'meta.rfc822') diff --git a/datalad_metalad/extractors/legacy/exif.py b/datalad_metalad/extractors/legacy/exif.py index dc846668..1de4bf9d 100644 --- a/datalad_metalad/extractors/legacy/exif.py +++ b/datalad_metalad/extractors/legacy/exif.py @@ -32,7 +32,7 @@ def _return_as_appropriate_dtype(val): return val -class MetadataExtractor(BaseMetadataExtractor): +class ExifMetadataExtractor(BaseMetadataExtractor): def get_metadata(self, dataset, content): if not content: return {}, [] diff --git a/datalad_metalad/extractors/legacy/frictionless_datapackage.py b/datalad_metalad/extractors/legacy/frictionless_datapackage.py index a89ffbbd..7cde82db 100644 --- a/datalad_metalad/extractors/legacy/frictionless_datapackage.py +++ b/datalad_metalad/extractors/legacy/frictionless_datapackage.py @@ -44,7 +44,7 @@ def _compact_license(obj): return obj -class MetadataExtractor(BaseMetadataExtractor): +class FRDPMetadataExtractor(BaseMetadataExtractor): metadatasrc_fname = 'datapackage.json' _key2stdkey = { diff --git a/datalad_metalad/extractors/legacy/image.py 
b/datalad_metalad/extractors/legacy/image.py index 241a7a00..555325e7 100644 --- a/datalad_metalad/extractors/legacy/image.py +++ b/datalad_metalad/extractors/legacy/image.py @@ -46,7 +46,7 @@ } -class MetadataExtractor(BaseMetadataExtractor): +class ImageMetadataExtractor(BaseMetadataExtractor): _extractors = { 'format': lambda x: x.format_description, diff --git a/datalad_metalad/extractors/legacy/tests/data/audio.mp3 b/datalad_metalad/extractors/legacy/tests/data/audio.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..04e6729534e64d00a40973c2d1b177d74800020d GIT binary patch literal 5702 zcmeI0XIN9&*2i}$0Ya6cf*K$w5=v+a3PX@42ojJ`R5SqsM2ZZ8BDO>CNEZ|xKxx5( z3JSt#FadE045&C*iGm3Djun-j_awGEkN4BPAK&*pPue;=>$lf`{r5h}ak4W(0}22L zCjbB*LBL>w%+bo8wr-o@&p7xI7#7Toh+=wr*lvP9$%-S&9#9=)H*i1M`au>D8C*3H!w4v09SBQPvr zcX&86BKlwAK0ZD_;+|A{BY2#ocq}6-nzbidIGf88efJ-S4=W-V4#>d|9RQT~03wk@ zB9qBxX6x6lr%>$e?VX*eR4R?Ob?erhJDE)8u3fu#?~aa+i;FvQBq=E=J^kp>qlJZ~ zrKP7%ojG%+uI~K#^OrBTw6t8m&gF8ux_Wzi@7^689DMR*czAehY-VOgD1=#-E;g?A zCI%K3QRoZb=3_=6Vh-8>1Yb0dp5Hq5e>?Gi_qvh-Q7Ob994!5>^&w<_6 z`svEsf%F#XZEFz?NRB)vUOK?Le&Ef}V>0Yd2QGJL3gCn}D}(~s57u)s!gn0&DXjIJ zC1MERt!I`4Ee_xgmBa!R0F0NTmH;Ktk~`8X5*51~KEA!N1b~7_%tsIvxC{!1Y7QGh zVlha8fMk~5;0Hu5k-QzTw~$qMu(HdJY~KsZw}VOm0cI59XR{1~>1Pu(Gek6HYxKg)kVFlVQXv2(m}Un(u3 zyf)nv5DveEClhA_RCc8XfaI(oe@udlo`m5acl`L%!0kfynMygOm}Sud0RZ|<@IR5ejf20KttM}=hR_b;;PE0vK;Gt&7Z zX(JB}x~166QPqc6$P~Y~=s)C}LNGUX*mn1>8`*phZIHfBMJZ=8ylTmyfcY(>K?;C} z!YNtK>u%y42Bnv+H7z4tNBVCtZFKZ6m@@nPeyv(@umMOa5@3puC3^CDrMQ%YVg>{O zBro5ds%D2q>T~?+ePV+1#8^%)q$iT-v~u+mDSTUEs|t7m{bL1Z1d>c z@!H_lLct9*TKOqR(C}@OmQc5CF3){sqrLc$gwkjB#x<<|quy5!u)ojp8r|feC~btZ zK^h{as@%IWLyk(0v`>-}d~#KJuJvF={nH&4kb?BzjQC0V0P#nkJw>3oyq~ z+Uqj6mUL=g)!de#|BmV@mkQ|=HmWUt!1{3gN6J@pAe@!w&4Lb~@7l{4rKe(cS>#w2 zhKeY%R4W7Ywgal_>i*`A#cL=_iQ}7Oj|8}wE!3+VWCjTfi#u;NQTj=p_8a#tbSYlus76(7tn52qdA}PE*4tu`(Yds~ToqW~^ zJukW5BD!=?bOp3eQk>}oG$F_f>9u2El z{(!Q56pw`N2L4r`vBxaF^sKJ0POG@(G#YKT2;s;P_h%mBy(Mkp?;#S(Fm?MM)8XB4 
zk92kSA{kfd!x!=TWKaBg0ojXaurfdCj@N@^S?16^g8C3JGLct})77zHDO==Xmq@8E zS*n9ha4*f}*xcUV6m;?ZTs&Ymi1ar>W_pS0nTB`=^%;ET%JM7>dpf(JFwlQhJpNf) z7CqkSCjDlFb&$LLo2{}Z#?_AX?t7O8jzKaMhlk5^T|WAF={6=@z@~L#J%X+2RB8XZ z<9}2~vEn&Ur6Lw95{}KDp#vX*xSkn^C(z$=>kSYjC7-pZwL$~nVC(8mmNHAy-v%j< z%hy%AmJxNE%yCi@?jMshdt-+i+h~3-RYoLvk+WN=9b( zPEvKyxBko}1D4otGk{m3@5g7YEYC=;o6aHMgNxnQhpX$(ZS8JOi2P&Ah-3Odf_^m8 zteGyIhQ73)u`rhhy!$XIo4qANt`cq1S2-6aYgsAgCSe#e$?CdwM{SFaH+|z6n9+oN zBHiG63iW+^eov$4CbKQtjPUZiM82+!8Pv3>X?1Q3SI7C&Y*2I11%t2&xF$eG(}_YpA!?&?8Bp1`P8cV^d0mf70H8&A|Bs9@RUAR}EO3vF@GI z!L?4uztCWUv{9@eB$ z&AK$YP0rVcNMqi;F|!%57_k)Ep?&F6z72_SrURu+Nnf?$an0Rf;sw>j+G!C7UO)5h z1T-Y}+Z^a{(RbzVP9Y@2J8X^-9=`I`JLR4>jWI5mE^HX_LLyv-w9FH|R*e*BZNIZC zxK-#uMbo7m^*vN}T=j$LA7N9)J8^&vm;#WVv=HS}A|$z?T5pS05zlyYs_S%8z2;eMi8j2@@!A ztpE@e=M75wqkZcqFK%L2v^P9hvePE(52@m*X@UOqYVY+Vk2QrtH~g7*!y5!mu{_T1NXOP zMRMwZR`G&u{_L_hHB?ljR6b=fOF^+osd#mapN3+mW&oi$n#-ua+^6^P#m^iF_M3ap zvk5{jhSicoFNEv-a>!k~j1|@Ou#|HOLuG7C5wU3L6NkI5z|I7g$cv!dHI6?}ht|f$>*64@t8dAhJE6mykbkCNo&`EpzR(NSR!%|vQXHbU)iun)*CXQxhs|b- z`U9^Y0en4WP7v~8PWDAS9ET4ue+$z9Gm7-~_Ggkk(N{DTY8eGXMNs3C*G`&7<2n*9 z4eGJsv~rcghFXkY$L;N%zB&!uk2K@=G25RXee#0?&s*#_z&oq&#NQ(YUr*~=&Rf}u z>jAT85Wm2fE(aNBc@@Fx*4&Ko*gbUQ$ja7tYKYvG^lUI!g9?^4E-ox0;UVTjztMaX z6|1SN$ne&S3)ymEavZg@QXFotcu?2M_11Lm)yd|^ytHch+K`Ioltl!F|V z&E^^g3@=^UIr)=d?k`}C3nQDhZHanp5J^F$Y^5Aqi6FYMD2 z%(uIAFfw^Xl^a^LOrFXcznO=|KZpO|eAR6d;+g7ub66VX;mP&QGUUK7y3HK}>qG_h zC@(8hHVeFc!t(a!!|#IiPeo~HLscI6>FJ z4sPQ8yiO+pcSN7TVQSHT#e_EF&xDIPBJGCe#D7`mZ!bUgAknHmv#>p=!s7i?Rr+&h zD0l>@?-ShZXB22Ia&&UKGG?MAH+`&IGQmgF&fBf%+0TCa1b9lYZUO#+gIL{InYO?$ zx`8Kk00(bDGZ!vJHB}+0if=rw-^8raxb*w!rUZO}-LpineinzE{ZMT2x0xv|H&B1$ z@il6p2A>vATWhw}|6I=}h(|GU)qO4_1GvrL1IW)%C1-$VwgxbAoq%l z>jU5(HmV*V0(vpuZx%i@pRU@a>A2!4#U0%WA>_?*e*h>%SFuDZYwYR*xD>2vM12V zAw$dKdE&-ic|p3_kgW_7B|!rF4QGuEU+sF5ib< zQ9{@3e$Vy!%89gq>q!iWN@2nKK@~{3j7mVzBikv2XgMUZvRyKs0g})X$elgHXsl#= zxy3$+!s!B3z&?xoYscAT7hVdF44&{~y%|3Em%+=Thg+Ms9D(JU(^Na1I-KWZ@AJEe z1N$1kb_<88t&^C#TGXG=_s6e`g$tWq 
zAJw$z)n!|4n=HIf3T@h#v$gxk(S;jd&wqJ&^|WT-S{Zy3L7w*kaHHY=SQE$grLxWO z4QIN^E0O>A5}sK xea~XkKnAe@}z4x9_ z0|{`WukZKwKF|HU|;|g&>P@(#Y05a z)5;tGP*DM}0{{R#z#R-?045qyqW5@=dw(z<8W&-Z0C(SvcXBNqTfV=nZ*3IMq8%1CP} zsxiu_sl3wGlGQ*1uBR`xUT^_q^*A+!czDokzdiVE%|AzZMU_{Kd|V=2d^`X?9$sNF zULG+X5k~YkAFmh>Kj4+KmA$K?3V?_Ik1!5ek{rEZ{#JnTj|=E(LF0Qt=x+c+V9LLK zVTK2${}0E1WcoiJV*!6`#^&MUT0?2kSo=`(GQ}J92RT(jTGu zasR{fuP?v#{KuD^e|`B)C-^^p^v~brcNiqlI}Lg#K*#cT{KR7=L0(3IMp4GGhM0=sb39VZ{0qS9IHO9cRS;C&o9oh+D>g>jHfdMv}kR z8pU|a_;33`w+P06+W@+qFh2Mr%YxQ3&-m~UKiUpA03*}C_5cYOng7uLv^_@NKm2G( zv@G9WjJA}K|35qefAL#1UGOh9Lt~+Tbn1P4?Zqhl4;`l*O@ICu7of4sUw(8H7+?Iw zzpo|x7o%&GQSKl7P!Wyg|Fsj175<@P?4hyZKlqP+rT@sP|BoMwQR5$sNy_+!QS*Ov z0ORjy{F4U&M(sa7{>}*iqYmKr5d-%(*8LA%@2~#fbp3y@|8H#Yn+GEYU7vX99^-f1 z|3s7L-~2{vDR@X-7R)BXq3 z{lUL|0r1fOhtBXH%=j1ow*Q|z{D+_EKeGSk;cq=GfBApg!1@oyKu3+|(O-Jk@2jE5 znCaU&z=z*s4kr48iT@6NyFH{1WGa@ztR!2`fBo?&4y0We80ut+d&J21YXql<&K z4PE{*7yTO(3mXR)Z4SX5^zes)g@uWYg@c2Qb`8V#cPEZbflvLELkJuhRVHXe-5*85^lX)R4C$FHWq^YH?qpPQH zU~2Zx+``hz+Qrq)-NVz%JLp4jNa)9~@c4vJiAl+yzodN2&dJToFDNW3uc)l5uBrW5 z*Vf+A+11_C+c!EkJ~25pJu|zqy0*TtxwXBs3qL+NJv+ZZTwb9w`yY4y=h1)sAwm0t ziH(hg4gBp72BybvXGySe9`fRnN~;6kINfLB3&4BuJT9xO6`z@314j1Nd4zzRMPP*$ z{@bNL9{qoFDDeN6NB?u^fBxLg0`6j=8!ZVI2>=8@VZ(ChO|n^D+ycJ8ypg{JK#drp z%!8z(EGCQ@12K1py0HM6uoBP%&&jEk!Jp%9Uq8~m_i7{4t1*bS*v$KvfL>LSnn#{^^aKLg%YEUYU zJU73ZxB_yFA8l}wn{z$y5{m^=onvN=+>liEQPJ49h)SH89Kn{B#XZBX_2XV(1bPK~ zN!3D?cPn34MO~|U_EqWHWXP@zN8SQRPZf_I!K$H!vggwSE|*S$Kgr*&7(>U z66?oGXz9CnO&lgyfbZyt^KLrawCAg&9wYE?0XTG{a~-_haE$@W2e*K?QAEf345}f48(o)QIsNkKY2`HH*3zjV@S15FV>Hzz%`?Z#&k<56{H_9{VAC( zEmSfXu9G6@AMBw~&>N-m?wSyS0c~BE?2Aj!P2B|wUOBBCunoeZ3-i_b=UA!_&YsKq zIeHe&?WY6*u?~-kYk-%!0g1{N(QNKg!okDbuv9V?*#^n> zeM8UqJ1QEBc;;g(@l==C6=9SXyI2^fn2jFoO@hg-nA?<-n~T!1rJg-ICdPtGb2JCX z^G2-gMykdy^n+!JegMu{f-+xo9I-3M?LtcF#J63W2y1T!;S{l3R;j$Eba3=ulTz%Qgn&-AB|(z!L*k_~ZPdLP~l*oq&l`|_IjuR26u z5#_!)H>~BhH`SiQ?h7#Byb9ek00Oj0^T;`1Y)?7Fp(?jDHG~xbT z>yuTAtW_BbZ+h?uM~DM5>gB|5s-jlGk(ztnV!ow&14(=jsa}d01xneYf>PGB1zEe} 
z9CGG@hRE;PJU|#+y@)ff`^b{%iG49Bvii7W-+h>))8to#KZjp_)y;6cbP-~9vt3`o z;^bhUfRgIjo9KvLX;2)rMn3p;O>jJo71Leb&n4P~e#TadgsCd2gH=r#RJ=8=FA7^r zknhGDK(6n8xzWJc`Ua`KlsyK<+C-Na2o{NoidU3Vx${K|hC~b?U6d+q9@9UO0lvp+ z-8)XxJ(Oz1fcvBFE4L>XVfE+lmRZm}d!%&>nCbMp3lW*nHGu8w;&px~E9+f9J30b4 z+yW}b30KvoRX0c1YcY-H*kjIu->I~pf}*@c6n(_TA7@4JgXtw-0#{qsSD)ynXU+R9=2 zDly#-e3#lj$m*SBC2zlYwk&02ASwU#?AenlGe@BW`Gs2m#i{67^M|G<6?&_;0Oe7x zl~&MVSm+t_WaJhQn6#7XZ*%Rw^S-MvYg>3Z`o{c=wIb>Syf8R?kcsY5j4NuZN9`fn zMPBeq|Ls|_HLSSkZ4Sg+v}pKJdkM-3c0G~XcuMaO==w49;MM1WsJl|q?}oLnCo#^U zX+v&VCsNt${*5-hH=&!T>aW&1gV8ZD1G*`Qx-q+nZ3)OFRDV3Vzb@bwP`B%EaDukB zLuGK&DJ`gbw3#~>WwCRWnXPPKGk&sn$8HwV&fqq8sjY_JTGM#~y*R)?rJ~0=!xW{X z%AY11&AIBZI@2zvTR@HF+VLBcCPL)7#JZ*^47a8%4WU;$mVIW30QVUWn&nua0 zI&2lWTJN>o`F7YYGG<4|Y>0eytl&FG>MNH{4-cLzU66}It*NRqZ2INQy*MU3VTwsKIwN{D9w0oRNf2TT zin#uQOY#v$2KJiI9ar8&YD!mIE#GGk3mvTZ*@5D@m+gsk&rTVXPIia3uOgp5FfdlD zmZG!#u?u_nF>4<5LAPW9f$_SUGvN4213~km=9fUO&kIS6r(V6eaEs`zit+@c5HWA| zay$>{>I^e(Cx;ig;6K>iB5AJhp^Srr!=6%?us^;WcP_ryBtj*Ip6 zwm637J2dfFPtJag(HMO>c^oTT(2ZNm{Xo`X^^&DtS@HV>UerKDV^$> zN^;Gu+9f5>t*iN1VHv8>FH?SS)JWu2HH`_U)14%xNp$e!p)30vm$r4WuG{}r||31N~Lnc&0RUGwy4{#_iO z=;q~mJzIT2PQ{5_$~e31g@Bz}xCu$Ar=~*X2{x%wZzbgWkSGerFDF-}zELgEwW!l7 zt9?(3qi*Wm9q})`ZA?)`AcAcPwq=*np26atAIzBdssU`Xbx=a9t4DEp5rYP@M4`As zv|BriwZ!y@at6{}65kLI*WxtUGdcc@e7bt{9Z-^dMz*tM$BrYjH@*G|<30NKm?%X`7aUB<=TuCn{z4y%Yaw-hb-Qhu9 znPV(`)Y||0N(VRU`}|xV;937N zUoeR3LGCrBXd&G?Nb>yb-Y10DtojFyvuX(L6qNq1{zQ6A01!q419$uD zsSN7TQm%v2qmm5$ETYTsO)aDKa{?T%>(B%6DyJOreoP2GU2{_-o288nejdv$z$YN+ z733~L6!`+Nc%;EVjAC0w7TBHne5i|@&TUYiS6DO)N5U&_PI-ZOD_|As`_%)NKeg)X ziAsmyMPIcpE}$n;eA3XtC}seW_T&&_Z$#xU9(%8lR* z4Pb*sLt%?P-K6#t62^I0^(4dTC%;fIB_! 
z6`$(ThW;YmrG$k!G2F1PIgYGwZ!z2eNarj6}VNQ@z!S)4_|jy`{DM&iIM zL~Bg&4jfxiEM#~g==%_fhq#)2%q75)9mdpTEQdFjlWi%- zpk7JqZ86#?R;rX1KYz++?P|UHwb%~2=u1qzFKCeuUH}uCY+kQ^`Wh`$%l!kC=*`v+ z4T-Z#`NlfnzW7q2dOA`5m+qvuY;R-Jm_;M^30e56;7e{eExZfEp+}d^Kc_TxwSQb5 zgRE6LN>SGJFxR+E}BK#?Bt9W2CF=;b7%qTCET_{BkO7zUJk5Zpep;eLq!*Mibn{CW!}2 z;>S-dD~#Sd8QB15N1Fu}gjRctIhDn3KUtbW`{x{-Ic>>%rl5OEE)@G}{RX z;TMhLZ+?2+b!i+|sK1wXlFTl0bJ6I<1%)E=l}DDG?dyLg3J6VyO#eM|vMEZgyQ9WM&0uV8d|zMPiI1?)C)0Wc-Rvv|EwszRSLH)+5@l9~pZQt`SK3EX zn~TdY2xe`V>}q6|&-)$4G{#%M6P2|**nLshmtDy;w;w&Gir~od^u6Obr>76D)M@5L zIgM7?p2ZMb8RXBPNNi!KVEu)1aJ6qyXy^^8wG<P~*pG77vwx*364OT6a1UcsnZ?Z9jX7i(ZV{99 zsOdApaK%vNPU|oIY1s$-U3`O)hxV6erB)Lgb-OX&vV+{HJ?gBl82D|_SDb4TVzskh z$a{xedrxdMH86@8dJLr@RjS~)XJ#UL)B|m7@LJKCl;ADyEi|bEE9f)Rz{s3_5nO7F zotrc^)pFPs8WzFi)d8TWtMa(U@E0|?ACgu4{%-4b5_aw!2(Ji+PIyGzaiCTNo zE@3b>n34e3dkeVDG2d7z1^$EtwKP9S=+KmY=TE74pWY~ZhOqJ$5MqNQxCI3HfE>_I z8K81d8*!MJ+5xf0xpJejc1>L+etv3lJU@x9aSQF;Z!lF+=nC;l@<*My4)S{n1ZA=8 zJZHmXSH-Yo-Hgc#b!9mMGc7-7Z;M+%I5eTOG5QgIsFtrR`QDoiD#tQ7}=#ZE1eUn0D@Jq96iXJx8wm;xeSR{@I&tKtBYArg?*9pvH6 z)PjlMpO(aXop7$(i}OI&MpaaA)EehTT(maExgS7A(X9?DJ^7_&JAeG9w?XC>0OZ>~ zUiWla@kPcu=WS`c0>LMs2@PcE6PSpKmFwXM7FhE8Q{yJKDf7uPCl6*X!lzCTr2~d} z?T3+a&B?O`j)AOYuEZ_Wo0~-x!*Do0o^jYHr-t=A5q1u~d{Z4DOA@vfK5k(|!j)Fn#k YcXb!o zI0}sX(5!J}myGf>&i}wTb|xB9Nw`Laff+Ql1cQF9BF=a?6=Z~GsuM$=;ZL98 z@6-nDOqN+$+h+7^MaPg+uzPz*dR=p{X^b(4OoSH5#?lV!zQ0(p$iXgq8Iv693Y%m^7($LdXP?OPL$>l!n5J;7tttP1CpJRcMzk zA`UW1E@hXSTMM+`r=pBpzvjrjPOhY=f)h?$w+zH7lodL>B)DLok<&c%nx?Ax7|B_k ze*_9z8$Wd2HtAZW34XW1a*2nk6>Rs+2Pbjg6CICZ%y?RCCwdq7(vPeJcG5vLmdzXQ z6wqx8*KKfEQ7`H+i^XC#OYI3v#kYfbh?VK_-eY_cLA$0El0`3lep2Qm++1{ z5!ggm7Oi@}n0)$veQNEESE;xA=0&v#`=qj>Br!Lv65nklj6p%mi{6U14=|7xm#~+n zJ7~}yl!EL?u5RL0cd;pDnv)1+?C>xn+3BT45?slwB$b%DtW0wWq<+*>Vfd^~Ns7N7 zC|Mke2-zv<7JG&f7YbH7-|!r`oL0{@NEVhk=2*RGw`{n-imWH3KXA~PV-P!uIajNN zGS-0EAxe0XykJ?+O`)HNI2W9(sJz;rdU#ZTpDmKUdu8C6c%+HOd9n`j*YPt6AKh`e zZy$8|K@ST<6BYYsy3HY-{8*&#h1oNly{Y#k?yQQ(+}Z8rN_QX}FZbJE{uGr{MtS8D 
zZR4;CY#Wqcfk)a9P*WM=vuEJb#WZmY9736^YRsS4*AbVvJy-JXIN6mQt4j*aM?FVG ztfm^PUdY2$#vKLZ&8p9Ha7>WOL~6Bzo-CsZ>q#z=rvB&=rh#s7v#-#Do&+XWidvbd-$x5 zUGYOBi=D|PQW$ZRJp`+`aWa`!BP^SzCq6PnLmDNDl_FuWx_1kpvbGWa_(=X3cRKsy zn|uBhH-ekvt|B)+q9SFG_`soLF+U61 z&Tzlil}oU=-b>+4BH#U5pM*iz3*lrx;z}z)8lfbdl|BVL^ImxT; z)t}kAdiG}RfzhcKnBsvP6IY!c-wW!!=Z|xf-;h@FsymE8J9@?&$lm3nUcmYh?pU-G z_6Gq;y)yNnII;^+d%a=5y7>W!_4_=k#VWxDYp(t=y{_*#E zBFUc&iN1|Aag*Pw1T~xrzHF%{Ye#r%GS~kmZBo%z9sP<9qO5o3CG51cI<)hMJlTBMX>Cph7v+ z_IFRM6cwK^f2o6M=Y-3>b12qvmAYSEt`hgG2!D%e9XVyUiDW2|t`#;|J$EuDI`opt zf7(P>9Z44}M~wXT&5l?7xZrE_2ZCLy_dBCSJ;D~@#iTA5I?^R-D<Zl-2$qv{S6kp_zqQ_ zrMIR6F0VN<;moVF^z}bc*Bq(Yf`;+znB9_ME@$8E-nrx}sO9lbO*L9~KjKkbHqE#9 zSL&YJz*2Qo69F#eT0JnIl>*m|Wok`rHo22fJ$RxpX4wO9MR7EuZxW7B6a{zEsf|e5 zi|>Zgf6%>qY(x}}qf~R;dGixQ0@hhB>Hmeg1@r=)h&mi7nhF;7_R>Ga&0>CaHpVLFErU0%Dgu42GwHz*#t|D9dk z>MtOkaGhsrwLl;oikk<)i$aMUY8tvU2JmUQTyNJCRzx;Gn1f zT4z+%deT*T$EzNvDx+X*UmN%hXx654JLq7xWTVvt__;NJ{sW20)G~4SWF{%Ovj06# z=ebwS_ayq=?#*fype+InQw?jH`1Efk)m_nl=bv?BJ6r&P5Wwx`FhAy*CH{67K6bwJ z&wiA`z)ONl%~g+1A4%LLJq7!?*Omj8&5J$DkB=6c1?^BS=OtpxODevE`)Z#^hTIc} zRi(Rl!#A5|oC*Rs_bZeJ#|E6c$)>J96*hazry71_8B;e%xgmqw9eqG)5*6k)u?F{h zS%|#$WW1r(o!Ubx@5DbFTWIA@DIR!;FM7(B3(=-IiFDBwFy>OxH4Gf3`c~tUbledi zU&L?HXuw-&lOd!Y!p( zlWT+j&VFN2@0p`K!7i@X8RHfVN{t9WibC6tfR9R6_h8%}+%w;oV_BkGA~O(ora(%| z5W*s`P4k2OQm*~=8k>fSpJ`2wId@BW2)#tYAtU883?4GX8vLJj@*jg&6{oK>*TBKV zk&ycyq>n7ZAlsfdf;*STAZpZ?iS^l4;k~rgW^XQ6?F40vP;p<5RfbvN_E{tFU1(l& zWgy3%;fH8^(=on8f)?WrX$fN~B~KE-eInD!!a+V8EP7OZi-ptbkEH zR$yBlHvmEOWaRD0*MVC=#{i!p9Ll@-O1-g4Qe^qj#=EkuCOm|p;6Z%ax1b=q1q|28n!)gT!~ zLcOOs!dwYwh_GK=8aSEJmz8>zy0P$f5>Q>Gae3SFprFBR27e@S+Bd@@F>gMJ5sBt#^yFtpd(aVXaKdJ{f=uF_fe zZi0-;gKTfE%FhSku;y2m<@s_3e30Q8(sO7gQLr;y;v=vtPllsGwAK6eu~y6ASTT7_ zmfvHPt{OhG{`M`Paoos{1et=M>1^Um)vf<5aP+i1t^bKA%N4-@92mhmZqMq(TtA@p zg@fTs#WhEa5%i(qUijIQu~#M*Q5yCRJ*<)JW)`!B%|m=H(TS}4QKV10342N!feJSh z@x1Og+O;5lQ#M!$t00cMOBa!9&zXPV;$trokH{XeCqMO^A1jg+ptOzITJtV)oyp_HqH@Nwv 
zrv)n@2K_L>b;Dvs?bA~iJlitj-1bbm`wR+#skK1K5NDh@WtEMd&l4Qpd+-tQ?~9v5 zAv)Ko_Dz(Q(GYOcT1Rs6X7@X)c}}JqRDM1EXK3bDwvuWZtFM~=Sc$%mJPikJO&G|^ zmqFf9VA|e+Z$4h4EXMZ4i2r6HOz125oU&U`Jw-2O>N0r7LL2T8TBYI-j#f6NP+^d4 z$NFM)`r?f3lx%3)Z!m+gAmz8>WeviW{Rp!Z_sPCNyLMyU0k!LO+P21f47Bc_ea%99VC17s zIvDmeennU&R=41^#2+6KebiPy&stg0ZReTGrjTkceNpO76Gt*1nvd;0(fgJ(4*@?R z`t)e^@I&g$Avj?qsWnt6wsS4jrZ;geWDB&0m|%^nF}0#xzN@{7ONC{2kl$ro8w!LM zeo9N9DVg(lXb2Ki1Z*vZ7Yn6r@-S(i0FO%pMao?svh64e>|nkRN|@H-&Gu%+2G;2P8#_1P!)qv}-5O=+k3mdzZ3u zg0$uEtI)QkD*OCpv&x_jA(Gu#?mlzw>gT}1Z3TvOFOecQRvJx^Wq<9LZv4B17M5LC z7gNxzi_fF1DPy*lPSJ^%0Z41BWO5g)@~&gn%IJ}Pde5h#8uGuY{Hh=lh?=jS{nDWt zztY5B3YGI*#?=?neS{`ATY&@WhPzWaY`8;GAT`Mbizu(`u}Kme z(x~m}x$o>{T}s2|R%uP{j~#Mym-OchHkwJFU>)s>lwMJ}qSw%vRL;s2&DR>YMo+#K zd+j~7o!JIPFy}{}U5=pKmu6rIJyFWl9H+x83@DOfxnK6A%3p`{h!+^rXMAW3&B|mF@pP6r{~Z1ii;E1^2cTVBDe_!y*rwwP=>M1 zY>q#by*_@OmiL??*DnS_bRE_p2-8&=3cjo6#Y1pxOJGQ!F>$UFwDR#3=~zATt>$&{ zd*B%~u;cB5yyd=M@%uh9$`fUm9tjRu({bMnIgO1Wnqx>#cd^lXSS`7oJa@m8eDxrJ z1&K`ADG_L!)~{eZiVpb#iDKEYu`QEL3t@}YO@CZ(?VE_#MZDG|?sPym~@c0z!0aykR0|&KH zTLhx82tQOy@iZ6{1o3(m=?8r_SyMl05A@clDG+a0Su0iHBo}>&ykDny!o9J zTASnVQr_|pvGUH59q`k0=mU67gngyY_IC*t7XEhc)yMWh9G~7g62UK$MS3JFz>vgB z5XYy|!+4X>WAt1Z4#{=0gd|b|0)$F}RVc-N^Go>7gAsZ12>T>GHkKx3&{`6DMh*k|aRp)Vn93)|WXZkZYCKo?+ppU3 zz^TGn2;am?#pq$@ML#`k4!%T7aO6j24W4UyV%VPws*$(s)$v)c1;eE+T-O>EAb%;OJC7%g%K-E{m%;M^;J2$BY8H4(xAjcvwrsa~2z3A4@2 z$xZt+$aFC!d`k#=*7ENCJVZ$UOBMUJK0;=heyc07TR_3m*uW>^ruhspXP2}o@im9( zl!b9+mvh_BhWEuIsYf+s%*>JjjJmt3RVA(YMfP{ss8$=4JL$#CnsjvTf9)Ep{Ayo)Oz_@>y@bJ!Px=0Kfi1E&YY)1SMua6 zqc~@tNb5pbXf{#yVslmVtc|Q1saN&k+=02~CJxBgd+J0TX$N5&X~6!qgzz3@)X$j{ zkq&0OB32*tH6b;ML_pe7Qltn*nppMw^PW3kA2}A0OUJB|v37S%X!!M-ulxLfrd3fT z4c$0YNR2;LT8j*?cmoCcR`3kJ3DhogNPBcb*x} zHMDdOoB~>(ZGJD%OJl7Ib=jLu6*GTUyLjD81^ou%=g)00u7ATuxfh<#0=E2MAzl_! 
zV#oP40n@IVVL{~D#+#E;KtfTPmitkTBqt9E!U0=&SIq?JuI$==28yd?*?iO;H$5n}%C`!f%z=ZdM*8C}P zEM0%vOahrmwV1y0N1;TkcpJ6x+ah%Yqij(F)wlnuvcwH`mZAu;GV(ezI-rTZ; zU`zgRL)%8)*1O@sJiy2My2GQ>FA|}=eDh^xPLNu=s`BwyfEGsPO`|Wyo8>IKkk@yg zkzbsb)Vfk;_4-DJcN8*boul?PH}lmORks$$&b4$oMP|HdRYF2Ig&%)ld`TNOSywAi z?~y36-24#RwCon}sA_gp(KSVUynM@zmTBvD`MP614lCo5OUI%LT zQi0qnV?Ht~1o4Kc0_ivg1s&)O6bmGqQknYKOZCU!V_ZNv#+x2nCDj~}UAKMlV#aPzLtdp^Dn9)lDc7uuG=~*Wx@GOM@}R*WUrCj!DLj}^_eyRXj-9#?GW``yx)g+AiPwbr$ZKXnMGcn$N!MA`r$>%vq3<$lH zgW>jJ;_@#C`>y3>LmSQEIv|f2%}ib#jArmX-7HN+zX^LGEAyrB5N@fL-(#>^DtSCk$ON;ndxM z5H?5?Z)N=A@d#CGn0~yR`f=h>Z#OXIniC;|ltvlBJI-sG_+Tbc`JNuH$_;zSSm!w8 zR$f|cOS6GgIha>Z4Zt6BYEpIwnU%v{IKM!!8lqAVyI#@E_!?sn^kTY|;j)emN{B4S zSPaL`7z`AB^Z@bgM02v#!A+Frz{9!WSo|*eQ$@B+$P_bM*hXw{EUIbZTHX)6G^A`e zsKIo_Mw#&W(BsWd0zU>ZKO_+~6k=tqF~U3jXiz@aQA;bi*@MgVdr`(EW^T2Wo_KMu zT;mid-|fr4vN@fn=iZA`<@N9t29b3wXOOBM*BPsY3EG@^1WS!p_+l&)ig+OrVw!hMG zg+;%-=4juOy3fH6{GTmyO*ApJ+K8h#8!LWd_r536Uxwyqq4?3yczo|Zcl9415(pJC2y3W{Tl|`u0uvr^f06Zk>W%FHV3z5=gY0aCRcob5YJ_2;uE z=W?))$!sx%XHfc#`z$>1|7Cx#r%04QVT-;yn&Wa4-C91#Qo>qluz5(6wQ1MJZ>p|| zJ?py_zj>OUQ{^t)32~kG);$S@JG5VbDWE7#H?t!r0SYD>1rlX@0I4Vava>hA`C~FHQ&2)603O7tz^r4v;CL~E2ZHf`p-^3 zcEXb^oWFx5G9T|4-YXsYB3kgZniYrzQd$h3x20(vcJS6=3BgkqrEJ+1DIT<|4U}e_ z=of#D$GFvK>H8Vj#jC;8nAn68;-NSxjrY;1k!E=YAER^8X8f?8xXt0T&hSRtBs;$m zx8SD6e>o$AGpGxUzi7O>4{vu1^gH^T$%7)GMNTb_eEy#OF6xuW#SVD*2t}H|+B|)_ zEs*-N?FYZUl-i=`iRm@I!FAd=#LX)At=#7)SlIV7e5TsXD=%%Tlj6s3(t0`MN=f+= zCMERZ8|&X1vVCIC zwVaoN{UlJ<=oFca7?X@?8Kgf_2Qr$>Llf>D#5Nk9Lg_Qe-Q4BtKMriasqxMnT?&U- z`A{qKIQkcD!P5&TXG{(UG6(6ukGPi ze}dqL)z{KwGj~}9;l`*K%u=z2sl)x`!n38Wqd5wraXWj92$M{?nz!;Zr*P_(=DJb3 z88(>I#3lY3!)#Cob!m)`&sW4=XXe};w{jW_i0sD2+wN1!VlW8`y^>vN{CiibEz*vO zmx`;D@0eF~m?c*1u5PBWU%$!IAU?@TrTh%DUb3@Kuv1gAUkLWkX_{#zUS79=xEMR+ zTa30iH!04XBn^XYkriP%(Z%#Wc77XIYkp{*1|G!Iwh*~kjD#q zGg02g!+d(yOMN<-28pI`m?NRDd;)Ef!6b)^7H{*dv?z!8J8gHR?u?InCFD3i*$XAx z5<6v@br11e%ip8`2+3=MK}ms%d4Yyohdim@_+zFzvXCyl2?Lq^BJE 
zDx8NR4F*6J@2b5A(YP%|_&p&(CzpXak|5$E6ITC;*T1QbH7~S>RWsh02&rb*6kgwfs3U?bmeX7MtFVxvCg;3+o7-pT}|)ui}kb?q|qq#MjlQ?>l1IwK5mrQ4~N) z5f1R#rB&_Ph7?P#qu2nZe$t;c5LfNjPOU(Zy;HI?idMsV8ebosaW}W%?6CvtNd|#; zrtaDn{&_a#^=^bO7{DN;v|MvUhQkH18-A$9MSZy)#!UUJ$t8dpiN zUl-p3-u!ekcJEdwD~mocnebo`R%u7|mDGyRnmyK$tjMszy1|cn+KeG3VeLl&@6p?8 ztW@O8X;o)S>i#ORMdW!4Fq_&TjzGDsEFomR+c(kAFU;>zAM>OfkZMKQwLV$Le1=Uf z=YH`@RJ?e3ILr^X9ubv~u;~Edvz3jWMPaTU>T{cOvBqZa>$#s2bkbf$Ze@|3aI+MC zer@rwP-s^xG5bO3QT?35-V0ynp`GQjPlaQ7+0^O{xm8nY?KiYH-%c}W3B_9~tht6+ z8}9rZ5sTaASf!&l?-^g6$TusqtUTWDB6I6ImR^V#-2n^x^)*+U%}#vq7VhMrDWN{6 z8tq$t`7_L%k*qyhw5Q8I*YDF#CjJebXVZAUFQ&9t?V&5=-jZq}0=qyBLsY$l*o*%v zhI;^3TRizmbagVc8A65#$cD)OdN^J=kw`Clxf=WCQ*^Vs)(yK2kI7t592w4o5-+56 z3b@S%82Y?Uno6a(Av*Cmzauk7h}tBTvn$(+G$9suKi(H`XcN~unX)d=;&d-}U@{`> zg!_QF)C6oS{ms*uCFAfcoQbhl5LOel0lR1c&(wF+7<{UbU#*YmPj#mHf$?;^w%e#s zu3IL^XsT;?PR1BP0%z~;Y@#14Im`>_yAIiknmi;Zb2v<>kID7Z@Ed2Sl0E7kv}Q}8 zJb36uy;HKrW-YqSzl7E3yYTcuZZ8F_L~V%o^MsN{s>#V5YB=64HL z+S=_jeG3rVd0}EMjMY;t^{|-g2YQKuQ}20+=l+k+_^m9b#nnIT#6LNUP1Z&LEMnka zd%wi(Acf*_ZPe{VL}*PJSCbQ+d0R!Zpbb^bv~D>}K9>9~K&c6ro;Le!+1Ikg-vJ4+ zzE(S>{q?+fwG17WPhM=BNPgjr71#cl4~&sUul&I+lboO9JQ!@JA8{FUC9lIREaLH#1KOylFnr@WvDM+iRl`$1(Z|NK!JVHP22sg+Sp7ngwFk#zv z9RkFx``EzpNLLs;t|lN7!z5ei^T7Ia;^XFy9?>hE#EdxB0ow$|y|nijk1Cukl=9-% z$rovbpw%oI3R{?U$~5)Uc+yV;?w3groc2dNsHk+0r;B1_uU$JXRRt0MX!-f{t68Q7 zJ6Hck*#lduPdk4~Ig=yfFRDs?yxM!q&Z`yTPw;y&*C^pc*V zxYOcVzE3IM+#i|4cT6+*E6dB5xs_o4QGVn?ip-QR_@y6fz3DMq=aKqA+8peAr?Bn( zq@7)MlP^P&HekmH+EK#VAkP%GZ22+{V{=P9nrLYq#J-oM8E=(g!jxQ@&jNADeO|p+ zXH*%{7BH?eP{X^Bowrc_po^F)zv|mG_vc>ZqIQ53YKo>n$#2|LvusA%=&s8!LU%6n zi>*tF#m6%SOvEL3gVY@BC8Xo6yFy0P=@zic@2lVNQl&Pu!xz0?PzAQv<927?XkiB9 z3rVcqhWv!1e14;!0V2cq%72#Taep_s(S-S%O$sy+0w%9nuFouA%Gw(F*$W(}CE}Q9 zGtK?r)Zn$6lFZ>gVLL3;L?jY{oG&Rh=BSq(-f?MfVyP}eIHLpmj;!&M!3PSV0jh}S zcQx%JF+GhAA^yH$dtepM`^#G)oKQRBPP-S@pjn+THz|g`>N2k~W`}1yN{3y%6U8v@ 
zbI;w~In$`638P@i$IJJ9EvqFu6dRgolbOn+4g{8S`DmLmg|5wv-8Y;+Mq~Ip!qqBtgkgVC1g7{K&&8T7UYFv3^gjMB)n!P#_X|XCALnX6;0>`=dB!_%@H*9zaK>;a2X^GVDND38ytzIUznk8jp8_TJZZpM7@R zXP?;jm08$}JqfGQ(=FiF@FE`fF)&-~ZTb#+F1&?07ib=>KCk^SF}3P~PT83$qSz1m zq#mJ4hAz7R3g16wU*JTy^u``$DBelRA5oQyCSQxff0;u?l+w{qxOoHR{F%)AcIVtV zZTH|AuMMJOGS1k{mN8@jia?umdI*M9o5ZiYun? zqI@l^_}R``_KywUmg#F6V2;rJ>~!8J*UV$phlVIpOI9~6Q_4WuJ*E=qw&v@Wi+;ZN z#k(NWYBBM~Aoe!h8TH~;P=Q#!@?DsdwyJC)RJ85`(I|UxR_nZV-~hAH`GBGaDe`%K zS|E#VQ<<&zS4D)5tHxdL$Nu~Wq`fTD5vs{bTU)Z*x;FT0!XY;pEO086c&rKry=mA! zNLS?dvR0~Wcqg$=EN)MX-TubF7`z9@)Yzk;mL%qp&25wDRc@`K*5+N!d3tAXLO4cL zd=}S(k0@U4e5>Xxub)j+3~t$>F~j8eaZW-!i9KmnTR;gHzCvfGo8Yye&&`5nwvbI; zc=M~rj<3{6iQ{{kk8SH3h|k@wiFP3B zX3nQdtRmE`l9fe$+VEJt$Q#u7!ggnQ1nLs3XN|H5X>1+$>DoNt;|yV{L+ZS=0r2UQ zbo&=YZ)wuEbNG2S&S=>prJbKQgc50__D#K@j9?#fc>iD2Ag(6-zM zQ6OfqqehI~bIYA8>c^zhO7vcbp7hi>w+n+0-p z^X=%nFm&d55ocvClPFg@d@HyoB>y?XbFJK14Ge7YIn=rgSCuly@P@$-o|^QlymTXO z6gXwc0G9nl5pfE>;xCJdF9-oHJ?yz(T1DthRqr9Osopvxh5Dh=n7TYA{zBNno*(C3 zd#XA3CciXmG`HGHyR~g;C-lZ4R|neDSwGJ6d_X%ac_T=7j@Fy$1%AJ}ZAteMOQ@rm zoVFDkW2aE3b56QyhqbY2-QecHa?A}+o=y@s(<%(U|Hd>F2fw1N(Ef6f==~kd2Nf~Rm?x$p` z%KDqF4^2#ox@JX;%1x-K_O}v_lv@o6_-XwU!g^BX&w19Bs~#N}O+7GnT3-`du74M6 zq-3`gqIi;$GS4$ny1c|Uum|xRT2&Ol?`mhUE7&N^MU4^hcVe3Ym-@NT%)Yc5YAwFQAR3RkRi=6golmvDhRgR-Mu z)WT-7j$EPjxUd{xFT_E#Kn(B*^M-7sOt=m?*l{BD+x-(C4nyqC`5_+*aZCx1Xq)o_ zbJ}Uo#*~4E%r;MTq-#a90C&Ou@O1uTWxnmWt6&wj!VT`j3T}q4iyt%xm#UuZoDDOo zMI~4WOnUh+?uvS9nBn0*4i(Df+o<1KUo?`O3-Q5Us}Rl#NuRXZ9h+p;*O{8kFD%M` zuJC^KnF%?{I^i^KZmUJ?scVGN_KGirdad>J$p_t2SVe_a!Q#v3al|Hf&;r%IOgSOR zkKl}jL31LXi7bUg(U9ast)Yq>gG`<)>v%3 z$XC_XBYQUUr2UmI#5hn1)UKCouRJz9+rCm6eDt2VXZ zEGe=089G29MUPHWH9L0M3@be*b?FnLX?*%xEJz0>N@s}$2+ zpSnFIlj~Y7U0p4`dwaLB%UV5-&RIk|0J+Fo-_Ms(yJO>DtF~kC>0+=k!M~8MvaVb+ z*2PrsIDe}u(eA47S?)O5N7B6rzgqEB*M|AYqnE3itFvBr`}f9>S=tX@y~G>1H5)dQ zuhf?@8s^*jRM4G2$(^M<_wFhJ?x?sn>Ks$&enPeZ;S`P`l}^vhoESOSN-yz>7H#+> zPH<+n-IgbKd5WUbq+T z?HH=qT@qwlW4y`pjuZ3@%`XihK9+d|H`iT=$L08LQix{V8JM{qWclJ%5ix!rmqV-j 
zXge)MJ&WO){0^otGT0{hThB~4xEMa!6&a;((rXycv+YJLyo`M5uxjS^_IcoMAfQ*`oRBT}xY5xtlBH0kgL@>$a0Awdr@T-r(MDtEe6m z^Io7m$A+vMCb^87il|pSd6l8+e(|mW*q&o$h^=Qui_7mgw82rJqXiiRB`}3@VYse| zJbz1Z#^uE$VkW|)hxaNpbTp)yp9eh0x2&dia#F;oHzhQtHzw2BCQm6M3~ zY>c=@xbB=7q|3w6hrMO#=BEk8bcZjXCduGgFSJq z$#=j=BuZXZ=^Dn1j|F#}Gt8_Ih)^$~{J{NtGOkOE@yWd1g!;${74mwosps9d$M!@_ zPx-h*@sgEI05SLN zYNgY)o{_IUZKUiJS?;mXe5$%Rb=6vPD~fl&zdt6I$Inyw`&V2YZz;8E}36*6+jp z)?@JYSEtc?uRgD`+L^B}6U^M{Rpr+I+!PTE;XDGO@$6WpR}K}};7* z_f-jCk)gI5sU196KM&vg95jO-g;F2f+f>OWFBt(ni6xMp!tN6G@)+%Gz!jgZNG7e> z;V+LlRdBWMlpS397R#>G_lA%cBoklR;d@nx9699$d;lmA=y+@B40sD^Rnj0qKk4iI zY-(E!m92AhrcY7nrTSqNx^{OsGf1y?R3G7U^*f0AJ7_lM$NdXfG}_{3q@^5|tn6pJ zHDP$KTEwbh9L1>rRxNwJo|e_fhY7>Us&FmS2l)Y06yHH|Ha9&f{mFK+b=@qn9yop6 zLk8D_FFws+9>o>5d+dmOnn2|655JAg(xyXyWb@#vQ5%?wXfWZ#x<;s=D`!M=S6sB4 zN0=xiM{4}QZ2S3k_|}F`sn3f{<|ReretR#xT=4=Qc|*LtX*2!Amh{G*^iG@XDTO;e zuR>L7M0lc~X}YtSI2{MP@2&Av1|I3tdGZWWh!eZw?CSONUnh^fxO36JBHc}nk2ZC! z^*$BF@IYyu{Ar`TInDL3!k9w5L-_S7cxnuL-0by9-tf~P6#qF|%=`=(S2a{8)a~8k z;=o{b&&gRtBlnnc-aJ^x4nCd<^AR|**cHut(6<77z`EBd#>C4$w43M+x?t=T0%OoQ z=o19CWF=WSCwN`NxY@xx9gow3zuo1gTa1V)ksxoRq9Vk1O>Ys$EkoiQMUEYHv|8Q0 zxpsrUpi6MqGh6$SSuy?%?39{>{oZj}$?fb7gQojcBfze}8#;;lIvr;ymVA>a zOd+aga%wg2S6&$Qy~%P)(0D)hy3k$GayYrV;B*4DQ01&1+n1I3=Cn+$U!*{Mt|$7y zeOj?`@fQq{p}LX-iSo;yg_EH9pxGv2JV>ftU65>+X< znd`C`l0vq*a5J7*B(-h9l)Dj`3RGnr*f2q%kv89pTdL{K`dN{RtCg%QlyR~>Tz#Vv z9ldFbw3mNL<%>pG0j0k4)THPQHv3{do-|S66s1aJ)4J`MU5|fIhmQ*K)19%F@-ZqmMk0_ zR_PX6PvJ} za}s0qzSRcfk=^r;hB`}IP6CQ@zBE+l6RY{9Wx8hCWcCgyofX=Lv{t)B;{lQeclY55 zb~9zALOt>)H2&3L^w=ZRMB3^{f=pG{-N(KVvMT#ep-p5s<(!GlD|o<_t|1s8#Ir3}f?%P;t6m_0iQ*~EG@#}c5tFkMp`}xO? 
z{40;v^7(I=rgo8h;J(s;f7h{HLs75OuIBoh`>6OlWm!dqN}Wu1q_ADVc>~K6RapYt>mpoH1ttr1-D+#@^+`G9i5cLo+1V|I6DqZg zo!PKMXUx|>ki(lk~ zHoKj5%mU$v&%|xyx&9`<&F&XE>PM`y`o`zSAbRe>jFdr7Ak*dbvJvqD zvfTdaLN>dv4CJsfIYQn~H{SaChVjPztwHA83qjf#kxxiFt1O=kx#gs}XJT&qdG3pb z+mXnGSAho9c_{alvZU~kX}q0D-J|GFD7C@l+a9KGL?{Cc>he14sVw3Qfha)<%uRV} zOdp(Z+UcXip*<#$Mq7f>nex)@6VZw&gj5h}%Lunss2%)mWuodj3-H)%&N;_zUOAv5 z&fPoBimF7XF2m1+O+xd&%wva8m%MRpOp~hU@yCL$s~7@R~p>!l?KpJjIPWW5%*amcMLb`(x%!EBCzXvLOc+s z>EGh#lvToK&#$`;e3W({j<#ty!^m8=%g&;SQW(`dqFn{mL;9TFLvK2;rIURggzT@- zi1`kx56y-sPTZW~uFd(1V(^wwUNBtQO!Xcx1QrFR?39}{L_2i{$ShsLcQXr4?doFN zu*cxK> z`i8c5gd$4yN~*>?YzL}kt*M`6dk4=1P?_WsQTOBFSHg7jH)!>SALglECth2D` z_#B5dC0-yNqRqsO1xU(V0j_-ckjXC1R|)$yk|H`n5-#faDAxLlozKts?-pFo4rEIm z7*x=1^OrIW8|5y6ir!?AWiDscQ=-XVEmY6^>gKriiD$`1E$V1Hjv{WOEZ=g5d0?YV zSD%IXl)pLZ*kQ7l*e1k+))gaCCJmg146qjEguIVdwtm%eCHHHB+glkD=x|acm1%v* z`MY|)2~P_zaAN~*5<~x7jAS)rRz3bEp3Id+Fxf`Nsb<@S&<=Y_AbsuNnFOUw+LRPy zVc6|uVsiQ8d~tuXr4kP#2{NJcIM(^zee63q!AN|!Cs9up$~Nj&zLe_QK<5py#0h!L zjqW~K47*dIs!>;!QbVh0w(2khp|o1~IKxz3RgoZm^|+roC4#UH&@(f!^dgL=I-ujOzQ0LXPh)IDAn|( zPsqy?tJvSi%=wzvOZDPaq@6+7ybq!7QMyFwQ0X3b`MU_J{+D-dJ~R$hSDwE9Y}Oec z+j6gxGO)-~KcQC25`wxxvN$lpyIM1zH$5pWL%SWz>HBS1$Ilw)}Ee6csX zC-g{W$863pn4!;;!IO4g9A7qTM9rE#_Uq&O<|=XL#wqc5;lY#zp6&B@MwNF+Gkg6R zJiOF43P_vWLW6f`oGYnO+ZE~zG{d{7z%xmk1%e<3%@C$ zS>x@uczWCB!pM6M42Y@r5eZhA2giqdkO;;mwQe3@i?~^*ocZ87oHiz9#^}trkFv1= zdwEAAFSFN-fANN;#i{bY%?F@VQ8@fumF`0}i^AKS0NJA>^gimXA zcB>c%yMSS3tcMjPlb`S06J$&=3EHT+rua6~KC+P;QC<-*<&T9;#$Kg7Quk<(T-9mC zHwC45+U*oQd_KYXYIlqc)7&Tr3f>+zSlfsV*|k}=*YQt3s6=PY8b07#Xn=3vZm0`V7Wy;YYpvU3EDeu4B!a?yc-2T3?ggRC&_AOFGfa+nQm-3(xBB}>{ zprO0lZeX3ZeiD~NL(HMYEi10DcCmDo+wg(=aD_>NS6|gbI?qsd%^vwoK^w-y6s?@0 zPVjT?vdnBCj$P&6sW=rk}F|eA4;^o^C&V0mMFNo7WjBGx=Azu=p+~1)^Cpi@S zF+w@^K(Dn!hl%Z)(0siv5Jw}eaHcD=MNrCdc}95WlXACD&x)n(oq^{VM-X)pb?2P%DO=e?r1X)d6HUwloUaqXq>UNE z;88?Iitxenjv1FBSyz+-;~WY0y@)m&)sYG{RV7x5`S_Gq|KP)WTrbE&R{tQGu4&!T z;6=v>LW@op<`eqX+0Dh}YA@AkWDG+cqjBh0p>x);C{eOHK4ID`@^C$6kvNS9xiW); 
z74GXdo$FJGKm~1-0rGPj`yn1>g!ee-I~9(6+LP{Uods5GqPyCHYc{vip03*K z%5GxZzH0Y&G+|wKlp=&zzPoY(`zWt=nA2e5iPrFodS$T9@uuFUUp{+wv`>oQk|y}= zfDXpcvg7f5tY%n1!VPgpP5o5uOy4(!b1?SE)bJ_e_ZAxWCd4YEZlN!p&%4nVbL`{h zDH!)rf2$HApY^!#MS5M+q1mUYBYY#x(35+wEJrObR0Pnu8U(FDRjo?s$=^XOGAzY$r50-ug0(~J zXNHCH^sk5t@yzwT^+VAx#2?{ooe34_O}9W2=Vi3`sl{;^7|`g$)$7*7heQd?vfgl5 z>j?H?(~EvJc0qgx^|^QnBKd~Pj?Y9K=@ndcOhh~f1Hee6{95pGKMgecMGfRlzo!g^ zo?96CD;g$k6&aikceB(rAK@e9f%rF3 zTvu*Vp5!@gt|u8l)^i*QXb#qoqE(mG>I;1>SPrR4zanb#^n&zk(6vUtc8B8wBy;}S z%<{p3B-=n$rwC2P=sx4=6;I>n7TOQlFW5Ov%sEPv#5YE=oiP{?THci43-&m;VsVOoz`X3C?+`OcLkjvP)+U71xz*#`y<2N#IiE802eoK{Ni;Y+(i z%QSanf(x?;3N~&Jnb;i|QOfb=M+&y$H-@`=Qvt!YWEKcMiP0`}2PSx7C=ZEB-gNOV zzt1s${H7Dwr2{f^K`x|(mZtb#P!<)9r)8eMC1wH>uJPuAY`s+)fIx!C_$B3LYd&Tt zic%X`^=SKvOMlWET0(JryBnd#OY#NC{?+9=4{MJI-?HnfocNQybGjB;<*y`&u$BG=86WA_tAGKR}TuYrPS-b&<-jeI+YHsK7`D|UWZm@Ob$SZhmL+TMCI9;Q* zY_J1}!t|zf2HX_1J+#fMI1o~o{;n<`>C>)Fje4mmy~B6nHQGXr=)B+Wblr%3sixW{ z1)}%TGaxD88QED?;};g?3FWz-=W{8+OoMZ<_Ub{7X8H+pz3SSRSL~^5r+k~ku^}51)H^R4QIhM1el+4mA(A_g zoS>(~UWHO)wze2VgEI{o-$8^*7q^RRb_*pGwQ)LGxyJ8AS8ccnVKuz9iMXJ7p>Xsf zKco0TK7!)Gm!*jA7rm(~-;iAwzIw_P2AoKm}H zk%n5Y2PR~~wZ@fsFj^rgVNmfv_U=;Kp7f=UJgH4kG4C=qXC5ibk{L17_-oH>%q)Vv z`J8qfgGY=N5$-#cvmlRzIeQOo%d+r*u^kyY!xS7bKk1z!&piL$v~HvE8j~OfW04ms zr4orOJxZ>0`^b-a{KF~zEIQWCDw&lLyq)xKsZO4>^AXWk%hK-f_n~fFm0(%A^FHNf%V#rw8wSB!uY)XW>X`6f%?nOP} z&P*i7H9>EpL+e!nq7Vl01r?@N)|Vwb$_ewISqfl1P8i#64PwfkqYQX&G4%qi9zX~EnWTZH$?_Sd!D_wu}7#XZFfm0Wob zM19;J&2e>RwYrl=Or1kfKEvDoSUlEhGjR|)H=lnf`V|gV8Qka|(0Ofua_y}ccmvP< ztiaoX=UPpvMchQ*!;KoLV{Nr(yCAv?j_H}Nk#<2FsO)BGomi0b*KiJATELQK2I0Ky zv?^{}ySOu&sD`crUuNzgtYTQmxlp%XP#w137bs8Kq>z<)^ARkx(x7zRiRg<3_2EIZ zLj4^qZWJhzwTDm4jX#BKYU)vdUG%k8;Fa)x?!HN#fJlvpqR#vpzxM4QGW4Df-DL6B zd;<%tx$gQL_f0l0^k-=Nt5Ro+QLNO@=><(Op`)ZP1OhzQm#UOtcK+bEKVqcuBosGx zr)y74q?A$J18#T7Hd{=})2y;ng&(|+r=sDmqzXPn9!VKhRori5a2CBuzPhOa7rk{o z!&uw+;bS*8p;k-1C|rT!-TZq3qazi!1kB;=#u#qoY~vdQKvc<_6+jp(AA=8G#=In& zd3RM_L6 
zMc7>Iw<3#X+(cJqJ}mZ45$p%DuCik^-b@&7)XlW_tf0Hb)3r86rqaW$S!RQ&^F@b` zMk*t^e51CDOE1(6j&-=ViZ>CM#j4>nIYVjar>s~5xjc-bBKSdTM`pE9uk*lKQ$#lwu zND>ZX%SXLa-kg)00~?hjW@~cZ1zTARr$$Zdz=#RST zwrIn=*tb(O`}TZ-PsD@yLo7_HKUBD?&QCx|LPPKQ;|a2*v-&S!h7YeVk%!^aHOXh` zOAQnzEn&_Sdufb*SWO#k79#Z~ZILFM=qR{macx{%&e3C1sbX^35rVTF5OyZF&}}Gn zlsVg(gr%z>`;|U+^05Ey2fP)L_JI&ZPp_dDtB!d z!E^R30r^zRQO%B4da4~wE_f-=Pf?6EAqnp%(-fELYE(27t0%xrHC|t@CC0g{D*ILV zM#}XaScg^hMYbD>+MOXJWNM?gs%sM6MyvvkNhsLZ;^jQRPGRu7U1ZzUMH~wNXuXmoqIKjHirgPw1H~61UvGh!%d{ z!)o|&R4E89{x-Vobzi5016#a0lnQ@JRL(BJ_J{#RAY z&&d_1H7SExXwV`}=h?vAIl5D)~B&A6t%)8E@ zocTuFjEY8R;e_sID~mzHj~AxtO8&IB%xmOTN;D^W4con$JB6!sS!LmSr#0H(xN~aq z?Ph6@MIS@?-E*@hhc6nZp*{U|45CLvBp2ZxbSb!{Fj2kqM)P zggT#uci2t8Ezfp2fYX~ES3ro!PWR7Tk=YkGNA3zXSUNrLQC%AL^_zVc@9Lk7cFss; zkLgBk@`bKp=NZ|32ZfJ*$@b^)PkJYXX<3{hU;OMvNvMyXcY3Uv;}^rKV+34n!dPb` z-(F|vdUhv{9w^^$`a@qN>kyyS^L$t5yQwT0beKpw=nFmNW?lMK>gJ}yakf;d{ahOK zDp@vk3Gkh#4}*G@El;QiA@&ERz>XVWy|PC5j&&Q|R$OOovXjE_b>l7Y(#Q49lTp0E zRzyb{o?J)rnY+eCwrjO_OkE%`qUmkKYo`ny)2zg-Tm1}vJcPdF9zd++b>#KB=cVlM zKxvujR`K34kL(xwD(=+j@Q*7=W!Z=H#ILTia6qig*cPEJRYS9RzgbsjtwtsHu(XZFQv#8Did@l1t3>-;Ph z(;567R3Hh<=+)fb#&u+f!^`KoQ#vPsaZNg%D^(kW)L;+8* z(a^iKMxXSZ|BRaZXSSD{*jZw%NlAB;#qw4o%@Z7^9w;b=qRX9Mam7!|Za2%)dX{Ti zS-q>@wwNjd@=hHcHMn!iHU|QmScPLd1RpCjpF|t1kJwG3m#_2E7ww}&FVr)XEXJuc zL-`IBpS~fxb~~m7pG(@~LUIooekK833cD{K62V3z?iXTm;k4fnh8WYI>UDZWn{dl~ zB|Ig`vo2>fgjJEAmsPI=3KRMQYD5)Zl$9|5=9b*zQKI4#AmFayW`FT4b5R?(kIG$B{8YyCUSi{jFje(pwfoF@**jP08yyqn zyPXJ?N!tj>5z-+*FoL%cI`^z_YJn-Xy3D&`^N2qe!YbO%MOI5y>=+gLE^$y{GF|^W zi1MpzPf#BFK^mR{C|SlycAks|T8tCYn@5U0HahURC;+9~5F}1>2A>VXmKm%SgX6T@ zR>$lYRo0;suwcDJ9NR0c;)J8IKR0AF;E(g)RQ(Rpu2&$%=ne!KXkZ-0+-CquG*RJ& z3z4oj06x<^9^p-9aOCk=;SPV|%~&r1EjPcSD9K8cEf+4svx*+xwuMMpqso$TFL^pTu~n;^+NH|2Dr!C~XyM(#BZ{8hiC)jMH$O^_D z(l^|Q&6K#1n59Fs+In-ip)p07jf(52(pc9fG0_l`vgm1+H#TM^d+{WPt;VyLhkF-e ztfB(^4Jqn@S@v4N?k?${Zqss z(*qF9pt}SJ&?1IBAPSku@UN74wEG}e&^E^RW6)dlYm6WeQz)QiCk@bIK_I1PfFSC9 
z5XiC@1iG~i0)+*FK-U2iv>6}}5um-8c?1HHV}d~7IS}ZE2?#_AVCg=9KsnDr`9L-Z z>!(y^)(<_st3Ra&(*PYp5Fi)`6u$YPNQj4q2FOB2{}6Axj*dx;1`5XlWa+MBV%$Ur z1!H1jqF=j$MT!pc!@>mQ=&oF)KnFboaxp+ySLx9~9#~k|m}uBI3>YA1ENmQ1%xkzz zSRgyBYj{^MaPgS2K-Pec-qkDDiP*70X4i0U;9wJwa9#x&n;}|NDYUCo|>A9lbsZ#gin5#0n8u_rU1#2({r-jVU=R12R)=_ ziNI*vNZzWq7 zy1$idUGDs;Wc%v~|4^I#K8H(Z?&xSM%*lyx;4m_=F@|wKZQz`)Mz);X99*0rQE^vW zBd8V3kTP{QpNbCo=%DH!&4feJK440;I&4f7Qjs#f8I#m&3;1 zjFVePNQje*hm(hg9YC-_^GZGXv;!bfQ0!!iq*#Fvx$==@sSt8}c{7#sp>!`K93d8}T#m5~6Hx0Rw9KqfU?~ zHn!~6FjL20$UloN@mi`d2ODdrpLhYDU&uc*FOk~+3HfK{CGu~eI>N#cB8PyQ|1$h9 zC6^2}8&gLYBYPMG=zA`@fBpR;UxF@a4~-mQPb}at$bS&D{YAQjT#~dUjV0|L+c;SK zIA;rl8AK9jFql2eA-T1)5$U(yhSO7t|01I?d8ke2$*YNqf{F16@Ba9$M(a(U*7#M-~AK#x3`zJff`vqf+5Tt z%^~;Lc8 zaC85@`uyblQFO_Xwy=i%>ZgZllE3T!cjhHf(#gTm2CfPNd@|tI|8OEuV9fqLk$++? zxxa_*&*3BCSS)W{)t`QdP&b>I+Q!QajD_h|d0_>!##g&|;)Mz%ku z*^dznQFAmxm>Ajrz6kzX`6W}v$N~X4CP#Z4>t7A~>xh3AT%vy>jT{{R3BAm{#Hkqp zHTlcW{pIq18T}*wZ>+hIy)6Rh$$uJEyUe~s%Kf;o+W_9g$k@W#!qH6*7!a-yU{(9e z`hPC}uk_!u>Bmy|zgYMU0al1V-R~buL}1zYS01qR1Qwy3mrGS3ak4)++Al&_@@;8VSkR<h($ literal 0 HcmV?d00001 diff --git a/datalad_metalad/extractors/legacy/tests/data/nifti1.nii.gz b/datalad_metalad/extractors/legacy/tests/data/nifti1.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..bd793d0264a5a1f718556c540a46989c4979ec14 GIT binary patch literal 15920 zcmeIx&ntrg0LJmxqTb_sP~(Jyi^SGi!qV1+A~n+DXhpB`qKu0PDF?sar8VZ@fZmi` z_sT)dGVNlKM5&pUXjZFbwJiP+-`jKk_IzsAMKL~57&iPW^4P50q{y=Hl`v0S*`bWbd+%NjW+l63E8_hQd+e#0+Kbx)EWc0Q^ zr8X38)jRLbQ0}IEFmzdx)%};xbNxc9wJ!-}hH{ZX1fGBb6rcbFC_n)UP=Epypa2CZ zKmiI+fC3bt00k&O0SZun0u-PC1t>rP3Q&Lo6rcbFC_n)UP=Epypa2CZKmiI+fC3bt b00k&O0SZunf`6?*cPEXfo(*fhszvw%#h)yj literal 0 HcmV?d00001 diff --git a/datalad_metalad/extractors/legacy/tests/data/xmp.pdf b/datalad_metalad/extractors/legacy/tests/data/xmp.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b3cee7890a7cad10c7be1de32c2ffe259fd42216 GIT binary patch literal 19350 
zcmeHvbx>U0@*o~0!3huu3>qXbGq}5ZfZz~pkl^l+V8Jyw!4uqq2bbU;2n2U`2rfGl z`Q9h*w_9(kwrc-)w`wx?^zGAj`kdVE>4rv0Oo9o_%!W?Wp3~mYUfrIK&H?}dYz$wb z^YH=YAl9aiW&lVYyZs`cI2TE8PI6}lAMmEL}0ReOeM|+5Y6}oFGdpJKv8;HpN z>`Z-TXSgBQssJ0|R8(n0={}d#8~vhM{QK~CP5FQtzAif#9Vt&KQbYHF+qB{POpUFY zS`1?z0p}UpC>Y28g50n{y z)@CkdBc@hYBmP>b?PhPf^*Fpa7w3ii2~{UL#M=0mSkOC|Cg?B)U^iek5bWs>80SCK zR_Gg_}H9Txp*5d@{8{IEn(->%iQHKc#iT(9DmkgeC##>3pm$x2; z$|zXLRvXK{Vxs7wjWT}rNnS>La+pn4q|NfBgwE$Ybc@-5K}1i&JUEW)IO@5lju+$S_Ie z?G+XRxN8hJO7Aa8f}PhNgK(~g zv+<>oSBceV)m!f7MZHlNT7dHyFQtOlpWnN((vU0-27=61o-5ZqUG zuGkJ2Ph4YnUXd>;OSfH#AxE}g4R2os?cPzV%MqtZh}%0NUq0mPN)7bt=j~3-#qI1d zTVSshLK$&*xXo9gV|PQ&eS^*ywoVq*jz79YF@JRk%rHBw(HL(p!)ij3w>1v<%(&uK zO|(9NF=V9J3^W|1$s8RbGi+)H)ig)+Pw3xo#__FUHJzkA=&o9Uz}0Z#?`-dG?x8!9 zP0o)rl@82r9lL8Kr_6*u@Yvq3uPJdJ+SBDNc!ybUHT{+0_@Q@2k4FBNmtznMnUGLb znE;(m79>l(Il?vnf-*qi(v3Io>IEu7G=Y%D0`^hySDf0iyw2jUj@fG)y2*RKA3QW; z&1&8AYNok>Lui6W2Q&9K_t7c6I@{lH8R+Glg;o~H7=HyAv1B5z7l(Opz4qrWT}TO8 zZ#2NVOkt+CUeq@H>`Wp@m;li9eOnf;C?9L}K#V%kYR)5Y*b1;zB$BLgKS)n!W*W$Cm^45|8hL4QSfBgAcmFUd1Ffs4QMtTn&pESU z`!=H`(MyNV7xk9?=!2G@G>O(InrAXm8O|i;PJWLVh9LiPpnoq_32uled7~w(Tdt0xH?D3r8kZ>{>RE_` z)_B)n+crG>e5d{Vc_|0Fd3ATiLZyf#-yQ@lR#0Tq>hyed;9hpNFW53@Nr7b5 z7bSK9bMcj^C+J}o#sgmCq;^|ca8^s@3Cgs``Dw|me(_r=VWHWc{<->@17XqD2h7~U z5H&mD&Xz!>XMS+&qP{jIT@@K@g$ISLfuVG|1q0u8qKNdRNxon|P32sRKA~&w%}-^{ zeRvnci3}N044v0UPeOk)+nJFVi;x`Q+k4raK*?oDAK`~ z7dxkF8}rO-B%Np;4jK#;MW}y%dhDBWy=gazE>d z^K@`SXP=LfIib%d_;m54yaufFS8)r8vK#@)~p?{itK=VFp zWBF3ne~GzR+JeJeo-9y%QY1|?sP9VP@?PYQq)@up;aTDh!4=}dBCono71#QlUd~~k zs@VIl_;Hux$zNk)Tx5y7f-h1`C|S4rOOxioeOpcDx9^g`Lr(`kF&2+|7bb1=8-JBc ze3g{b&wZ@PDjSdxo^q2F)2Orm=6f;sa#1vVc4s%mVW8b$o)U2#T&Z3uJs72zLv2AN ztwg)zb*Edx*YSRnrCBPx1RQuTxo^D)1S%(pU= zz7I$818=5m(zQ)%N{xK^MbV!J?>KX>YFNK}cY52h^xzctG!H54ptkye?o z*7wv9dbQ{;AJ>iLqQg9s+(HcS?>~#XY(=6S>@brX|M1FukUBecx?|2XYj&7(_gK(| zk{-`4rR_uNCUWdkOUkUg{+)yE8+5A-DI?S}_Z=jw7QU|$aYl>g1JlQqFE;@^f%7_5 z7VFmP;d*snT8Bp&9aD(ZJlA!1Gu?gBk;Wf<9KC3YH8K9!`(X#W@HYRe@74i!XKHarZ)2Vnb5{jEBf$Bn 
zAl%(~bX5GXEDEKD#?-C#C~!4i+&c3cNzL?)Xng3ywS~@+@P3KA!==)Pte@lH-)U7i zIV7aUtN0nw48Fd24Zl6o3O}&TKKOVX#3h7suZ+%txX;EJRPW58k9_G-d98L&Q*)H; zw8h=W#q28BCw21MQA9%p}{e?4AC+E<{VX`a!otGWhMj?ZJP)D7zVAvbovyiE zdD2*;$iYs?m>lA?wbYh|wLt30b&tyOP0{r2^T%!9bOBVFzD&a160U&!jAr#{&o7I=`dc?qyl2ODuely+-fwW#NiVmuBW`~)JrR9+n_3T^X zV0kek^TEN{VmnSBQ2d~h$(wBVNTc<~o3p*?v8X13@!iHo=16#WI_cS6t;m$$CdJ}I z<;3ddAnk)ABOOw5NRoz|mPm22-?%=wo3Xs>7Q>@r)9NhO5te0nCnG}-F;W3r`%tHZ zLt^j_EjgS@zqsv8f+fY2$%J*gr0i!dG(YYfsy=z#v$m)xzt6l*Bf zZvvU{>_H$Aki2xIqtx2JYoi%-vG<-)l4Ycut7 zxO~#sD}`{+7VM-%5jp{wnhW|QVXu%&*_Fc6`K6ph?`OGw7e!*Xl&We)ddVG?bvMH0 zJHboFLP^+ealce0i)nmEsLhxE3_db<%*4L}F z!SqbHqMjZ)zFv3A8NLFK$f}*_4$B2KT`6Ttj-dy|R}yN1r~LEAuh4LK!Ao>gUj}DK zNO8POqk*mpl#dfWxuf!?J!B0Eah9!ufR4zAUX`@6m9(ws>SleD4+fuwx~l1@s6HgD z!=DLl=25tw%^<(GRdmWndEpc^#`W?x-&u2pg1$TwY&U3>r=3_QdcqyCT`0OW>|eWQ z{)Se4Ok-mpx0pTMm1!rHs_=TaXOyu@w^x8_Rgj^R-jOL_o%)oY)_IyD%g8VsE!6-a zKki~AI9z&i_S@6dkDt=+`X-A`RE^q#(rx7k176Ua_g?X!7mhQx{fdu#2-`4@z11fRD^a`5M9 zrtVS;*>6Xk8uzUxssw5kwh$*ACIYsfdTH%6moOaaKCzrT zQDgGFPJO7U*K?mB_t{OzAwj2W-SJURpXE7CmUquNE}kp%HL1fX!m}43JzG8fbpCx% zn$4`S=jDvr@=TrfI?;@<{P2s|SVe(D>R9)uXOrK$CTFru$sx=Ny6+5G;6uUO z2J8xz)|@2{&rj)fEz^B<*_366%G^?bd@CH7nWHy1EuB8|*dymbThqt_q+a+h#KjQTL>FQ~@l|bmoyq1M(%_Hr1 z#e&v1knI7r&q|DLWOtr~Ug};&)24QpGfnce^C-=kiu(l{Uuws^?!OjT@)gK+e>RXe zF1i)_Qtd%_%gTwEf=Tzc%{j8dDldt;$yw$+gfonlxB7dTrY`Eu8C*0%FYazy%|&h* zPBxNQha1Xu^ET1Y7sE51DMI{HaJi}3NOHdqllB^aRCC8JBXb1EQ6lk;_an}D!cY~W^|sfT;ojXn42tK4|0YKP%Rayo6Kr61 zjz5wV-2X~asJhugfD$&=j$#l8BYSgOM;mA^20BzQuz~oVg*y-oVk^###kp zZ*GDPOV*e=0N9{6P)rdK8&_>6Fee9qi50{O0E0m60B$a@E>POhz|!1E*xJ+*0sx@{ zg&m9_){X!!ZZJ9!#tb%KVgYlY14RvNr6A^}W{$9V=s*=mh?N?E3pxcx9-1G5umE6b z6=ig2mgIMy1q}L^4!`nAEbO5FLpDh_s>iw=fE~Ec_n9&#CLKj`e=5+>Y@z!F7SNkbEPao*`NQ;r{Z&3v0J)wptF< z<{GE3G@9P?LcVT1|Hd!C;D6cg4<7)kI2k&^975IJ3GxFfV&DLQt-}B83g-3>j-qA; z_OK=CfQ6Iu$G7g!75Y_k2ooIuhRuLdRkF7+Qh_*X1EI-Dpen@G zQTHzn^*4Jc8JI$VN(S~YH|2m404hTqY@FMNxp*?1NS+6aX;`)C{qTHF~Q@1OYaM?Q#3xQax=`3@(D 
z(6YCAb0hfR>Z*PnUZ@`tM0z;CPBjFd@zE_K{kD`=0}GDk-m50wyKzqjxLRjYxUYmw zhhVh3myUNAk6yv!X23nthud4I#D+U;q3La<s6*UOhJa71LTHVAUB ziOb;PThKKSr~UYLUL$fLrM_adg)eD&>ld;i(S;cBVl+tq$k2STn`3c87d+43UTMb z#brDid!?mxZ>yDi7;ex{Vi+k|7{d(NY5~{m-s1)I4J6$Rl9N|@!}m&FZ!e$_!Ev?% zh!ENrIEdg~TIq-o&sq>cm}I_^`nY^3Dhp_d@QSUhAdIyZj04<$zX^T#26)*8Cf9p& zepvbhMXw_lSkDofd>A~zXoRGPap(wm(BF$DDRI2Tc>Lxl6Hkdk73p1INTzZgX~C<4j7NEx1#cc@U?|~@ zA?W%u3-1m?^6<6--7^I8$Uni)`lDnTm?3?_2p2xfwA;X8Lz5T&GOV(}&33QRe=QT) z3{5L=PLy%@(Fximf>%J3uwaJ#24N-QO~9!>TssK2vxO@WE><61E`Zp;B$2@6O`Sej zBG$uK0s7B0?#I7+udk^=rsn@#pIrm-tFM!R*CaN_>j8a?$w$=zy9Sbz#LfZy`oKw& zQ$NH7eJ*^=H`xnBTx1IVv5Ttbbg5{j?Oh9ewQvGx>4Bk(EVT$FffkGIweZ&dDT|>8 zH0LZcXfFah7QJ2Jnvk*DsSfBo$;sY4S?D@oyS|?m@S?4BQT&|g8dLAJU|afP>4A$Y z-dv!|g03q$Je3maQ-nwW#Rm3$R9YlPU&Pk%1#wh>KMtEek_b9QsFDyJ)fjd%s+B)k z8@7QjNK%8s5(g7i(jTMk@nXP%@Hv$)c4iQ&7#k&KbP$)Xl&CS_BX(U7pBQ`=svXL+ zkUX(5z#!!#?C_w6VkB9JBf+&H8=^XrGh+8CB(SrC?gdE(bp@Gsuy&9x5g6heK0G8V z4>|YOko1(ar1Yk^1)N5Ka05Ch(WKC0LX1T|QgTQ0NqOgB#{|9=lA&6SmXUg%L#uo* zKax!%nSv?0TIya7-spqP;1l6XD!(X$ZYkrZEbvKTa-t>xhp4V-5|UqEqCuD$QygHx-7nD0(o$% zAO=!`@DNFw>`|qpJW^G%0&J&8&qE5t#lYdp^cK2KN%x>7EnTckJ&( z?0(7OjUr50@y{6Z6!S{+((_*AHJiPX$SOCZOvW4tG7)#kx)>HQd!&WS6sjgxm{m3G zZbnr}JcVu*=`2#8**M&^0dOSZL4W*4K_WMkb);?sZ-dei{0TwVuSAkFOM7H@gV_;y z$>WaA?@J~sC?+T&C_a?AJ|equwh_7EbmDTtafyYZA4NnO=$~nr2qq;W3j;{tQw2wV zrZX4=O%Zv8p>^UKh1zx08a9BM5&?2tw2xw6Nw?-0*@G?;V-pSKkUyX}V1Dth3#tpN z3#kjN3$6<%ZVxjeGfFem)7LYaGgJemgXcjhiI4jQ9tM9lkk(*`NA3$=ZAUlyTt2FS zT*RP;Q5ebIs%5xarl8?c#K{3i5)$6lX%wmdrJQ@x(-Nl|Njtc>J*}%+f4W?EQeX$p znTR*!ahE`+z~`nGX2W@diL&iU^2ryIx}*qiK1bWCl43+)bVcH|N-r4O^>RH(WgbK| zeO>XHduhgYt`<&*j`>l-E7?}aQp6&R0=U1T-1F3ry^vTK}s@tw5e2VcvEPz z{@!hXMP`FkkhrTt;*$oNBAg~%4m6)2Y#dQ)9O58LF}*C%GhaGMRf?B54}#akw6pL= z{5B+^TM-8jx(O`}1rG7cQ#EAy6fJ4=C{A(j1yOZebx1F%uwG_2va`| zmNTTZi#nIeqNb0blzx{Z$wHwSiy1v5Es&$HOilm#txPZNYOF$Rue4K+gfdBfsAD*r z)FT>xYWY~bsE5+zQt8ryQoP1NVHB}Fa>ke}f=Lqbw2aa0Jt)SkADO--?TBLn-b5Sq zNOmW!@Y&IYTjE&74Cm@-(9S1cmLxqO-Z&mPi~)4iZe 
zdE4xW%ER|KE+RZ4LBU@kSs_>qnw|dkz8cbTH+k280=*_Wgr2RrqeUd?bQYIG19Tan)urDHJg6kr>GApF{m&ILqVZzq3EH^p~Rv5 zp}e7jvPiO&vM90wvS3*zS-d_-yv5V@r&t;vH1*0Gj4s+f4xnq;6{)IWstK#nt3lKr ztEs5*s=287d`0~#_EqjHUJ*@ERnb6^y2h2Jqz3s%0f?@?uEDJStbu45&bP#Gu9Ie# z_Lf_gftG5PgO+oanU;2zlHVdK-We2{SQu~X*LQOCa}08{pC<1jPNPg?^7_8(80;VH zWA0=gD(SKAux{V$-RmP8OzTeT@%C+ed-K!-!-Mb&?uzb;`ik^Q`*dh0W#@{N3ljyC z850MS7?T|n5t9+q6q7sx6yX}-7~vU#8^IItFoHP(KH^zK7b%b{p1aA`(m^B@Nr6^@ zOyN;3UoLGfIF~7xBv;TBe;sF#bdYjTX7D(5H`Ob(hMU}8U|DBHXZgCPt}9?Q%ihjD z*tW+$-+|cvn{A!_g`K!PjRVAi!0x&IyY;PwPn}LF+c*;rB{U_9Iw>=+NQMSF2HKfA zi(866TlHEE?e;wGc-;4{^Iey2n{L-^@9cnA=oQ%++Zo>(`kBjKGanHd1J;vB)JW1u zlt^&o<49gbfZ}~ch9SE&!!(|>hiTSn+I(kjAKdhgi;k`rzAb+~VmYus9(5&ft#TW1 zeeBxkD(brI#_EdU+UG{%I_*|^7;&I*FnGv(TylK7CVKSjeDcEo-1vO#BJAAqV&($j z!tgxeX!SVmX#BY9@cYq&WB#R-Rgb>2fPF&2nDn-xL0|{4y|mS)-==H7hrFM>D}C@q z%ZtJJmigBCA?Q&IzLUMhx}HD3L)S+pLUKjwK*C3MM1G2titLF@i5!7SipNEz^kkS} zm-zwi?Uw&UvUo2!( zQBWMi<1s?=Uh2(`0 z1y2UMhWH0j2N{Q^;^yO+<80y(KXju$5?N{N=8V>T)<9R0_bRV4?^9lIp5KPSi0z2+ zh*8$(EK_k{upV9`BM;4`mZQ95D9RT)Hq2x?`PUz0D{|^a5zNag?`u&MhkTXW{_tRg zY-G)B-+bQ8!w?Cq1{MZuf|L~tb?}*|^`lxHAZw1)x(u38f*gMqc6LVMm>Y2V8wrZ66@bfaY165QNsb}5ld8%e= z8_#RALB$6Wi-m0^l+U7dqd(*ondNL`Y!nh@CuX|Jlq;0$+6f)LUP@gGS^9vh94s8H zC@mqa-6PYZORu^aq}-$&pj@F`I;#7g_&v@02k*TgA6XJU`hWcNG5=$QeTKcSeTjYk z5%Q|_>U6J4Z)op!Z#{z;0VV;M;5h*v!IL<+IM+DRINZ2n23>}l=O0ubKL7gM`FU^w zX~9y#%YqLDg9Wb(f(yD-`&3Dud#LW`DHJm2d#fP6#AhdCr)QUEmt}XUQmVqQ!h1RX zl5qTTe6Mhzpg7MeKYKiBECBlQ{9tT%+-uxutYT~^k2BAu0IN_jA6Ve?eEti-!q!6S zrP)i3m$erD<~c9%&Bn}hMr$*{C5Ot(*_}4>skAAy@+!~2h<(vym1313VRqD9~A#FzC1owEkiBqt6edA@xx*Zt+Ps#Dz(b$D)7|PDaxtdsm>{d zsn=7|Q*KjnQ^iv^Q)g8#%u3DgzP4_bZ3b=TZ656*CqW7F!g1XmHAKs+@)Gxy@S7@_u6dwD0J%m3fkQ;{W~aN!a()6W^1VlcVni z-=|JcPROrtR z5V;bG5#^Aia+rZOIEcWBU{}*J^B@jmFh08~I~%Jb>wQ)^)~75*oI7j{R$NxKmgiqk zAsvR)FLPcRo2;1No4hq~?A=IxmM{`D5;nq{&14h?E(8x~%ucpVwsW;{wV#I_x-F9} zKU-e!EbMHSKcG#ejiH^Q<)EFTEvMzCU8QBBosjR8Uz2Z2+({WtI!ftHd6dkPjFLnK zW&#znHd^vn@>qSc{A3=;VZ@Hds=`^pcFYpW(Z|BbZoyG+a@w*J_l>Z^%Ynzi+X2~u 
z%E9zl{}}%`cJ2F`?wU(~dQxfPU3H7Lt(KJLhQ_4kxhA#7il(Y|rS^R-In9QubMxS8 z;|lzmvC7Kolj^Xl{;Hy~1~a!W_M=DPt1gT=1fc{2GPAv-y?qRw3|$2?TUq0;#{I{$ z#$7CO*elovv}UJzFUy?5oJyPmoKn8aZ%c2}Y?E`Ck*JZ_k;s#fkdUXirYv)eb3En{ z<@my}Z&__+SDjitTK%aSv-)*)Pjx``_iEj07b{b%3Cp{wXFKP+_&Z^{I=gMVEIT2) z$vc<3WK&F&UKNs4tkYk1*msI|y>^y&k9RC~fYbBUJ};XMo0o;`7o4J8K=!tFs18yt zbk1h>8!oOJWeW#4*jw1!6o;Z(Z!EDc$Ypp`Z8vr@OGLBZtYLcbRIW=o;oGa)vALYs zhuQTz71>+5klLr(T`$xwi`g$ZDciC*+dG8V_S!QzeA;mQV%}^1&3u}8zkm1Xw&ljo z4(VFzqR!IHl4rZy=tc4I2aLghKJm`aEuX*9jM2=#?|A=tHLx$IPdkB0J(V$`u&6Mz zFw`Q^BG4k=qHtS&f_}oKqkjM~g?t;!7YOx*0i zELMG2-Fn@`Cznt2PU#MJJMeBGw_!dueoOvq{s>omx74$ZW7m!Hjl*`l?Ve5gW%Gs2 zCE~rLRn5I`Yuqc`%XLc^6QP5$-Lm6-y?tGbBZV#7sp^pvh@}&H^H|wf4y5q{mbQ}@ zw&#mQ?W=EqkI8thPY$LQb!`f2f@(h1gx55m;p|iHW6a&m;myg;X$v5c>ye9-tB@XZbh=PeC@IVJ+2Fvh1F2*P(4?aHbTO3s~NBRxt z;p2>N@H&YA)(9ijzV+ zvqR}tDNdVAG#FFdzUD@H|i#v!3LX#gdBj@gOlX zx%Hzx0y1(vZ0O4cxd8Dkv*#0E0TF5u);0l3YD%gszAUOGg^tjma4AP>(;Uwn^OePJ z|L(SKjOYofTuMQ)JqfT_u=u7Jw|Ic$s@QW$C-K=(t)NR{`JlB(wczRq%piqu=3whk zvWT>By-49s+m7zfi5;$=sXJ1`%){z%j;T%9p}(tht5K>6er4z2;Q&?(R0EwUPjz<` zNWD_cQk_$cbX;{5bmVnhX8dY7?WPY`To+vjT;H4zpBJ4MoDZIR9AX@5qvasAB7H}y z_mA@L_wV&*L_5QMh|7T&9PAss73>^L5u6kp7yK^x43`kMCPqqHMEa35rgWyXru4Y9 zt~6H69QBRN2kKC23mRTpN$Pdl37Ru%K^Z-TTL=Heph=uYsuIbp$Q5#Pu2CQ3t?rlT zQU=n|3Sfmp8EGkc8FK|`q$t0H})A*(w9S=5+v$r=l9QPbo9k)+fPEJljP6kh| zHf=_IOovT}SsYn)SPfYejCHz+x?PhGx$%;E;_?&Cy=zWS4y^ zE7-@x$Xp02q_+TVyM3Lol;g0eepzkhEamL%Ebpv%Dt3Cc(?}XmnnW5)x}QqLO~bv* z?c$)fZn>&#FYe&vpbPzt5yPIu!Ddxp=*IoNN7#Pi6!^->pvyk;9l!JLEH(Uvg?NXKBtL`3iUyi8My z8J}s4(UCC>$oG6wi%Xg7MXg(meT=j6e$G{oe{8Oz9-rI#%69Cu_=)(EIEr|hxU~3Z z@m+Bvahnil1#oH%AH=iwO0_rtql&r$KUbCK%QLn<^KW0)a&HuK(@O+q9o&yjwusN~ zuM@v#-IJ7n2Fg))^^mX?;5 z?74xyf%?d5%0Oh9WDsRoWi4c`WO8K$<1%|)`^Ne(jR)jxUezTL?Wc4Yv>1m3NiblQ$Yr z8t6+N(7MzL)>_t@pUSR!Ue#EoQkA|1a=La(c4Bt2bIRRj9uFUHES~1?b5>g{8u&I` z(KVDN$m@E#`fb4d(mmKc)_wl+2mY(yF;ym$hzZDUAiofx_im-OuGZ)gCLYN zlq!_2z{J3;!0N#A!1sZ($Cm2^L(^$kg6Td@o7cO?r9*d3EsnyczN-NX7VK5I8V5AemB_ 
za+PeF++2;VB?kRpAG0Q>O10{wO0LG!il#E8`cb8O&4((rn$xPmO7BLg2B~^8*OB$5 zkx+KRE1*E*d2#p*?N^hTwTbu*D|6j%c=LFDT~7ATXL=eO_685$ zedhIWsoQ%w<5ZtwUvs(lz23p4Mp9EEP68{uGcrCx@9{gb8vzMHT2>R*huqFDcaA1I zCKEh5JaEtY_XFm5?Mip&{5&kq%Fo32N#?N{e0IE#Z$_^6$ur1M1wgI~XMn5LfO{@v z4UgY+U358hHFmLfk#xT4n&(5pawFg3J8@6EExTPDZvXt|D$FRJFXK9A|b?JS*i)NnFg74#g>g4Na$1%rh$I-{8YxV2t zgEmbj-o-~#t4_V!QxkrrgoGK$x5$3TcLB>7#Td&N>4eguqoEw3;L!J>jiD8x>i= zBd*IAv^VnYDZDmwz{~`(g#P%pcm-x%P4|x{%S>OiS8JGUA{&amc+XDfb$ymFr`RxmE_9P|(0V9(^d=dr zM&M~l3!^Wi1Y;{BGUH=L6~=jWV)Zz6U1(Nts06c=xis^u*hK%=wh4t|r&501>D%tp zXUo~^UHy^a|$cWvk57opn; zmB96;nTsRu$Mwuj)>UMUdY`duo^PK$`0ya^K}~oz$peyX zlEQh^`JAS2-l11&7gh_?Z7v)23ofzyne(AdE?z#THxpP9(4rAxF!1LC zXhnyyIjkN^+8A2cVQyj$v8QuTmJ|U^CnnH3CIA># z@uB?Z5|BUji~Zm%4{9v88}n1Dv&)0zZABqw=2GtV5EXX?RU>x`BW`0#K>>6=R~}a@TPr9RfUA|I zwF8eUKcxY*bq&G;y@!E;lmh4gK6_&m9%V6!UpUYaKc$(Yqb&~*=;GqS?83@yV{Zxs zb8~Y8K`cNP7A7bLlY^VJqk$`vwF4E5#7`Q(YkQ!!0>Ee*7}_{F@>5bmpTn-7BG?+6 z7{KKI69)rxbE_XbV9$Wo4nT9`AA&*2{>0|7HL!OS0P_L=JcLpG{oKgL+Qi1*%D~zP zA|U(++Ml5?8G$gN{v_moCb+TDA41zY*<1b)*w_dNv4mK`3VwiKX7F#Rjg5F5%^fWv zFb2PDBW&sTbM(_xmgX>X@K_pHoAOh-G8scm44f<-DFuuze@y%-s-^ia0zU-*O@R;i z7xH}2krA|k5?agmCw3|jyT7s%kT*9nGk{nEqztSrApeg3?E9Ky1_!45|`0v>MrtyRA-`M=$uGk-*_Q#4@J23xnd}bpXE0`BSSBv%c zYC(rQ&^>GHWCXDnfR;bnLljL+%#9!bc4n3zzVxrBzkQBJ7UJe&V{hyLT`-HEw_v~@ zx4&lnf%AXwd%t$AmF>S*#MQtSstD+}BGA3gBl^=7RG}}Q0_yhW&~95k;NRgtwD?M!pg&FjiTOWh^K-4B#|ZS;0se9B{BlCr zHu!&d!4&#`e*Krpe`MnSn(M#j`j0H|A0hwOyZ&pg|HuOW5%Pb%>;FS@{VN25wuSIh zy1)XQe;4%qsozBr+N9tHjX|L80G0+uKi<=UB9=Br7Ju#q_#Gtv4hMgh^ov7Z>!IB# z&;ox0Yhwd@V<-v0k9ZDxV+w%vQ$U*xgaP1RO$@Towt&BM82qW)|3|3zSE^z*MozG( z0Y(-4<8OxinXIh#kEVdX68wSo=dkrBMOAwUMA^m$M(IcU0&EOm`PoPTRJO5%$Q#%~ zHT$gwPzloKJ;AG{XjSd261Aa@V`={9U??E-Jqrw6G zA8i!qz(4vZpeFpIkpjvUC~I!)p#9Ubu%W1p6Re%%Pb^se-o+vc4V^7*Ore@XdHw9$ zKnE(?Lac?2VDY;4k5vKyH8iyV)=rj|KmP$iX{(t-Tp;#J_7D>YwB-ZZvin7M+qMH#%bolbAR?R4D^{3u$Nw3Z(o_(Fuy{ezX|x0BoqfGBqtOfDm*7_K2&g4s3|Tkh8({>uxBvqK;bZP zSYh8#Yq0*o66!s$sEAIC2NoAWrRD;ISU7)#L{C7VC-i?>FjuG-L3afUD>{_YuM5Dz 
z&d$mXFai96v9oYP&k5KCu>K7LfnaCH-(W1P&`t3d7_`-n3%cL_24nvRJ!tBcHmJIPhp~fL{t09G=UjFcHt1pUS9OVf$}P?zDgup*|BooJgAaNFiJC!-EF7Gy06-CLZgy62kO+skD7OfRO&kPf n - - 10.6080/K0QN64NG - - - Last1, First1 - - - Last2, First2 - - - - Main - title - CRCNS.org xxx-1 - - CRCNS.org - 2011 - - Neuroscience - fMRI - - eng - Dataset/Neurophysiology - - 10 GB - - - application/matlab - NIFTY - - 1.0 - - + + + 10.6080/K0QN64NG + + + Last1, First1 + + + Last2, First2 + + + + Main + title + CRCNS.org xxx-1 + + CRCNS.org + 2011 + + Neuroscience + fMRI + + eng + Dataset/Neurophysiology + + 10 GB + + + application/matlab + NIFTY + + 1.0 + + Some long description. - - - - 10.1016/j.cub.2011.08.031 - + + + + 10.1016/j.cub.2011.08.031 + """ @with_tree(tree={'.datalad': {'meta.datacite.xml': xml_content}}) -@with_tree(tree={'elsewhere': {'meta.datacite.xml': xml_content}}) -def test_get_metadata(path1=None, path2=None): - for p in (path1, path2): - print('PATH') - ds = create(p, force=True) - ds.save() - meta = MetadataExtractor( - ds, - _get_metadatarelevant_paths(ds, []))._get_dataset_metadata() - assert_equal( - dumps(meta, sort_keys=True, indent=2), - """\ +def test_get_metadata(path=None): + ds = create(path, force=True) + ds.save() + meta = DataciteMetadataExtractor(ds, 'elsewhere')._get_dataset_metadata() + assert_equal( + dumps(meta, sort_keys=True, indent=2), + """\ { "author": [ "Last1, First1", diff --git a/datalad_metalad/extractors/legacy/tests/test_exif.py b/datalad_metalad/extractors/legacy/tests/test_exif.py index ac00a52d..a58a4cfd 100644 --- a/datalad_metalad/extractors/legacy/tests/test_exif.py +++ b/datalad_metalad/extractors/legacy/tests/test_exif.py @@ -23,8 +23,7 @@ except ImportError: raise SkipTest -from os.path import dirname -from os.path import join as opj +from pathlib import Path from shutil import copy from datalad.api import Dataset @@ -80,19 +79,15 @@ @with_tempfile(mkdir=True) def test_exif(path=None): ds = Dataset(path).create() - 
ds.config.add('datalad.metadata.nativetype', 'exif', scope='branch') - copy( - opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'), - path) + copy(Path(__file__).parent / 'data' / 'exif.jpg', path) ds.save() assert_repo_status(ds.path) - res = ds.aggregate_metadata() + + res = ds.meta_extract('exif', str(Path(path) / 'exif.jpg')) assert_status('ok', res) - res = ds.metadata('exif.jpg') assert_result_count(res, 1) + # from this extractor - meta = res[0]['metadata']['exif'] + meta = res[0]['metadata_record']['extracted_metadata'] for k, v in target.items(): eq_(meta[k], v) - - assert_in('@context', meta) diff --git a/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py b/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py index 8d04a9ac..e98c2c4d 100644 --- a/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py +++ b/datalad_metalad/extractors/legacy/tests/test_frictionless_datapackage.py @@ -8,15 +8,17 @@ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Test frictionless datapackage metadata extractor """ -from simplejson import dumps +import json from datalad.api import Dataset from datalad.tests.utils_pytest import ( assert_equal, + assert_result_count, + assert_status, with_tree, ) -from ..frictionless_datapackage import MetadataExtractor +from ..frictionless_datapackage import FRDPMetadataExtractor # bits from examples and the specs @@ -43,12 +45,15 @@ } """}) def test_get_metadata(path=None): - ds = Dataset(path).create(force=True) - p = MetadataExtractor(ds, []) - meta = p._get_dataset_metadata() + + res = ds.meta_extract('frictionless_datapackage') + assert_status('ok', res) + assert_result_count(res, 1) + + meta = res[0]['metadata_record']['extracted_metadata'] assert_equal( - dumps(meta, sort_keys=True, indent=2), + json.dumps(meta, sort_keys=True, indent=2), """\ { "author": "Jane Doe ", diff --git 
a/datalad_metalad/extractors/legacy/tests/test_image.py b/datalad_metalad/extractors/legacy/tests/test_image.py index ac3586c1..d3dd67b3 100644 --- a/datalad_metalad/extractors/legacy/tests/test_image.py +++ b/datalad_metalad/extractors/legacy/tests/test_image.py @@ -8,6 +8,8 @@ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Test image extractor""" +from pathlib import Path + from datalad.tests.utils_pytest import ( SkipTest, assert_in, @@ -24,17 +26,15 @@ raise SkipTest( "No PIL module available or it cannot be imported") from exc -from os.path import dirname -from os.path import join as opj from shutil import copy from datalad.api import Dataset target = { - "dcterms:SizeOrDuration": [4, 3], + "dcterms:SizeOrDuration": (4, 3), "color_mode": "3x8-bit pixels, true color", "type": "dctype:Image", - "spatial_resolution(dpi)": [72, 72], + "spatial_resolution(dpi)": (72, 72), "format": "JPEG (ISO 10918)" } @@ -42,20 +42,15 @@ @with_tempfile(mkdir=True) def test_image(path=None): ds = Dataset(path).create() - ds.config.add('datalad.metadata.nativetype', 'image', scope='branch') - copy( - opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'), - path) + copy(Path(__file__).parent / 'data' / 'exif.jpg', path) ds.save() assert_repo_status(ds.path) - res = ds.aggregate_metadata() + + res = ds.meta_extract('image', 'exif.jpg') assert_status('ok', res) - res = ds.metadata('exif.jpg') assert_result_count(res, 1) # from this extractor - meta = res[0]['metadata']['image'] + meta = res[0]['metadata_record']['extracted_metadata'] for k, v in target.items(): eq_(meta[k], v) - - assert_in('@context', meta) diff --git a/datalad_metalad/extractors/legacy/tests/test_rfc822.py b/datalad_metalad/extractors/legacy/tests/test_rfc822.py index ecbaa648..5a974033 100644 --- a/datalad_metalad/extractors/legacy/tests/test_rfc822.py +++ b/datalad_metalad/extractors/legacy/tests/test_rfc822.py @@ -8,7 +8,7 @@ # ## ### ### ### ### ### ### ### ### 
### ### ### ### ### ### ### ### ### ### ## """Test BIDS metadata extractor """ -from simplejson import dumps +import json from datalad.distribution.dataset import Dataset from datalad.tests.utils_pytest import ( @@ -16,7 +16,7 @@ with_tree, ) -from ..datalad_rfc822 import MetadataExtractor +from ..datalad_rfc822 import DataladRFC822MetadataExtractor @with_tree(tree={'.datalad': {'meta.rfc822': """\ @@ -44,12 +44,11 @@ """}}) def test_get_metadata(path=None): - ds = Dataset(path).create(force=True) ds.save() - meta = MetadataExtractor(ds, [])._get_dataset_metadata() + meta = DataladRFC822MetadataExtractor(ds, [])._get_dataset_metadata() assert_equal( - dumps(meta, sort_keys=True, indent=2), + json.dumps(meta, sort_keys=True, indent=2), """\ { "citation": "Cool (2016)", diff --git a/datalad_metalad/extractors/legacy/tests/test_xmp.py b/datalad_metalad/extractors/legacy/tests/test_xmp.py index aaf11b05..4f23578c 100644 --- a/datalad_metalad/extractors/legacy/tests/test_xmp.py +++ b/datalad_metalad/extractors/legacy/tests/test_xmp.py @@ -8,6 +8,8 @@ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Test XMP extractor""" +from pathlib import Path + import pytest from datalad.tests.utils_pytest import ( @@ -49,20 +51,15 @@ @with_tempfile(mkdir=True) def test_xmp(path=None): ds = Dataset(path).create() - ds.config.add('datalad.metadata.nativetype', 'xmp', scope='branch') - copy( - opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'xmp.pdf'), - path) + copy(Path(__file__).parent / 'data' / 'xmp.pdf', path) ds.save() assert_repo_status(ds.path) - res = ds.aggregate_metadata() + + res = ds.meta_extract('xmp', 'xmp.pdf') assert_status('ok', res) - res = ds.metadata('xmp.pdf') assert_result_count(res, 1) # from this extractor - meta = res[0]['metadata']['xmp'] + meta = res[0]['metadata_record']['extracted_metadata'] for k, v in target.items(): eq_(meta[k], v) - - assert_in('@context', meta) diff --git 
a/datalad_metalad/extractors/legacy/xmp.py b/datalad_metalad/extractors/legacy/xmp.py index ca2187f3..aba2c348 100644 --- a/datalad_metalad/extractors/legacy/xmp.py +++ b/datalad_metalad/extractors/legacy/xmp.py @@ -27,7 +27,7 @@ xmp_field_re = re.compile(r'^([^\[\]]+)(\[\d+\]|)(/?.*|)') -class MetadataExtractor(BaseMetadataExtractor): +class XmpMetadataExtractor(BaseMetadataExtractor): def get_metadata(self, dataset, content): if not content: return {}, [] diff --git a/setup.py b/setup.py index 8ba19998..2d5e825a 100755 --- a/setup.py +++ b/setup.py @@ -24,8 +24,10 @@ 'datalad_metalad.pipeline.processor', 'datalad_metalad.pipeline.provider', 'datalad_metalad.extractors', - 'datalad_metalad.extractors.studyminimeta', 'datalad_metalad.extractors.tests', + 'datalad_metalad.extractors.legacy', + 'datalad_metalad.extractors.legacy.tests', + 'datalad_metalad.extractors.studyminimeta', 'datalad_metalad.filters', 'datalad_metalad.indexers', 'datalad_metalad.indexers.tests', @@ -59,15 +61,15 @@ 'metalad_studyminimeta=datalad_metalad.extractors.studyminimeta.main:StudyMiniMetaExtractor', 'external_dataset=datalad_metalad.extractors.external_dataset:ExternalDatasetExtractor', 'external_file=datalad_metalad.extractors.external_file:ExternalFileExtractor', - 'annex=datalad_metalad.extractors.legacy.annex:MetadataExtractor', - 'audio=datalad_metalad.extractors.legacy.audio:MetadataExtractor', - 'datacite=datalad_metalad.extractors.legacy.datacite:MetadataExtractor', - 'datalad_core=datalad_metalad.extractors.legacy.datalad_core:MetadataExtractor', - 'datalad_rfc822=datalad_metalad.extractors.legacy.datalad_rfc822:MetadataExtractor', - 'exif=datalad_metalad.extractors.legacy.exif:MetadataExtractor', - 'frictionless_datapackage=datalad_metalad.extractors.legacy.frictionless_datapackage:MetadataExtractor', - 'image=datalad_metalad.extractors.legacy.image:MetadataExtractor', - 'xmp=datalad_metalad.extractors.legacy.xmp:MetadataExtractor', + 
'annex=datalad_metalad.extractors.legacy.annex:AnnexMetadataExtractor', + 'audio=datalad_metalad.extractors.legacy.audio:AudioMetadataExtractor', + 'datacite=datalad_metalad.extractors.legacy.datacite:DataciteMetadataExtractor', + 'datalad_core=datalad_metalad.extractors.legacy.datalad_core:DataladCoreMetadataExtractor', + 'datalad_rfc822=datalad_metalad.extractors.legacy.datalad_rfc822:DataladRFC822MetadataExtractor', + 'exif=datalad_metalad.extractors.legacy.exif:ExifMetadataExtractor', + 'frictionless_datapackage=datalad_metalad.extractors.legacy.frictionless_datapackage:FRDPMetadataExtractor', + 'image=datalad_metalad.extractors.legacy.image:ImageMetadataExtractor', + 'xmp=datalad_metalad.extractors.legacy.xmp:XmpMetadataExtractor', ], 'datalad.metadata.indexers': [ 'metalad_studyminimeta=datalad_metalad.indexers.studyminimeta:StudyMiniMetaIndexer', From eb7e01b1d05960b3acbfcd72b678e4ef81f48c6a Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Fri, 9 Sep 2022 09:48:52 +0200 Subject: [PATCH 3/4] use un-released datalad version without metadata This commit declares a dependency on the datalad branch `mih/mnt-metadata-mv`. In this branch the metadata code is removed from datalad core. This dependency should be replaces by a proper `datalad>=0.17.x`, whenever `datalad` with metadata code removed is released. --- requirements-devel.txt | 5 ++++- requirements.txt | 5 ++++- setup.cfg | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/requirements-devel.txt b/requirements-devel.txt index 1688dd3e..34d69e52 100644 --- a/requirements-devel.txt +++ b/requirements-devel.txt @@ -1,5 +1,8 @@ six -datalad>=0.15.6 +# replace the following line with the datalad version without +# metadata code, when that is released, e.g. 
+# datalad>=0.17.4 +git+https://github.com/mih/datalad.git@mnt-metadata-mv#egg=datalad coverage sphinx>=1.7.8 sphinx-rtd-theme diff --git a/requirements.txt b/requirements.txt index a7aeac43..74b057a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ six -datalad>=0.15.6 +# replace the following line with the datalad version without +# metadata code, when that is released, e.g. +# datalad>=0.17.4 +git+https://github.com/mih/datalad.git@mnt-metadata-mv#egg=datalad sphinx>=1.7.8 sphinx-rtd-theme pyyaml diff --git a/setup.cfg b/setup.cfg index 45f0f9f0..b7d0f508 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,7 +15,7 @@ classifiers = python_requires = >= 3.5 install_requires = six - datalad >=0.15.6 + # uncomment this when datalad is released without metadata code in core: datalad >=0.17.x datalad-metadata-model >=0.3.5,<0.4.0 pyyaml test_requires = From 332a427375fba92696b82df85546d8e67aeed1ae Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Fri, 9 Sep 2022 10:01:57 +0200 Subject: [PATCH 4/4] remove simplejson usage from legacy extractor tests --- .../extractors/legacy/tests/test_datacite_xml.py | 4 ++-- datalad_metalad/extractors/tests/test_custom.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py b/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py index afe5997a..f8c4c48b 100644 --- a/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py +++ b/datalad_metalad/extractors/legacy/tests/test_datacite_xml.py @@ -8,7 +8,7 @@ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Test datacite metadata extractor """ -from simplejson import dumps +import json from datalad.api import create from datalad.tests.utils_pytest import ( @@ -73,7 +73,7 @@ def test_get_metadata(path=None): ds.save() meta = DataciteMetadataExtractor(ds, 'elsewhere')._get_dataset_metadata() assert_equal( - dumps(meta, sort_keys=True, indent=2), + 
json.dumps(meta, sort_keys=True, indent=2), """\ { "author": [ diff --git a/datalad_metalad/extractors/tests/test_custom.py b/datalad_metalad/extractors/tests/test_custom.py index 1a4fad2a..6f2ce518 100644 --- a/datalad_metalad/extractors/tests/test_custom.py +++ b/datalad_metalad/extractors/tests/test_custom.py @@ -8,6 +8,7 @@ # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Test custom metadata extractor""" +import json from six import text_type from datalad.distribution.dataset import Dataset @@ -19,7 +20,6 @@ known_failure_windows, with_tree, ) -from simplejson import dumps as jsondumps # some metadata to play with, taken from the examples of the google dataset @@ -85,9 +85,9 @@ @with_tree( tree={ '.metadata': { - 'dataset.json': jsondumps(sample_jsonld)}, + 'dataset.json': json.dumps(sample_jsonld)}, 'down': { - 'customloc': jsondumps(testmeta)}}) + 'customloc': json.dumps(testmeta)}}) def test_custom_dsmeta(path=None): ds = Dataset(path).create(force=True) sample_jsonld_ = dict(sample_jsonld)