diff --git a/datalad_metalad/extract.py b/datalad_metalad/extract.py index 36eca6a4..6d551395 100644 --- a/datalad_metalad/extract.py +++ b/datalad_metalad/extract.py @@ -798,6 +798,8 @@ def legacy_get_file_info(dataset: Dataset, def legacy_extract_file(ea: ExtractionArguments) -> Iterable[dict]: + import sys + print(repr(ea), file=sys.stderr) if issubclass(ea.extractor_class, MetadataExtractor): # Call metalad legacy extractor with a single status record. diff --git a/datalad_metalad/extractors/base.py b/datalad_metalad/extractors/base.py index 1bf24f02..5284b126 100644 --- a/datalad_metalad/extractors/base.py +++ b/datalad_metalad/extractors/base.py @@ -22,6 +22,13 @@ from uuid import UUID from datalad.distribution.dataset import Dataset +# XXX this is the legacy-legacy interface, keep around for a bit more and then +# remove +from datalad_deprecated.metadata.extractors.base import BaseMetadataExtractor + + +# Add a generation identifier to the old extractor base class +BaseMetadataExtractor.__generation__ = 2 @dataclasses.dataclass @@ -350,58 +357,3 @@ def get_state(self, dataset): object instance is passed via the method's `dataset` argument. """ return {} - - -# XXX this is the legacy-legacy interface, keep around for a bit more and then -# remove -class BaseMetadataExtractor: - - __generation__ = 2 - - NEEDS_CONTENT = True # majority of the extractors need data content - - def __init__(self, ds, paths): - """ - Parameters - ---------- - ds : dataset instance - Dataset to extract metadata from. - paths : list - Paths to investigate when extracting content metadata - """ - - self.ds = ds - self.paths = paths - - def get_metadata(self, dataset=True, content=True): - """ - Returns - ------- - dict or None, dict or None - Dataset metadata dict, dictionary of filepath regexes with metadata, - dicts, each return value could be None if there is no such metadata - """ - # default implementation - return \ - self._get_dataset_metadata() if dataset else None, \ - ((k, v) for k, v in self._get_content_metadata()) if content else None - - def _get_dataset_metadata(self): - """ - Returns - ------- - dict - keys and values are arbitrary - """ - raise NotImplementedError - - def _get_content_metadata(self): - """Get ALL metadata for all dataset content. - - Possibly limited to the paths given to the extractor. - - Returns - ------- - generator((location, metadata_dict)) - """ - raise NotImplementedError