From 0892fa7736bc7d2217b0ab4b521f531936a99850 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 23 Jan 2024 11:52:04 +0100 Subject: [PATCH] remove redefinition of legacy extractor base-class This commit removes the definition of the legacy extractor base class `BaseMetadataExtractor` from `datalad_metalad.extractors.base`. It now is imported from `datalad_deprecated.metadata.extractors.base` and augmented with a generation-ID. This should fix mixups with identical class names from different packages. --- NOTE(review): besides the base-class change described above, the extract.py hunk below also adds `import sys` and `print(repr(ea), file=sys.stderr)` at the start of `legacy_extract_file`. This looks like leftover debug output that is unrelated to the commit message; confirm whether it is intended before applying. datalad_metalad/extract.py | 2 + datalad_metalad/extractors/base.py | 62 ++++-------------------------- 2 files changed, 9 insertions(+), 55 deletions(-) diff --git a/datalad_metalad/extract.py b/datalad_metalad/extract.py index 36eca6a4..6d551395 100644 --- a/datalad_metalad/extract.py +++ b/datalad_metalad/extract.py @@ -798,6 +798,8 @@ def legacy_get_file_info(dataset: Dataset, def legacy_extract_file(ea: ExtractionArguments) -> Iterable[dict]: + import sys + print(repr(ea), file=sys.stderr) if issubclass(ea.extractor_class, MetadataExtractor): # Call metalad legacy extractor with a single status record. diff --git a/datalad_metalad/extractors/base.py b/datalad_metalad/extractors/base.py index 1bf24f02..5284b126 100644 --- a/datalad_metalad/extractors/base.py +++ b/datalad_metalad/extractors/base.py @@ -22,6 +22,13 @@ from uuid import UUID from datalad.distribution.dataset import Dataset +# XXX this is the legacy-legacy interface, keep around for a bit more and then +# remove +from datalad_deprecated.metadata.extractors.base import BaseMetadataExtractor + + +# Add a generation identifier to the old extractor base class +BaseMetadataExtractor.__generation__ = 2 @dataclasses.dataclass @@ -350,58 +357,3 @@ def get_state(self, dataset): object instance is passed via the method's `dataset` argument. 
""" return {} - - -# XXX this is the legacy-legacy interface, keep around for a bit more and then -# remove -class BaseMetadataExtractor: - - __generation__ = 2 - - NEEDS_CONTENT = True # majority of the extractors need data content - - def __init__(self, ds, paths): - """ - Parameters - ---------- - ds : dataset instance - Dataset to extract metadata from. - paths : list - Paths to investigate when extracting content metadata - """ - - self.ds = ds - self.paths = paths - - def get_metadata(self, dataset=True, content=True): - """ - Returns - ------- - dict or None, dict or None - Dataset metadata dict, dictionary of filepath regexes with metadata, - dicts, each return value could be None if there is no such metadata - """ - # default implementation - return \ - self._get_dataset_metadata() if dataset else None, \ - ((k, v) for k, v in self._get_content_metadata()) if content else None - - def _get_dataset_metadata(self): - """ - Returns - ------- - dict - keys and values are arbitrary - """ - raise NotImplementedError - - def _get_content_metadata(self): - """Get ALL metadata for all dataset content. - - Possibly limited to the paths given to the extractor. - - Returns - ------- - generator((location, metadata_dict)) - """ - raise NotImplementedError