Skip to content

Commit

Permalink
Merge pull request #22 from asfadmin/rew/alternate-source-types
Browse files Browse the repository at this point in the history
PR-5948 Refactor ConfigSourceProvider and allow alternate Source implementations
  • Loading branch information
reweeden authored Sep 19, 2024
2 parents 017a67c + 7055973 commit 08864d9
Show file tree
Hide file tree
Showing 13 changed files with 254 additions and 107 deletions.
4 changes: 2 additions & 2 deletions mandible/metadata_mapper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .context import Context
from .format import Format
from .mapper import MetadataMapper, MetadataMapperError
from .source import ConfigSourceProvider, PySourceProvider, Source
from .source import ConfigSourceProvider, FileSource, PySourceProvider

__all__ = [
"ConfigSourceProvider",
Expand All @@ -10,5 +10,5 @@
"MetadataMapper",
"MetadataMapperError",
"PySourceProvider",
"Source",
"FileSource",
]
8 changes: 4 additions & 4 deletions mandible/metadata_mapper/format/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from .format import (
FORMAT_REGISTRY,
FileFormat,
Format,
FormatError,
Json,
SimpleFormat,
Zip,
ZipInfo,
ZipMember,
)

try:
Expand All @@ -21,12 +21,12 @@

__all__ = (
"FORMAT_REGISTRY",
"FileFormat",
"Format",
"FormatError",
"H5",
"Json",
"SimpleFormat",
"Xml",
"Zip",
"ZipInfo",
"ZipMember",
)
61 changes: 45 additions & 16 deletions mandible/metadata_mapper/format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,21 @@ def get_value(self, file: IO[bytes], key: Key) -> Any:


@dataclass
class SimpleFormat(Format, ABC, register=False):
class FileFormat(Format, ABC, register=False):
"""A Format for querying files from a standard data file.
Simple, single format data types such as 'json' that can be queried
directly.
"""

def get_values(
self,
file: IO[bytes],
keys: Iterable[Key],
) -> Dict[Key, Any]:
"""Get a list of values from a file"""

with self._parse_data(file) as data:
with self.parse_data(file) as data:
return {
key: self._eval_key_wrapper(data, key)
for key in keys
Expand All @@ -66,12 +72,12 @@ def get_values(
def get_value(self, file: IO[bytes], key: Key) -> Any:
"""Convenience function for getting a single value"""

with self._parse_data(file) as data:
with self.parse_data(file) as data:
return self._eval_key_wrapper(data, key)

def _eval_key_wrapper(self, data, key: Key) -> Any:
try:
return self._eval_key(data, key)
return self.eval_key(data, key)
except KeyError as e:
if key.default is not RAISE_EXCEPTION:
return key.default
Expand All @@ -81,20 +87,35 @@ def _eval_key_wrapper(self, data, key: Key) -> Any:

@staticmethod
@abstractmethod
def _parse_data(file: IO[bytes]) -> ContextManager[T]:
def parse_data(file: IO[bytes]) -> ContextManager[T]:
"""Parse the binary stream into a queryable data structure.
The return type can be anything, but must be compatible with the input
to `eval_key`.
:param file: The binary stream to parse
:returns: A queryable data structure that will be passed to `eval_key`
"""
pass

@staticmethod
@abstractmethod
def _eval_key(data: T, key: Key) -> Any:
def eval_key(data: T, key: Key) -> Any:
"""Query the parsed data for a key.
:param data: Object returned by `parse_data`
:param key: The key to extract
:returns: The value associated with the key
:raises: KeyError
"""
pass


# Define placeholders for when extras are not installed


@dataclass
class _PlaceholderBase(SimpleFormat, register=False):
class _PlaceholderBase(FileFormat, register=False):
"""
Base class for defining placeholder implementations for classes that
require extra dependencies to be installed
Expand All @@ -106,11 +127,11 @@ def __init__(self, dep: str):
)

@staticmethod
def _parse_data(file: IO[bytes]) -> ContextManager[T]:
def parse_data(file: IO[bytes]) -> ContextManager[T]:
pass

@staticmethod
def _eval_key(data: T, key: Key):
def eval_key(data: T, key: Key):
pass


Expand All @@ -129,19 +150,25 @@ def __init__(self):
# Define formats that don't require extra dependencies

@dataclass
class Json(SimpleFormat):
class Json(FileFormat):
@staticmethod
@contextlib.contextmanager
def _parse_data(file: IO[bytes]) -> dict:
def parse_data(file: IO[bytes]) -> dict:
yield json.load(file)

@staticmethod
def _eval_key(data: dict, key: Key):
def eval_key(data: dict, key: Key):
return jsonpath.get_key(data, key)


@dataclass
class Zip(Format):
class ZipMember(Format):
"""A member from a zip archive.
:param filters: A set of filters used to select the desired archive member
:param format: The Format of the archive member
"""

filters: Dict[str, Any]
"""Filter against any attributes of zipfile.ZipInfo objects"""
format: Format
Expand Down Expand Up @@ -208,10 +235,12 @@ def _matches_filters(self, zipinfo: zipfile.ZipInfo) -> bool:


@dataclass
class ZipInfo(SimpleFormat):
class ZipInfo(FileFormat):
"""Query Zip headers and directory information."""

@staticmethod
@contextlib.contextmanager
def _parse_data(file: IO[bytes]) -> dict:
def parse_data(file: IO[bytes]) -> dict:
with zipfile.ZipFile(file, "r") as zf:
yield {
"infolist": [
Expand All @@ -227,5 +256,5 @@ def _parse_data(file: IO[bytes]) -> dict:
}

@staticmethod
def _eval_key(data: dict, key: Key) -> Any:
def eval_key(data: dict, key: Key) -> Any:
return jsonpath.get_key(data, key)
8 changes: 4 additions & 4 deletions mandible/metadata_mapper/format/h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@

from mandible.metadata_mapper.key import Key

from .format import SimpleFormat
from .format import FileFormat


@dataclass
class H5(SimpleFormat):
class H5(FileFormat):
@staticmethod
def _parse_data(file: IO[bytes]) -> ContextManager[Any]:
def parse_data(file: IO[bytes]) -> ContextManager[Any]:
return h5py.File(file, "r")

@staticmethod
def _eval_key(data, key: Key) -> Any:
def eval_key(data, key: Key) -> Any:
return normalize(data[key.key][()])


Expand Down
8 changes: 4 additions & 4 deletions mandible/metadata_mapper/format/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@

from mandible.metadata_mapper.key import Key

from .format import SimpleFormat
from .format import FileFormat


@dataclass
class Xml(SimpleFormat):
class Xml(FileFormat):
@staticmethod
@contextlib.contextmanager
def _parse_data(file: IO[bytes]) -> Any:
def parse_data(file: IO[bytes]) -> Any:
yield etree.parse(file)

@staticmethod
def _eval_key(data: etree.ElementTree, key: Key) -> Any:
def eval_key(data: etree.ElementTree, key: Key) -> Any:
nsmap = data.getroot().nsmap
elements = data.xpath(key.key, namespaces=nsmap)
values = [element.text for element in elements]
Expand Down
Loading

0 comments on commit 08864d9

Please sign in to comment.