Skip to content

Commit

Permalink
Merge pull request #16 from asfadmin/rew/string-parsing
Browse files Browse the repository at this point in the history
Add directive for re-parsing extracted values
  • Loading branch information
reweeden authored Sep 19, 2023
2 parents 4f0c1aa + 9e9d7e9 commit 5ec525a
Show file tree
Hide file tree
Showing 12 changed files with 599 additions and 133 deletions.
26 changes: 26 additions & 0 deletions mandible/jsonpath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
try:
import jsonpath_ng
import jsonpath_ng.ext
except ImportError:
jsonpath_ng = None


def get_key(data: dict, key: str):
    """Look up ``key`` in ``data`` and return the matching value.

    When ``jsonpath_ng`` could not be imported, ``key`` is treated as a simple
    dot-separated path (``"a.b.c"``): each segment indexes one level deeper
    into ``data``, and the special key ``"$"`` returns ``data`` itself.
    Otherwise ``key`` is parsed as a JSONPath expression with
    ``jsonpath_ng.ext`` and the value of the first match is returned.

    Raises:
        KeyError: If the JSONPath expression matches nothing. In fallback
            mode, whatever exception the failing ``val[segment]`` lookup
            raises is propagated unchanged (``KeyError`` for a missing dict
            key).
    """
    # Fall back to simple dot paths
    if jsonpath_ng is None:
        if key == "$":
            return data

        val = data
        # Use a dedicated loop name so the `key` argument is not clobbered.
        for segment in key.split("."):
            val = val[segment]

        return val

    expr = jsonpath_ng.ext.parse(key)
    # TODO(reweeden): Add a way to return the whole list here and not just
    # the first element.
    try:
        return next(match.value for match in expr.find(data))
    except StopIteration:
        # The StopIteration is only an artifact of using `next`; suppress it
        # so the traceback shows just the KeyError for the requested key.
        raise KeyError(key) from None
105 changes: 105 additions & 0 deletions mandible/metadata_mapper/directive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import io
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Union

from .context import Context
from .exception import MetadataMapperError
from .format import FORMAT_REGISTRY
from .source import Source

# A template key: either the key string itself, or a callable that receives
# the Context and returns the key string (resolved by get_key below).
Key = Union[str, Callable[[Context], str]]


def get_key(key: Key, context: Context) -> str:
    """Resolve a template key to its final form.

    A callable key is invoked with ``context`` and its result is returned;
    any other key is returned unchanged.
    """
    return key(context) if callable(key) else key


class TemplateDirective(ABC):
    """Abstract base for directives found in a metadata template.

    A directive is a special marker in the metadata template which the
    MetadataMapper replaces. Every directive keeps a reference to the
    ``context`` and the ``sources`` mapping it was created with.
    """

    def __init__(self, context: Context, sources: Dict[str, Source]):
        self.context, self.sources = context, sources

    @abstractmethod
    def call(self):
        """Compute the directive's value; concrete subclasses must override."""

    def prepare(self):
        """Optional preparation hook; the base implementation does nothing."""


class Mapped(TemplateDirective):
    """Directive whose value is read from one of the metadata Sources.

    ``source`` names an entry in ``sources`` and ``key`` (resolved through
    ``get_key``) identifies the value inside that Source. ``prepare``
    registers the key with the Source and ``call`` reads the value back.

    Raises:
        MetadataMapperError: If ``source`` is not a key of ``sources``.
    """

    def __init__(
        self,
        context: Context,
        sources: Dict[str, Source],
        source: str,
        key: Key,
    ):
        super().__init__(context, sources)

        # Reject unknown source names up front with a descriptive error.
        if source not in sources:
            raise MetadataMapperError(f"source '{source}' does not exist")

        selected = sources[source]
        self.source = selected
        self.key = get_key(key, context)

    def call(self):
        """Return the value the Source holds for this directive's key."""
        source, key = self.source, self.key
        return source.get_value(key)

    def prepare(self):
        """Register this directive's key with its Source."""
        source, key = self.source, self.key
        source.add_key(key)


class Reformatted(TemplateDirective):
    """A value obtained by re-parsing an already extracted value.

    ``format`` names a class in ``FORMAT_REGISTRY``. When the directive is
    called, ``value`` (``bytes``, or ``str`` which is encoded with
    ``str.encode()``) is wrapped in an in-memory binary file and handed to an
    instance of that format, which extracts ``key`` from it.

    Raises:
        MetadataMapperError: From the constructor if ``format`` is not in
            ``FORMAT_REGISTRY``; from ``call`` if ``value`` is neither
            ``bytes`` nor ``str``.
    """

    def __init__(
        self,
        context: Context,
        sources: Dict[str, Source],
        format: str,
        value: Any,
        key: Key,
    ):
        super().__init__(context, sources)

        format_cls = FORMAT_REGISTRY.get(format)
        if format_cls is None:
            raise MetadataMapperError(f"format '{format}' does not exist")

        self.format = format_cls()
        self.value = value
        self.key = get_key(key, context)

    def call(self):
        """Parse ``self.value`` with the format and return the keyed value."""
        raw = self.value
        if isinstance(raw, str):
            # The format reads from a binary file object, so text is encoded.
            raw = raw.encode()
        elif not isinstance(raw, bytes):
            raise MetadataMapperError(
                "value must be of type 'bytes' or 'str' but got "
                f"'{type(self.value).__name__}'"
            )

        return self.format.get_value(io.BytesIO(raw), self.key)
20 changes: 20 additions & 0 deletions mandible/metadata_mapper/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
class MetadataMapperError(Exception):
    """Generic error raised by the MetadataMapper.

    The message given to the constructor is stored on the ``msg`` attribute.
    """

    def __init__(self, msg: str) -> None:
        self.msg = msg


class TemplateError(MetadataMapperError):
    """Error raised while processing the metadata template.

    ``debug_path`` is an optional path string; when it is not ``None`` the
    string form of the error includes it as `` at <debug_path>``.
    """

    def __init__(self, msg: str, debug_path: str = None):
        super().__init__(msg)
        self.debug_path = debug_path

    def __str__(self) -> str:
        # Only mention a location when one was supplied.
        location = "" if self.debug_path is None else f" at {self.debug_path}"
        return f"failed to process template{location}: {self.msg}"
23 changes: 13 additions & 10 deletions mandible/metadata_mapper/format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from dataclasses import dataclass
from typing import IO, Any, ContextManager, Dict, Iterable, Type

from mandible import jsonpath


class FormatError(Exception):
def __init__(self, reason: str):
Expand All @@ -19,10 +21,9 @@ def __str__(self):
@dataclass
class Format(ABC):
# Registry boilerplate
def __init_subclass__(cls, register: bool = True, **kwargs):
    """Add each new subclass to ``FORMAT_REGISTRY`` under its class name.

    Pass ``register=False`` in the class statement (for example
    ``class Foo(Format, register=False)``) to keep a subclass out of the
    registry. Any other class keyword arguments are forwarded up the MRO so
    the hook cooperates with other base classes.
    """
    super().__init_subclass__(**kwargs)
    if register:
        FORMAT_REGISTRY[cls.__name__] = cls

# Begin class definition
def get_values(self, file: IO[bytes], keys: Iterable[str]):
Expand All @@ -32,6 +33,12 @@ def get_values(self, file: IO[bytes], keys: Iterable[str]):
for key in keys
}

def get_value(self, file: IO[bytes], key: str):
    """Extract the value for a single ``key`` from ``file``.

    The file is parsed via ``_parse_data`` and the key is evaluated against
    the parsed data through ``_eval_key_wrapper``.
    """
    with self._parse_data(file) as parsed:
        return self._eval_key_wrapper(parsed, key)

def _eval_key_wrapper(self, data, key: str):
try:
return self._eval_key(data, key)
Expand All @@ -53,7 +60,7 @@ def _eval_key(data, key: str):

# Define placeholders for when extras are not installed

class _PlaceholderBase(Format):
class _PlaceholderBase(Format, register=False):
"""
Base class for defining placeholder implementations for classes that
require extra dependencies to be installed
Expand Down Expand Up @@ -93,8 +100,4 @@ def _parse_data(file: IO[bytes]):

@staticmethod
def _eval_key(data: dict, key: str):
val = data
for key in key.split("."):
val = val[key]

return val
return jsonpath.get_key(data, key)
Loading

0 comments on commit 5ec525a

Please sign in to comment.