Skip to content

Commit

Permalink
TMP: convert extract into ValidatedInterface, and adopt constraint va…
Browse files Browse the repository at this point in the history
…lidation

This is a demonstration of how one existing command could adopt datalad-next's
parameter constraint validation. It changes the base class to datalad-next's
ValidatedInterface, and defines a validator with the relevant parameter constraints.

Specifically, the constraints are:
- The provided dataset exists, or a dataset can be derived from the current directory
- The path points to an existing file (ref datalad#354)
- The extractorname is a string
- The extractorargs is a mapping of key-value pairs

This makes a dedicated check for whether a file exists obsolete, and it could replace
the checks that check_dataset() does (provided EnsureDataset() gains an additional
constraint option that allows checking for valid dataset IDs - I've created an
issue about this in datalad/datalad-next#272).

This change would introduce a dependency on datalad-next, and as parts of
this PR were only tested with as-yet-unreleased branches of datalad-next,
it will not work right now unless you're on the right development version
of datalad-next.
  • Loading branch information
adswa committed Mar 2, 2023
1 parent fc20e5c commit 68ac70a
Showing 1 changed file with 38 additions and 15 deletions.
53 changes: 38 additions & 15 deletions datalad_metalad/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,22 @@
from uuid import UUID

from dataclasses import dataclass

from datalad_next.commands import (
EnsureCommandParameterization,
ValidatedInterface,
)
from datalad.distribution.dataset import Dataset
from datalad.distribution.dataset import (
datasetmethod,
EnsureDataset,
from datalad.distribution.dataset import datasetmethod
from datalad_next.constraints import (
EnsurePath,
EnsureStr,
EnsureMapping,
EnsureNone,
EnsureListOf,
NoConstraint,
)
from datalad_next.constraints.dataset import EnsureDataset
from datalad.interface.base import (
Interface,
build_doc,
eval_results,
)
Expand All @@ -54,10 +62,6 @@
MetadataExtractorBase,
)

from datalad.support.constraints import (
EnsureNone,
EnsureStr,
)
from datalad.support.param import Parameter

from dataladmetadatamodel.metadatapath import MetadataPath
Expand Down Expand Up @@ -91,7 +95,7 @@ class ExtractionArguments:


@build_doc
class Extract(Interface):
class Extract(ValidatedInterface):
"""Run a metadata extractor on a dataset or file.
This command distinguishes between dataset-level extraction and
Expand Down Expand Up @@ -131,6 +135,23 @@ class Extract(Interface):
on the whether file-level- or dataset-level extraction is requested.
"""

# Define parameter constraints
extractorargs_constraints = EnsureMapping(key=EnsureStr(),
value=EnsureStr(),
delimiter='') | \
EnsureListOf(item_constraint=NoConstraint(),
min_len=0)
_validator_ = EnsureCommandParameterization(
param_constraints=dict(
path=EnsurePath(lexists=True),
dataset=EnsureDataset(installed=True, purpose='meta-extract'),
extractor=EnsureStr(),
extractorargs=extractorargs_constraints,
),
validate_defaults=("dataset",),
tailor_for_dataset=dict(path="dataset")
)

result_renderer = "tailored"

_examples_ = [
Expand Down Expand Up @@ -180,20 +201,20 @@ class Extract(Interface):
specified.
You might provide an absolute file path, but it has to contain
the dataset path as prefix.""",
constraints=EnsureStr() | EnsureNone()),
),
dataset=Parameter(
args=("-d", "--dataset"),
doc="""Dataset to extract metadata from. If no dataset
is given, the dataset is determined by the current work
directory.""",
constraints=EnsureDataset() | EnsureNone()),
),
context=Parameter(
args=("-c", "--context"),
doc="""Context, a JSON-serialized dictionary that provides
constant data which has been gathered before, so meta-extract
will not have re-gather this data. Keys and values are strings.
meta-extract will look for the following key: 'dataset_version'.""",
constraints=EnsureDataset() | EnsureNone()),
),
get_context=Parameter(
args=("--get-context",),
action="store_true",
Expand All @@ -219,7 +240,8 @@ class Extract(Interface):
prevent interpretation of the key of the first extractor argument
as path for a file-level extraction.""",
nargs="*",
constraints=EnsureStr() | EnsureNone()))
)
)

@staticmethod
@datasetmethod(name="meta_extract")
Expand Down Expand Up @@ -247,7 +269,8 @@ def __call__(
if isinstance(context, str)
else context))

source_dataset = check_dataset(dataset or curdir, "extract metadata")
# dataset is a DatasetParameter from the parameter validation
source_dataset = dataset.ds
source_dataset_version = context.get("dataset_version", None)
if source_dataset_version is None:
source_dataset_version = source_dataset.repo.get_hexsha()
Expand Down

0 comments on commit 68ac70a

Please sign in to comment.