From 68ac70a4413d49325b1accf4b194d5bde0e79013 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Thu, 2 Mar 2023 17:36:48 +0100 Subject: [PATCH] TMP: convert extract into ValidatedInterface, and adopt constraint validation This is a demonstration of how one existing command could adopt datalad-next's parameter constraint validation. It changes the base class to next's ValidatedInterface, and defines a validator with relevant parameter constraints. Specifically, the constraints are: - The provided dataset exists, or a dataset can be derived from the curdir - The path points to an existing file (ref #354) - The extractorname is a string - The extractorargs is a mapping of key-value pairs This makes a dedicated check of whether a file exists obsolete, and it could replace the checks that check_dataset() does (provided an additional constraint option in EnsureDataset() that allows checking for valid dataset IDs - I've created an issue about this at https://github.com/datalad/datalad-next/issues/272). This change would introduce a dependency on datalad-next, and as parts of this PR were only tested with as-yet-unreleased branches of datalad-next, it will not work right now unless you're on the right development version of datalad-next. 
--- datalad_metalad/extract.py | 53 +++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/datalad_metalad/extract.py b/datalad_metalad/extract.py index 4fd7b456..81d7cfd1 100644 --- a/datalad_metalad/extract.py +++ b/datalad_metalad/extract.py @@ -30,14 +30,22 @@ from uuid import UUID from dataclasses import dataclass - +from datalad_next.commands import ( + EnsureCommandParameterization, + ValidatedInterface, +) from datalad.distribution.dataset import Dataset -from datalad.distribution.dataset import ( - datasetmethod, - EnsureDataset, +from datalad.distribution.dataset import datasetmethod +from datalad_next.constraints import ( + EnsurePath, + EnsureStr, + EnsureMapping, + EnsureNone, + EnsureListOf, + NoConstraint, ) +from datalad_next.constraints.dataset import EnsureDataset from datalad.interface.base import ( - Interface, build_doc, eval_results, ) @@ -54,10 +62,6 @@ MetadataExtractorBase, ) -from datalad.support.constraints import ( - EnsureNone, - EnsureStr, -) from datalad.support.param import Parameter from dataladmetadatamodel.metadatapath import MetadataPath @@ -91,7 +95,7 @@ class ExtractionArguments: @build_doc -class Extract(Interface): +class Extract(ValidatedInterface): """Run a metadata extractor on a dataset or file. This command distinguishes between dataset-level extraction and @@ -131,6 +135,23 @@ class Extract(Interface): on the whether file-level- or dataset-level extraction is requested. 
""" + # Define parameter constraints + extractorargs_constraints = EnsureMapping(key=EnsureStr(), + value=EnsureStr(), + delimiter='') | \ + EnsureListOf(item_constraint=NoConstraint(), + min_len=0) + _validator_ = EnsureCommandParameterization( + param_constraints=dict( + path=EnsurePath(lexists=True), + dataset=EnsureDataset(installed=True, purpose='meta-extract'), + extractor=EnsureStr(), + extractorargs=extractorargs_constraints, + ), + validate_defaults=("dataset",), + tailor_for_dataset=dict(path="dataset") + ) + result_renderer = "tailored" _examples_ = [ @@ -180,20 +201,20 @@ class Extract(Interface): specified. You might provide an absolute file path, but it has to contain the dataset path as prefix.""", - constraints=EnsureStr() | EnsureNone()), + ), dataset=Parameter( args=("-d", "--dataset"), doc="""Dataset to extract metadata from. If no dataset is given, the dataset is determined by the current work directory.""", - constraints=EnsureDataset() | EnsureNone()), + ), context=Parameter( args=("-c", "--context"), doc="""Context, a JSON-serialized dictionary that provides constant data which has been gathered before, so meta-extract will not have re-gather this data. Keys and values are strings. 
meta-extract will look for the following key: 'dataset_version'.""", - constraints=EnsureDataset() | EnsureNone()), + ), get_context=Parameter( args=("--get-context",), action="store_true", @@ -219,7 +240,8 @@ class Extract(Interface): prevent interpretation of the key of the first extractor argument as path for a file-level extraction.""", nargs="*", - constraints=EnsureStr() | EnsureNone())) + ) + ) @staticmethod @datasetmethod(name="meta_extract") @@ -247,7 +269,8 @@ def __call__( if isinstance(context, str) else context)) - source_dataset = check_dataset(dataset or curdir, "extract metadata") + # dataset is a DatasetParameter from the parameter validation + source_dataset = dataset.ds source_dataset_version = context.get("dataset_version", None) if source_dataset_version is None: source_dataset_version = source_dataset.repo.get_hexsha()