From 24e35a6a5464fe91296830c1bbf08546dfe60eec Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 10:33:14 -0800 Subject: [PATCH 01/11] add test_feedstock --- tests/unit/test_feedstock.py | 78 ++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/unit/test_feedstock.py diff --git a/tests/unit/test_feedstock.py b/tests/unit/test_feedstock.py new file mode 100644 index 0000000..0beb085 --- /dev/null +++ b/tests/unit/test_feedstock.py @@ -0,0 +1,78 @@ +from textwrap import dedent + +import pytest +from ruamel.yaml import YAML + +from pangeo_forge_runner.feedstock import Feedstock + +yaml = YAML() + + +@pytest.fixture(params=["recipe_object", "dict_object"]) +def tmp_feedstock(request, tmp_path_factory: pytest.TempPathFactory): + tmpdir = tmp_path_factory.mktemp("feedstock") + if request.param == "recipe_object": + meta_yaml = dedent( + """\ + recipes: + - id: aws-noaa-sea-surface-temp-whoi + object: 'recipe:recipe' + """ + ) + recipe_py = dedent( + """\ + class Recipe: + pass + + recipe = Recipe() + """ + ) + elif request.param == "dict_object": + meta_yaml = dedent( + """\ + recipes: + dict_object: 'recipe:recipes' + """ + ) + recipe_py = dedent( + """\ + class Recipe: + pass + + recipes = {"my_recipe": Recipe()} + """ + ) + + with open(tmpdir / "meta.yaml", mode="w") as f: + f.write(meta_yaml) + with open(tmpdir / "recipe.py", mode="w") as f: + f.write(recipe_py) + + yield tmpdir, meta_yaml, request.param + + +def test_feedstock(tmp_feedstock): + tmpdir, meta_yaml, recipes_section_type = tmp_feedstock + f = Feedstock(feedstock_dir=tmpdir) + assert f.meta == yaml.load(meta_yaml) + + expanded_meta = f.get_expanded_meta() + recipes = f.parse_recipes() + + for recipe_metadata in expanded_meta["recipes"]: + # the recipe_object metadata looks something like this: + # {'recipes': [{'id': 'my_recipe'}]} + # and the dict_object metadata looks like this: + # {'recipes': [{'id': 'aws-noaa-sea-surface-temp-whoi', 'object': 'recipe:recipe'}]} + # both have an "id" field: + assert "id" in recipe_metadata + # but only the "recipe_object" has an "object" field: + if recipes_section_type == "recipe_object": + assert "object" in recipe_metadata + elif recipes_section_type == "dict_object": + assert "object" not in recipe_metadata + + for r in recipes.values(): + # the values of the recipes dict should all be python objects + # we used the mock type `Recipe` here, so this should be true: + assert str(r).startswith(" Date: Fri, 17 Nov 2023 11:58:23 -0800 Subject: [PATCH 02/11] add traitlets-based meta yaml schema --- pangeo_forge_runner/feedstock.py | 20 +++++----- pangeo_forge_runner/meta_yaml.py | 64 ++++++++++++++++++++++++++++++++ tests/unit/test_feedstock.py | 5 ++- 3 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 pangeo_forge_runner/meta_yaml.py diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index 491a8aa..5964582 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -5,6 +5,7 @@ from ruamel.yaml import YAML +from .meta_yaml import MetaYaml from .recipe_rewriter import RecipeRewriter yaml = YAML() @@ -30,7 +31,7 @@ def __init__( """ self.feedstock_dir = feedstock_dir with open(self.feedstock_dir / "meta.yaml") as f: - self.meta = yaml.load(f) + self.meta = MetaYaml(**yaml.load(f)) self.prune = prune self.callable_args_injections = ( @@ -75,18 +76,17 @@ def parse_recipes(self): *Executes arbitrary code* defined in the feedstock recipes. """ recipes = {} - recipes_config = self.meta.get("recipes") - if isinstance(recipes_config, list): - for r in recipes_config: + if isinstance(self.meta.recipes, list): + for r in self.meta.recipes: recipes[r["id"]] = self._import(r["object"]) - elif isinstance(recipes_config, dict): - recipes = self._import(recipes_config["dict_object"]) + elif isinstance(self.meta.recipes, dict): + recipes = self._import(self.meta.recipes["dict_object"]) else: raise ValueError("Could not parse recipes config in meta.yaml") return recipes - def get_expanded_meta(self): + def get_expanded_meta(self) -> dict: """ Return full meta.yaml file, expanding recipes if needed. @@ -94,11 +94,11 @@ def get_expanded_meta(self): 'object' keys *may* be present, but not guaranteed. """ meta_copy = deepcopy(self.meta) - if "recipes" in self.meta and "dict_object" in self.meta["recipes"]: + if self.meta.recipes and "dict_object" in self.meta.recipes: # We have a dict_object, so let's parse the recipes, and provide # keep just the ids, discarding the values - as the values do not # actually serialize. recipes = self.parse_recipes() - meta_copy["recipes"] = [{"id": k} for k, v in recipes.items()] + meta_copy.recipes = [{"id": k} for k, v in recipes.items()] - return meta_copy + return meta_copy.trait_values() diff --git a/pangeo_forge_runner/meta_yaml.py b/pangeo_forge_runner/meta_yaml.py new file mode 100644 index 0000000..6dcbeb8 --- /dev/null +++ b/pangeo_forge_runner/meta_yaml.py @@ -0,0 +1,64 @@ +from traitlets import Dict, HasTraits, List, Unicode, Union + + +class MetaYaml(HasTraits): + """Schema for the ``meta.yaml`` file which must be included in each feedstock directory. + Only the ``recipes`` field is strictly required for ``pangeo-forge-runner`` to function. + All other fields are recommended but not required. + """ + + title = Unicode( + allow_none=True, + help=""" + Title for this dataset. + """, + ) + description = Unicode( + allow_none=True, + help=""" + Description of the dataset. + """, + ) + recipes = Union( + [List(Dict()), Dict()], + help=""" + Specifies the deployable Python objects to run in the recipe module. + If the recipes are assigned to their own Python variable names, + should be of the form: + + ```yaml + recipes: + - id: "unique-identifier-for-recipe" + object: "recipe:transforms" + ``` + + Alternatively, if the recipes are values in a Python dict: + + ```yaml + recipes: + dict_object: "recipe:name_of_recipes_dict" + ``` + """, + ) + pangeo_forge_version = Unicode( # TODO: drop + allow_none=True, + ) + pangeo_notebook_version = Unicode( # TODO: drop + allow_none=True, + ) + provenance = Dict( # TODO: add detail + allow_none=True, + help=""" + Dataset provenance information including provider, license, etc. + """, + ) + maintainers = List( # TODO: add detail + Dict(), + allow_none=True, + help=""" + Maintainers of this Pangeo Forge feedstock. + """, + ) + bakery = Dict( # TODO: drop + allow_none=True, + ) diff --git a/tests/unit/test_feedstock.py b/tests/unit/test_feedstock.py index 0beb085..565090b 100644 --- a/tests/unit/test_feedstock.py +++ b/tests/unit/test_feedstock.py @@ -4,6 +4,7 @@ from ruamel.yaml import YAML from pangeo_forge_runner.feedstock import Feedstock +from pangeo_forge_runner.meta_yaml import MetaYaml yaml = YAML() @@ -54,7 +55,9 @@ class Recipe: def test_feedstock(tmp_feedstock): tmpdir, meta_yaml, recipes_section_type = tmp_feedstock f = Feedstock(feedstock_dir=tmpdir) - assert f.meta == yaml.load(meta_yaml) + # equality of HasTraits instances doesn't work as I might expect, + # so just check equality of the relevant trait (`.recipes`) + assert f.meta.recipes == MetaYaml(**yaml.load(meta_yaml)).recipes expanded_meta = f.get_expanded_meta() recipes = f.parse_recipes() From 7059a0169afec76d2062ee31424b9b78655be431 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:00:00 -0800 Subject: [PATCH 03/11] drop unused fields from meta yaml schema --- pangeo_forge_runner/meta_yaml.py | 9 --------- tests/unit/test_expand_meta.py | 3 --- 2 files changed, 12 deletions(-) diff --git a/pangeo_forge_runner/meta_yaml.py b/pangeo_forge_runner/meta_yaml.py index 6dcbeb8..6e604ae 100644 --- a/pangeo_forge_runner/meta_yaml.py +++ b/pangeo_forge_runner/meta_yaml.py @@ -40,12 +40,6 @@ class MetaYaml(HasTraits): ``` """, ) - pangeo_forge_version = Unicode( # TODO: drop - allow_none=True, - ) - pangeo_notebook_version = Unicode( # TODO: drop - allow_none=True, - ) provenance = Dict( # TODO: add detail allow_none=True, help=""" @@ -59,6 +53,3 @@ class MetaYaml(HasTraits): Maintainers of this Pangeo Forge feedstock. """, ) - bakery = Dict( # TODO: drop - allow_none=True, - ) diff --git a/tests/unit/test_expand_meta.py b/tests/unit/test_expand_meta.py index 4beb35b..8e95367 100644 --- a/tests/unit/test_expand_meta.py +++ b/tests/unit/test_expand_meta.py @@ -10,8 +10,6 @@ "meta": { "title": "Global Precipitation Climatology Project", "description": "Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.\n", - "pangeo_forge_version": "0.9.0", - "pangeo_notebook_version": "2022.06.02", "recipes": [{"id": "gpcp-from-gcs", "object": "recipe:recipe"}], "provenance": { "providers": [ @@ -37,7 +35,6 @@ "github": "cisaacstern", } ], - "bakery": {"id": "pangeo-ldeo-nsf-earthcube"}, }, } ] From 37bfa901435aad1cc59f557b00ffcbbadf8b95a6 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:19:35 -0800 Subject: [PATCH 04/11] more detail on schema fields --- pangeo_forge_runner/feedstock.py | 10 ++++++++-- pangeo_forge_runner/meta_yaml.py | 25 ++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index 5964582..b3c4e19 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -86,7 +86,7 @@ def parse_recipes(self): return recipes - def get_expanded_meta(self) -> dict: + def get_expanded_meta(self, drop_none=True) -> dict: """ Return full meta.yaml file, expanding recipes if needed. @@ -101,4 +101,10 @@ def get_expanded_meta(self) -> dict: recipes = self.parse_recipes() meta_copy.recipes = [{"id": k} for k, v in recipes.items()] - return meta_copy.trait_values() + return ( + # the traitlets MetaYaml schema will give us empty containers + # by default, but in most cases lets assume we don't want that + {k: v for k, v in meta_copy.trait_values().items() if v} + if drop_none + else meta_copy.trait_values() + ) diff --git a/pangeo_forge_runner/meta_yaml.py b/pangeo_forge_runner/meta_yaml.py index 6e604ae..9896724 100644 --- a/pangeo_forge_runner/meta_yaml.py +++ b/pangeo_forge_runner/meta_yaml.py @@ -40,14 +40,33 @@ class MetaYaml(HasTraits): ``` """, ) - provenance = Dict( # TODO: add detail + provenance = Dict( allow_none=True, help=""" Dataset provenance information including provider, license, etc. """, + per_key_traits={ + "providers": List( + Dict( + per_key_traits={ + "name": Unicode(), + "description": Unicode(), + "roles": List(), # TODO: enum + "url": Unicode(), + }, + ), + ), + "license": Unicode(), # TODO: guidance on suggested formatting (enum?) + }, ) - maintainers = List( # TODO: add detail - Dict(), + maintainers = List( + Dict( + per_key_traits={ + "name": Unicode(help="Full name of the maintainer."), + "orcid": Unicode(help="Maintainer's ORCID ID"), # TODO: regex + "github": Unicode(help="Maintainer's GitHub username."), + }, + ), allow_none=True, help=""" Maintainers of this Pangeo Forge feedstock. From 1695798e9d7336bb450e4dea2a4557007f154f3a Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:29:24 -0800 Subject: [PATCH 05/11] workaround traitlets default containers --- pangeo_forge_runner/feedstock.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index b3c4e19..fe32ef9 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -76,10 +76,10 @@ def parse_recipes(self): *Executes arbitrary code* defined in the feedstock recipes. """ recipes = {} - if isinstance(self.meta.recipes, list): + if self.meta.recipes and isinstance(self.meta.recipes, list): for r in self.meta.recipes: recipes[r["id"]] = self._import(r["object"]) - elif isinstance(self.meta.recipes, dict): + elif self.meta.recipes and isinstance(self.meta.recipes, dict): recipes = self._import(self.meta.recipes["dict_object"]) else: raise ValueError("Could not parse recipes config in meta.yaml") From 6440b82be94b3105d669dcaa7ed6887483cf73ec Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:14:44 -0800 Subject: [PATCH 06/11] recipes field is required in metayaml --- pangeo_forge_runner/feedstock.py | 4 +- pangeo_forge_runner/meta_yaml.py | 19 +++- tests/unit/test_meta_yaml.py | 168 +++++++++++++++++++++++++++++++ tests/unit/test_recipes.py | 6 +- 4 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 tests/unit/test_meta_yaml.py diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index fe32ef9..b3c4e19 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -76,10 +76,10 @@ def parse_recipes(self): *Executes arbitrary code* defined in the feedstock recipes. """ recipes = {} - if self.meta.recipes and isinstance(self.meta.recipes, list): + if isinstance(self.meta.recipes, list): for r in self.meta.recipes: recipes[r["id"]] = self._import(r["object"]) - elif self.meta.recipes and isinstance(self.meta.recipes, dict): + elif isinstance(self.meta.recipes, dict): recipes = self._import(self.meta.recipes["dict_object"]) else: raise ValueError("Could not parse recipes config in meta.yaml") diff --git a/pangeo_forge_runner/meta_yaml.py b/pangeo_forge_runner/meta_yaml.py index 9896724..53f5d20 100644 --- a/pangeo_forge_runner/meta_yaml.py +++ b/pangeo_forge_runner/meta_yaml.py @@ -1,4 +1,4 @@ -from traitlets import Dict, HasTraits, List, Unicode, Union +from traitlets import Dict, HasTraits, List, TraitError, Unicode, Union, validate class MetaYaml(HasTraits): @@ -7,6 +7,23 @@ class MetaYaml(HasTraits): All other fields are recommended but not required. """ + def __init__(self, recipes=None, **kwargs): + """The only required field is ``recipes``, so we put it explicitly in the init + signature to ensure it is not omitted, as demonstrated in: + https://github.com/ipython/traitlets/issues/490#issuecomment-479716288 + """ + super().__init__(**kwargs) + self.recipes = recipes + + @validate("recipes") + def _validate_recipes(self, proposal): + """Ensure the ``recipes`` trait is not passed as an empty container.""" + if not proposal["value"]: + raise TraitError( + f"The ``recipes`` trait, passed as {proposal['value']}, cannot be empty." + ) + return proposal["value"] + title = Unicode( allow_none=True, help=""" diff --git a/tests/unit/test_meta_yaml.py b/tests/unit/test_meta_yaml.py new file mode 100644 index 0000000..370770c --- /dev/null +++ b/tests/unit/test_meta_yaml.py @@ -0,0 +1,168 @@ +import copy +from textwrap import dedent + +import pytest +from ruamel.yaml import YAML +from traitlets import TraitError + +from pangeo_forge_runner.meta_yaml import MetaYaml + +yaml = YAML() + + +@pytest.fixture +def with_recipes_list() -> str: + return dedent( + """\ + title: 'AWS NOAA WHOI SST' + description: 'Analysis-ready datasets derived from AWS NOAA WHOI NetCDF' + recipes: + - id: aws-noaa-sea-surface-temp-whoi + object: 'recipe:recipe' + provenance: + providers: + - name: 'AWS NOAA Oceanic CDR' + description: 'Registry of Open Data on AWS National Oceanographic & Atmospheric Administration National Centers for Environmental Information' + roles: + - producer + - licensor + url: s3://noaa-cdr-sea-surface-temp-whoi-pds/ + license: 'Open Data' + maintainers: + - name: 'Jo Contributor' + orcid: '0000-0000-0000-0000' + github: jocontributor123 + """ # noqa: E501 + ) + + +@pytest.fixture +def valid_meta_yaml(with_recipes_list: str) -> dict: + return yaml.load(with_recipes_list) + + +@pytest.fixture +def valid_meta_yaml_dict_object(with_recipes_list: str) -> dict: + with_dict_object = with_recipes_list.replace( + dedent( + """\ + recipes: + - id: aws-noaa-sea-surface-temp-whoi + object: 'recipe:recipe' + """ + ), + dedent( + """\ + recipes: + dict_object: 'recipe:recipes' + """ + ), + ) + return yaml.load(with_dict_object) + + +def test_schema_valid(valid_meta_yaml): + _ = MetaYaml(**valid_meta_yaml) + + +def test_schema_valid_dict_object(valid_meta_yaml_dict_object): + _ = MetaYaml(**valid_meta_yaml_dict_object) + + +@pytest.mark.parametrize( + "field", + [ + "title", + "description", + "recipes", + "provenance", + "maintainers", + ], +) +def test_missing_toplevel_field(valid_meta_yaml, field): + meta_yaml_copy = copy.deepcopy(valid_meta_yaml) + del meta_yaml_copy[field] + if field == "recipes": + # ``recipes`` is the only required field + with pytest.raises(TraitError): + _ = MetaYaml(**meta_yaml_copy) + else: + # all others fields can be left empty without raising an error + _ = MetaYaml(**meta_yaml_copy) + + +# @pytest.mark.parametrize( +# "subfield", +# [ +# "id", +# "object", +# ], +# ) +# def test_missing_recipes_subfield(valid_meta_yaml, subfield, schema): +# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) +# del invalid_meta_yaml["recipes"][0][subfield] + +# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): +# validate(invalid_meta_yaml, schema=schema) + + +# @pytest.mark.parametrize( +# "subfield", +# [ +# "providers", +# "license", +# ], +# ) +# def test_missing_provenance_subfield(valid_meta_yaml, subfield, schema): +# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) +# del invalid_meta_yaml["provenance"][subfield] + +# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): +# validate(invalid_meta_yaml, schema=schema) + + +# @pytest.mark.parametrize( +# "subfield", +# [ +# "name", +# "description", +# "roles", +# "url", +# ], +# ) +# def test_missing_providers_subfield(valid_meta_yaml, subfield, schema): +# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) +# del invalid_meta_yaml["provenance"]["providers"][0][subfield] + +# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): +# validate(invalid_meta_yaml, schema=schema) + + +# @pytest.mark.parametrize( +# "subfield", +# [ +# "name", +# "orcid", +# "github", +# ], +# ) +# def test_missing_maintainers_subfield(valid_meta_yaml, subfield, schema): +# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) +# del invalid_meta_yaml["maintainers"][0][subfield] + +# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): +# validate(invalid_meta_yaml, schema=schema) + + +# @pytest.mark.parametrize( +# "subfield", +# [ +# "id", +# ], +# ) +# def test_missing_bakery_subfield(valid_meta_yaml, subfield, schema): +# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) +# del invalid_meta_yaml["bakery"][subfield] + +# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): +# validate(invalid_meta_yaml, schema=schema) diff --git a/tests/unit/test_recipes.py b/tests/unit/test_recipes.py index 9d658c6..d268953 100644 --- a/tests/unit/test_recipes.py +++ b/tests/unit/test_recipes.py @@ -2,6 +2,7 @@ import pytest from ruamel.yaml import YAML +from traitlets import TraitError from pangeo_forge_runner import Feedstock @@ -35,6 +36,5 @@ def test_recipes_dict(): def test_recipes_broken(): list_recipe = HERE / "test-recipes/broken-recipe/feedstock" - feed = Feedstock(list_recipe) - with pytest.raises(ValueError): - feed.parse_recipes() + with pytest.raises(TraitError): + _ = Feedstock(list_recipe) From d051aebc72adbca43f0547c9c8cd5207287cfd81 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:34:29 -0800 Subject: [PATCH 07/11] ensure 'id' and 'object' are present on lists of recipes --- pangeo_forge_runner/meta_yaml.py | 21 ++++++++++- tests/unit/test_meta_yaml.py | 60 ++++++++++++++------------------ 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/pangeo_forge_runner/meta_yaml.py b/pangeo_forge_runner/meta_yaml.py index 53f5d20..19ab404 100644 --- a/pangeo_forge_runner/meta_yaml.py +++ b/pangeo_forge_runner/meta_yaml.py @@ -1,5 +1,15 @@ +import jsonschema from traitlets import Dict, HasTraits, List, TraitError, Unicode, Union, validate +recipes_field_per_element_schema = { + "type": "object", + "properties": { + "id": {"type": "string"}, + "object": {"type": "string"}, + }, + "required": ["id", "object"], +} + class MetaYaml(HasTraits): """Schema for the ``meta.yaml`` file which must be included in each feedstock directory. @@ -17,11 +27,20 @@ def __init__(self, recipes=None, **kwargs): @validate("recipes") def _validate_recipes(self, proposal): - """Ensure the ``recipes`` trait is not passed as an empty container.""" + """Ensure the ``recipes`` trait is not passed as an empty container and that + each element of the field contains all expected subfields. + """ if not proposal["value"]: raise TraitError( f"The ``recipes`` trait, passed as {proposal['value']}, cannot be empty." ) + + if isinstance(proposal["value"], list): + for recipe_spec in proposal["value"]: + try: + jsonschema.validate(recipe_spec, recipes_field_per_element_schema) + except jsonschema.ValidationError as e: + raise TraitError(e) return proposal["value"] title = Unicode( diff --git a/tests/unit/test_meta_yaml.py b/tests/unit/test_meta_yaml.py index 370770c..f6d24ae 100644 --- a/tests/unit/test_meta_yaml.py +++ b/tests/unit/test_meta_yaml.py @@ -91,21 +91,23 @@ def test_missing_toplevel_field(valid_meta_yaml, field): _ = MetaYaml(**meta_yaml_copy) -# @pytest.mark.parametrize( -# "subfield", -# [ -# "id", -# "object", -# ], -# ) -# def test_missing_recipes_subfield(valid_meta_yaml, subfield, schema): -# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) -# del invalid_meta_yaml["recipes"][0][subfield] +@pytest.mark.parametrize( + "subfield", + [ + "id", + "object", + ], +) +def test_missing_recipes_subfield(valid_meta_yaml, subfield): + invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) + del invalid_meta_yaml["recipes"][0][subfield] -# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): -# validate(invalid_meta_yaml, schema=schema) + with pytest.raises(TraitError): + _ = MetaYaml(**invalid_meta_yaml) +# TODO: In a future "strict" mode, ensure provenance fields are all provided. +# -------------------------------------------------------------------------- # @pytest.mark.parametrize( # "subfield", # [ @@ -116,11 +118,13 @@ def test_missing_toplevel_field(valid_meta_yaml, field): # def test_missing_provenance_subfield(valid_meta_yaml, subfield, schema): # invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) # del invalid_meta_yaml["provenance"][subfield] - -# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): -# validate(invalid_meta_yaml, schema=schema) +# +# with pytest.raises(TraitError): +# _ = MetaYaml(**meta_yaml_copy) +# TODO: In a future "strict" mode, ensure providers fields are all provided. +# -------------------------------------------------------------------------- # @pytest.mark.parametrize( # "subfield", # [ @@ -133,11 +137,13 @@ def test_missing_toplevel_field(valid_meta_yaml, field): # def test_missing_providers_subfield(valid_meta_yaml, subfield, schema): # invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) # del invalid_meta_yaml["provenance"]["providers"][0][subfield] - +# # with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): -# validate(invalid_meta_yaml, schema=schema) +# _ = MetaYaml(**meta_yaml_copy) +# TODO: In a future "strict" mode, ensure maintainers fields are all provided. +# ---------------------------------------------------------------------------- # @pytest.mark.parametrize( # "subfield", # [ @@ -149,20 +155,6 @@ def test_missing_toplevel_field(valid_meta_yaml, field): # def test_missing_maintainers_subfield(valid_meta_yaml, subfield, schema): # invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) # del invalid_meta_yaml["maintainers"][0][subfield] - -# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): -# validate(invalid_meta_yaml, schema=schema) - - -# @pytest.mark.parametrize( -# "subfield", -# [ -# "id", -# ], -# ) -# def test_missing_bakery_subfield(valid_meta_yaml, subfield, schema): -# invalid_meta_yaml = copy.deepcopy(valid_meta_yaml) -# del invalid_meta_yaml["bakery"][subfield] - -# with pytest.raises(ValidationError, match=f"'{subfield}' is a required property"): -# validate(invalid_meta_yaml, schema=schema) +# +# with pytest.raises(TraitError): +# _ = MetaYaml(**meta_yaml_copy) From addacd7a9b243b5e74ccd953fc8558e0ae376196 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:44:56 -0800 Subject: [PATCH 08/11] let parsed dict object conform to recipes section schema --- pangeo_forge_runner/feedstock.py | 5 ++++- tests/unit/test_feedstock.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index b3c4e19..2a3919f 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -99,7 +99,10 @@ def get_expanded_meta(self, drop_none=True) -> dict: # keep just the ids, discarding the values - as the values do not # actually serialize. recipes = self.parse_recipes() - meta_copy.recipes = [{"id": k} for k, v in recipes.items()] + meta_copy.recipes = [ + {"id": k, "object": "DICT_VALUE_PLACEHOLDER"} + for k, _ in recipes.items() + ] return ( # the traitlets MetaYaml schema will give us empty containers diff --git a/tests/unit/test_feedstock.py b/tests/unit/test_feedstock.py index 565090b..a7ac78d 100644 --- a/tests/unit/test_feedstock.py +++ b/tests/unit/test_feedstock.py @@ -64,7 +64,7 @@ def test_feedstock(tmp_feedstock): for recipe_metadata in expanded_meta["recipes"]: # the recipe_object metadata looks something like this: - # {'recipes': [{'id': 'my_recipe'}]} + # {'recipes': [{'id': 'my_recipe', 'object': 'DICT_VALUE_PLACEHOLDER'}]} # and the dict_object metadata looks like this: # {'recipes': [{'id': 'aws-noaa-sea-surface-temp-whoi', 'object': 'recipe:recipe'}]} # both have an "id" field: @@ -73,7 +73,7 @@ def test_feedstock(tmp_feedstock): if recipes_section_type == "recipe_object": assert "object" in recipe_metadata elif recipes_section_type == "dict_object": - assert "object" not in recipe_metadata + assert recipe_metadata["object"] == "DICT_VALUE_PLACEHOLDER" for r in recipes.values(): # the values of the recipes dict should all be python objects From 252de396fa7c1db0f0baa27f4890096618618d99 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:52:05 -0800 Subject: [PATCH 09/11] fix recipes dict test --- tests/unit/test_recipes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_recipes.py b/tests/unit/test_recipes.py index d268953..1d6e9a2 100644 --- a/tests/unit/test_recipes.py +++ b/tests/unit/test_recipes.py @@ -30,7 +30,10 @@ def test_recipes_dict(): with open(list_recipe / "meta.yaml") as f: meta = yaml.load(f) - meta["recipes"] = [{"id": "test_1"}, {"id": "test_2"}] + meta["recipes"] = [ + {"id": "test_1", "object": "DICT_VALUE_PLACEHOLDER"}, + {"id": "test_2", "object": "DICT_VALUE_PLACEHOLDER"}, + ] assert meta == feed.get_expanded_meta() From 8f452a22fea86c39fa870d0a5526051a1a36dc2b Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:56:59 -0800 Subject: [PATCH 10/11] explain dict value placeholder in comment --- pangeo_forge_runner/feedstock.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pangeo_forge_runner/feedstock.py b/pangeo_forge_runner/feedstock.py index 2a3919f..f03501d 100644 --- a/pangeo_forge_runner/feedstock.py +++ b/pangeo_forge_runner/feedstock.py @@ -100,6 +100,9 @@ def get_expanded_meta(self, drop_none=True) -> dict: # actually serialize. recipes = self.parse_recipes() meta_copy.recipes = [ + # In place of the values, we add a placeholder string, so that the + # re-assignment to the MetaYaml schema here will pass validation + # of the `recipes` field, which requires "id" and "object" fields. {"id": k, "object": "DICT_VALUE_PLACEHOLDER"} for k, _ in recipes.items() ] From e6cefceffecc61a4157cd91276b2a50d25a252c8 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:58:34 -0800 Subject: [PATCH 11/11] test recipes field cannot be empty --- tests/unit/test_meta_yaml.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_meta_yaml.py b/tests/unit/test_meta_yaml.py index f6d24ae..69487ee 100644 --- a/tests/unit/test_meta_yaml.py +++ b/tests/unit/test_meta_yaml.py @@ -106,6 +106,11 @@ def test_missing_recipes_subfield(valid_meta_yaml, subfield): _ = MetaYaml(**invalid_meta_yaml) +def test_recipes_field_cannot_be_empty_container(): + with pytest.raises(TraitError): + _ = MetaYaml(recipes=[]) + + # TODO: In a future "strict" mode, ensure provenance fields are all provided. # -------------------------------------------------------------------------- # @pytest.mark.parametrize(