Skip to content

Commit

Permalink
Merge pull request #137 from pangeo-forge/minimal-metayaml
Browse files Browse the repository at this point in the history
Meta.yaml schema (implemented with traitlets!)
  • Loading branch information
cisaacstern authored Nov 17, 2023
2 parents 005951c + e6cefce commit 1c6178f
Show file tree
Hide file tree
Showing 6 changed files with 386 additions and 18 deletions.
34 changes: 23 additions & 11 deletions pangeo_forge_runner/feedstock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ruamel.yaml import YAML

from .meta_yaml import MetaYaml
from .recipe_rewriter import RecipeRewriter

yaml = YAML()
Expand All @@ -30,7 +31,7 @@ def __init__(
"""
self.feedstock_dir = feedstock_dir
with open(self.feedstock_dir / "meta.yaml") as f:
self.meta = yaml.load(f)
self.meta = MetaYaml(**yaml.load(f))

self.prune = prune
self.callable_args_injections = (
Expand Down Expand Up @@ -75,30 +76,41 @@ def parse_recipes(self):
*Executes arbitrary code* defined in the feedstock recipes.
"""
recipes = {}
recipes_config = self.meta.get("recipes")
if isinstance(recipes_config, list):
for r in recipes_config:
if isinstance(self.meta.recipes, list):
for r in self.meta.recipes:
recipes[r["id"]] = self._import(r["object"])
elif isinstance(recipes_config, dict):
recipes = self._import(recipes_config["dict_object"])
elif isinstance(self.meta.recipes, dict):
recipes = self._import(self.meta.recipes["dict_object"])
else:
raise ValueError("Could not parse recipes config in meta.yaml")

return recipes

def get_expanded_meta(self):
def get_expanded_meta(self, drop_none=True) -> dict:
"""
Return full meta.yaml file, expanding recipes if needed.
recipes are guaranteed to be a list of dictionaries with 'id' keys.
'object' keys *may* be present, but not guaranteed.
"""
meta_copy = deepcopy(self.meta)
if "recipes" in self.meta and "dict_object" in self.meta["recipes"]:
if self.meta.recipes and "dict_object" in self.meta.recipes:
# We have a dict_object, so let's parse the recipes, and provide
# keep just the ids, discarding the values - as the values do not
# actually serialize.
recipes = self.parse_recipes()
meta_copy["recipes"] = [{"id": k} for k, v in recipes.items()]

return meta_copy
meta_copy.recipes = [
# In place of the values, we add a placeholder string, so that the
# re-assignment to the MetaYaml schema here will pass validation
# of the `recipes` field, which requires "id" and "object" fields.
{"id": k, "object": "DICT_VALUE_PLACEHOLDER"}
for k, _ in recipes.items()
]

return (
# the traitlets MetaYaml schema will give us empty containers
# by default, but in most cases lets assume we don't want that
{k: v for k, v in meta_copy.trait_values().items() if v}
if drop_none
else meta_copy.trait_values()
)
110 changes: 110 additions & 0 deletions pangeo_forge_runner/meta_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import jsonschema
from traitlets import Dict, HasTraits, List, TraitError, Unicode, Union, validate

recipes_field_per_element_schema = {
"type": "object",
"properties": {
"id": {"type": "string"},
"object": {"type": "string"},
},
"required": ["id", "object"],
}


class MetaYaml(HasTraits):
"""Schema for the ``meta.yaml`` file which must be included in each feedstock directory.
Only the ``recipes`` field is strictly required for ``pangeo-forge-runner`` to function.
All other fields are recommended but not required.
"""

def __init__(self, recipes=None, **kwargs):
"""The only required field is ``recipes``, so we put it explicitly in the init
signature to ensure it is not omitted, as demonstrated in:
https://github.com/ipython/traitlets/issues/490#issuecomment-479716288
"""
super().__init__(**kwargs)
self.recipes = recipes

@validate("recipes")
def _validate_recipes(self, proposal):
"""Ensure the ``recipes`` trait is not passed as an empty container and that
each element of the field contains all expected subfields.
"""
if not proposal["value"]:
raise TraitError(
f"The ``recipes`` trait, passed as {proposal['value']}, cannot be empty."
)

if isinstance(proposal["value"], list):
for recipe_spec in proposal["value"]:
try:
jsonschema.validate(recipe_spec, recipes_field_per_element_schema)
except jsonschema.ValidationError as e:
raise TraitError(e)
return proposal["value"]

title = Unicode(
allow_none=True,
help="""
Title for this dataset.
""",
)
description = Unicode(
allow_none=True,
help="""
Description of the dataset.
""",
)
recipes = Union(
[List(Dict()), Dict()],
help="""
Specifies the deployable Python objects to run in the recipe module.
If the recipes are assigned to their own Python variable names,
should be of the form:
```yaml
recipes:
- id: "unique-identifier-for-recipe"
object: "recipe:transforms"
```
Alternatively, if the recipes are values in a Python dict:
```yaml
recipes:
dict_object: "recipe:name_of_recipes_dict"
```
""",
)
provenance = Dict(
allow_none=True,
help="""
Dataset provenance information including provider, license, etc.
""",
per_key_traits={
"providers": List(
Dict(
per_key_traits={
"name": Unicode(),
"description": Unicode(),
"roles": List(), # TODO: enum
"url": Unicode(),
},
),
),
"license": Unicode(), # TODO: guidance on suggested formatting (enum?)
},
)
maintainers = List(
Dict(
per_key_traits={
"name": Unicode(help="Full name of the maintainer."),
"orcid": Unicode(help="Maintainer's ORCID ID"), # TODO: regex
"github": Unicode(help="Maintainer's GitHub username."),
},
),
allow_none=True,
help="""
Maintainers of this Pangeo Forge feedstock.
""",
)
3 changes: 0 additions & 3 deletions tests/unit/test_expand_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
"meta": {
"title": "Global Precipitation Climatology Project",
"description": "Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.\n",
"pangeo_forge_version": "0.9.0",
"pangeo_notebook_version": "2022.06.02",
"recipes": [{"id": "gpcp-from-gcs", "object": "recipe:recipe"}],
"provenance": {
"providers": [
Expand All @@ -37,7 +35,6 @@
"github": "cisaacstern",
}
],
"bakery": {"id": "pangeo-ldeo-nsf-earthcube"},
},
}
]
Expand Down
81 changes: 81 additions & 0 deletions tests/unit/test_feedstock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from textwrap import dedent

import pytest
from ruamel.yaml import YAML

from pangeo_forge_runner.feedstock import Feedstock
from pangeo_forge_runner.meta_yaml import MetaYaml

yaml = YAML()


@pytest.fixture(params=["recipe_object", "dict_object"])
def tmp_feedstock(request, tmp_path_factory: pytest.TempPathFactory):
tmpdir = tmp_path_factory.mktemp("feedstock")
if request.param == "recipe_object":
meta_yaml = dedent(
"""\
recipes:
- id: aws-noaa-sea-surface-temp-whoi
object: 'recipe:recipe'
"""
)
recipe_py = dedent(
"""\
class Recipe:
pass
recipe = Recipe()
"""
)
elif request.param == "dict_object":
meta_yaml = dedent(
"""\
recipes:
dict_object: 'recipe:recipes'
"""
)
recipe_py = dedent(
"""\
class Recipe:
pass
recipes = {"my_recipe": Recipe()}
"""
)

with open(tmpdir / "meta.yaml", mode="w") as f:
f.write(meta_yaml)
with open(tmpdir / "recipe.py", mode="w") as f:
f.write(recipe_py)

yield tmpdir, meta_yaml, request.param


def test_feedstock(tmp_feedstock):
tmpdir, meta_yaml, recipes_section_type = tmp_feedstock
f = Feedstock(feedstock_dir=tmpdir)
# equality of HasTraits instances doesn't work as I might expect,
# so just check equality of the relevant trait (`.recipes`)
assert f.meta.recipes == MetaYaml(**yaml.load(meta_yaml)).recipes

expanded_meta = f.get_expanded_meta()
recipes = f.parse_recipes()

for recipe_metadata in expanded_meta["recipes"]:
# the recipe_object metadata looks something like this:
# {'recipes': [{'id': 'my_recipe', 'object': 'DICT_VALUE_PLACEHOLDER'}]}
# and the dict_object metadata looks like this:
# {'recipes': [{'id': 'aws-noaa-sea-surface-temp-whoi', 'object': 'recipe:recipe'}]}
# both have an "id" field:
assert "id" in recipe_metadata
# but only the "recipe_object" has an "object" field:
if recipes_section_type == "recipe_object":
assert "object" in recipe_metadata
elif recipes_section_type == "dict_object":
assert recipe_metadata["object"] == "DICT_VALUE_PLACEHOLDER"

for r in recipes.values():
# the values of the recipes dict should all be python objects
# we used the mock type `Recipe` here, so this should be true:
assert str(r).startswith("<Recipe object")
Loading

0 comments on commit 1c6178f

Please sign in to comment.