Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add meta yaml schema #134

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/meta-yaml-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI workflow: runs the meta.yaml schema tests on pushes to main and on pull requests.
name: Meta Yaml Tests

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "*" ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      # Let every Python version finish so one failing version does not hide the others.
      fail-fast: false
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      # Bumped from v3 so the action major is current, in line with checkout@v4.
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies & our package
      run: |
        python -m pip install --upgrade pip
        python -m pip install -r dev-requirements.txt
        # Quoted so the shell never treats the extras brackets as a glob pattern.
        python -m pip install -e ".[validate-yaml]"

    - name: Test with pytest
      run: |
        pytest -vvv -s --cov=pangeo_forge_runner tests/test_meta_yaml.py

    - name: Upload Coverage to Codecov
      # NOTE(review): codecov-action@v2 is an old major version; newer majors may
      # require a CODECOV_TOKEN secret, so confirm before upgrading.
      uses: codecov/codecov-action@v2
68 changes: 68 additions & 0 deletions pangeo_forge_runner/meta_yaml/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from dataclasses import dataclass
from typing import List, Union


# One entry of the `recipes` list in meta.yaml: an id plus a reference to a
# recipe object (see the TODOs for the intended, not yet enforced, constraints).
@dataclass
class RecipeObject:
    id: str  # TODO: require to be unique within meta.yaml namespace
    object: str  # TODO: require format '{module_name}:{recipe_instance_name}'


# Alternative form of the `recipes` section: a single reference to a dict
# instance instead of a list of RecipeObject entries.
@dataclass
class RecipeDictObject:
    dict_object: str  # TODO: require format '{module_name}:{dict_instance_name}'


# A single provider entry, as listed under Provenance.providers.
@dataclass
class Provider:
    name: str
    description: str
    roles: List[str]  # TODO: enum choices e.g. Roles.producer, Roles.licensor
    url: str


# The `provenance` section of meta.yaml: the list of providers and a license string.
@dataclass
class Provenance:
    providers: List[Provider]
    license: str  # TODO: enum choices e.g. Licenses.cc_by_40 = "CC-BY-4.0" etc.


# A single maintainer entry, as listed under MetaYamlExtended.maintainers.
@dataclass
class Maintainer:
    name: str
    orcid: str  # TODO: format requirement
    github: str  # TODO: allowable characters


# The `bakery` section of meta.yaml: identifies a bakery by its id.
@dataclass
class Bakery:
    id: str  # TODO: exists in database


# Minimal meta.yaml content: only the `recipes` section.
@dataclass
class MetaYamlMinimal:
    # TODO: this should always be a list
    # recipes: List[Union[RecipeObject, RecipeDictObject]]
    # For now: either a list of RecipeObject entries or one RecipeDictObject.
    recipes: Union[List[RecipeObject], RecipeDictObject]


# Full meta.yaml content: the `recipes` field inherited from MetaYamlMinimal
# plus descriptive metadata. No field declares a default value.
@dataclass
class MetaYamlExtended(MetaYamlMinimal):
    title: str
    description: str
    pangeo_forge_version: str
    pangeo_notebook_version: str
    provenance: Provenance
    maintainers: List[Maintainer]
    bakery: Bakery


def get_schema():
    """Build and return the JSON schema describing ``MetaYamlExtended``.

    pydantic is imported inside the function rather than at module import
    time, so importing this module does not itself require pydantic.

    Returns:
        The JSON schema produced by pydantic's ``TypeAdapter.json_schema()``
        for a pydantic-dataclass subclass of ``MetaYamlExtended``.
    """
    from pydantic import TypeAdapter
    from pydantic.dataclasses import dataclass as pydantic_dataclass

    # Wrap the stdlib dataclass in a pydantic dataclass so that a schema can
    # be generated from it.
    @pydantic_dataclass
    class MetaYamlExtendedModel(MetaYamlExtended):
        pass

    adapter = TypeAdapter(MetaYamlExtendedModel)
    return adapter.json_schema()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
extras_require={
"dataflow": ["apache-beam[gcp]"],
"flink": ["apache-beam>=2.47.0"],
"validate-yaml": ["jsonschema", "pydantic"],
},
entry_points={
"console_scripts": ["pangeo-forge-runner=pangeo_forge_runner.cli:main"]
Expand Down
175 changes: 175 additions & 0 deletions tests/test_meta_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import copy
from textwrap import dedent

import pytest
from jsonschema import ValidationError, validate
from ruamel.yaml import YAML

from pangeo_forge_runner.meta_yaml.schema import get_schema

# Shared ruamel.yaml parser instance; the fixtures below use it to load YAML text.
yaml = YAML()


@pytest.fixture
def schema():
    """Provide the JSON schema generated by ``get_schema`` to each test."""
    meta_yaml_schema = get_schema()
    return meta_yaml_schema


@pytest.fixture
def with_recipes_list() -> str:
    """Return the text of a complete meta.yaml whose ``recipes`` section is a list."""
    meta_yaml_text = dedent(
        """\
        title: 'AWS NOAA WHOI SST'
        description: 'Analysis-ready datasets derived from AWS NOAA WHOI NetCDF'
        pangeo_forge_version: '0.9.2'
        pangeo_notebook_version: '2021.07.17'
        recipes:
          - id: aws-noaa-sea-surface-temp-whoi
            object: 'recipe:recipe'
        provenance:
          providers:
            - name: 'AWS NOAA Oceanic CDR'
              description: 'Registry of Open Data on AWS National Oceanographic & Atmospheric Administration National Centers for Environmental Information'
              roles:
                - producer
                - licensor
              url: s3://noaa-cdr-sea-surface-temp-whoi-pds/
          license: 'Open Data'
        maintainers:
          - name: 'Jo Contributor'
            orcid: '0000-0000-0000-0000'
            github: jocontributor123
        bakery:
          id: 'pangeo-ldeo-nsf-earthcube'
        """  # noqa: E501
    )
    return meta_yaml_text


@pytest.fixture
def valid_meta_yaml(with_recipes_list: str) -> dict:
    """Parse the list-style meta.yaml text into a mapping."""
    parsed_meta = yaml.load(with_recipes_list)
    return parsed_meta


@pytest.fixture
def valid_meta_yaml_dict_object(with_recipes_list: str) -> dict:
    """Return the parsed meta.yaml with ``recipes`` in its ``dict_object`` form.

    The swap is done on the parsed document rather than with ``str.replace``
    on the YAML text. A textual replace silently does nothing when the
    snippet no longer matches the fixture text exactly (for example after an
    indentation change), which would leave the dict-object test validating
    the list form without anyone noticing.

    Args:
        with_recipes_list: YAML text of a meta.yaml whose ``recipes`` is a list.

    Returns:
        The parsed mapping, with ``recipes`` replaced by a mapping holding
        only the ``dict_object`` key.
    """
    meta = yaml.load(with_recipes_list)
    # Parse the replacement with the same loader so the value has the same
    # types the original text-based approach produced.
    meta["recipes"] = yaml.load("dict_object: 'recipe:recipes'\n")
    return meta


def test_schema_valid(valid_meta_yaml, schema):
    """A complete meta.yaml with a recipes list validates against the schema."""
    validate(instance=valid_meta_yaml, schema=schema)


def test_schema_valid_dict_object(valid_meta_yaml_dict_object, schema):
    """A complete meta.yaml using the dict_object form validates against the schema."""
    validate(instance=valid_meta_yaml_dict_object, schema=schema)


@pytest.mark.parametrize(
    "field",
    [
        "title",
        "description",
        "pangeo_forge_version",
        "pangeo_notebook_version",
        "recipes",
        "provenance",
        "maintainers",
        "bakery",
    ],
)
def test_missing_toplevel_field(valid_meta_yaml, field, schema):
    """Removing any top-level key must fail validation and name the missing key."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta[field]
    expected_message = f"'{field}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)


@pytest.mark.parametrize("subfield", ["id", "object"])
def test_missing_recipes_subfield(valid_meta_yaml, subfield, schema):
    """Removing a key from the first recipes entry must fail validation."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta["recipes"][0][subfield]

    expected_message = f"'{subfield}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)


@pytest.mark.parametrize("subfield", ["providers", "license"])
def test_missing_provenance_subfield(valid_meta_yaml, subfield, schema):
    """Removing a key from the provenance section must fail validation."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta["provenance"][subfield]

    expected_message = f"'{subfield}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)


@pytest.mark.parametrize("subfield", ["name", "description", "roles", "url"])
def test_missing_providers_subfield(valid_meta_yaml, subfield, schema):
    """Removing a key from the first provider entry must fail validation."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta["provenance"]["providers"][0][subfield]

    expected_message = f"'{subfield}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)


@pytest.mark.parametrize("subfield", ["name", "orcid", "github"])
def test_missing_maintainers_subfield(valid_meta_yaml, subfield, schema):
    """Removing a key from the first maintainer entry must fail validation."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta["maintainers"][0][subfield]

    expected_message = f"'{subfield}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)


@pytest.mark.parametrize("subfield", ["id"])
def test_missing_bakery_subfield(valid_meta_yaml, subfield, schema):
    """Removing a key from the bakery section must fail validation."""
    # Work on a deep copy so the fixture value itself is never mutated.
    broken_meta = copy.deepcopy(valid_meta_yaml)
    del broken_meta["bakery"][subfield]

    expected_message = f"'{subfield}' is a required property"
    with pytest.raises(ValidationError, match=expected_message):
        validate(instance=broken_meta, schema=schema)
59 changes: 59 additions & 0 deletions tests/unit/test_feedstock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from textwrap import dedent

import pytest
from ruamel.yaml import YAML

from pangeo_forge_runner.feedstock import Feedstock

# Shared ruamel.yaml parser instance; test_feedstock uses it to load the expected meta.yaml.
yaml = YAML()


@pytest.fixture(params=["recipe_object", "dict_object"])
def tmp_feedstock(request, tmp_path_factory: pytest.TempPathFactory):
    """Create a temporary feedstock directory holding meta.yaml and recipe.py.

    Parametrized over the two forms of the ``recipes`` section: a list of
    recipe objects ("recipe_object") and a single dict reference
    ("dict_object").

    Yields:
        A ``(directory, meta_yaml_text)`` tuple: the feedstock directory and
        the exact text written to its ``meta.yaml``.

    Raises:
        ValueError: if the fixture is given a param it does not know about.
    """
    tmpdir = tmp_path_factory.mktemp("feedstock")
    if request.param == "recipe_object":
        meta_yaml = dedent(
            """\
            recipes:
              - id: aws-noaa-sea-surface-temp-whoi
                object: 'recipe:recipe'
            """
        )
        recipe_py = dedent(
            """\
            class Recipe:
                pass

            recipe = Recipe()
            """
        )
    elif request.param == "dict_object":
        meta_yaml = dedent(
            """\
            recipes:
              dict_object: 'recipe:recipes'
            """
        )
        # The dict must be named `recipes` so that the 'recipe:recipes'
        # reference in meta.yaml points at a name recipe.py actually defines.
        recipe_py = dedent(
            """\
            class Recipe:
                pass

            recipes = {"my_recipe": Recipe()}
            """
        )
    else:
        # Fail loudly rather than with an unbound-local error further down.
        raise ValueError(f"unexpected tmp_feedstock param: {request.param!r}")

    (tmpdir / "meta.yaml").write_text(meta_yaml)
    (tmpdir / "recipe.py").write_text(recipe_py)

    yield tmpdir, meta_yaml


def test_feedstock(tmp_feedstock):
    """``Feedstock.meta`` must equal the parsed text of the feedstock's meta.yaml."""
    feedstock_dir, meta_yaml_text = tmp_feedstock
    feedstock = Feedstock(feedstock_dir=feedstock_dir)
    actual_meta = feedstock.meta
    expected_meta = yaml.load(meta_yaml_text)
    assert actual_meta == expected_meta
    # Not exercised yet:
    # expanded_meta = f.get_expanded_meta()
    # recipes = f.parse_recipes()
Loading