-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move integration testing feedstock to tests directory #171
Changes from 5 commits
5907220
9bad5c3
5368be9
e9df873
3acc761
89ee420
3ff6e7a
77c3875
59042fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,25 @@ | ||
import json | ||
import subprocess | ||
import sys | ||
import tempfile | ||
import time | ||
from importlib.metadata import version | ||
from pathlib import Path | ||
|
||
import pytest | ||
import xarray as xr | ||
from packaging.version import parse as parse_version | ||
|
||
TEST_DATA_DIR = Path(__file__).parent.parent / "test-data" | ||
|
||
|
||
def test_dataflow_integration(): | ||
python_version = ( | ||
f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" | ||
) | ||
pfr_version = parse_version(version("pangeo-forge-recipes")) | ||
if pfr_version >= parse_version("0.10"): | ||
recipe_version_ref = str(pfr_version) | ||
recipe_version_ref = "0.10.x" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reviewer note: |
||
else: | ||
raise ValueError( | ||
f"Unsupported pfr_version: {pfr_version}. Please upgrade to 0.10 or newer." | ||
|
@@ -22,6 +29,7 @@ def test_dataflow_integration(): | |
"Bake": { | ||
"prune": True, | ||
"bakery_class": "pangeo_forge_runner.bakery.dataflow.DataflowBakery", | ||
"job_name": f"gpcp-from-gcs-py{python_version.replace('.','')}-v{recipe_version_ref.replace('.','')}", | ||
}, | ||
"DataflowBakery": {"temp_gcs_location": bucket + "/temp"}, | ||
"TargetStorage": { | ||
|
@@ -41,11 +49,9 @@ def test_dataflow_integration(): | |
"pangeo-forge-runner", | ||
"bake", | ||
"--repo", | ||
"https://github.com/pforgetest/gpcp-from-gcs-feedstock.git", | ||
"--ref", | ||
# in the test feedstock, tags are named for the recipes version | ||
# which was used to write the recipe module | ||
recipe_version_ref, | ||
str(TEST_DATA_DIR / "gpcp-from-gcs"), | ||
"--feedstock-subdir", | ||
f"feedstock-{recipe_version_ref}", | ||
"--json", | ||
"-f", | ||
f.name, | ||
|
@@ -70,6 +76,15 @@ def test_dataflow_integration(): | |
|
||
# okay, time to start checking if the job is done | ||
show_job = f"gcloud dataflow jobs show {job_id} --format=json".split() | ||
show_job_errors = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reviewer note: let's use this command to dump logs when job fails so we don't have to use GCP console |
||
"gcloud", | ||
"logging", | ||
"read", | ||
f'resource.type="dataflow_step" AND resource.labels.job_id="{job_id}" AND severity>=ERROR', | ||
"--limit", | ||
"500", | ||
"--format=json", | ||
] | ||
while True: | ||
elapsed = time.time() - start | ||
print(f"Time {elapsed = }") | ||
|
@@ -93,6 +108,10 @@ def test_dataflow_integration(): | |
# still running, let's give it another 30s then check again | ||
time.sleep(30) | ||
else: | ||
# try to get some output to the stdout so we don't have to log into the GCP console | ||
state_proc = subprocess.run(show_job_errors, capture_output=True) | ||
assert state_proc.returncode == 0 | ||
print(json.loads(state_proc.stdout)) | ||
# consider any other state a failure | ||
pytest.fail(f"{state = } is neither 'Done' nor 'Running'") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
title: "Test meta.yaml with list of recipes" | ||
recipes: | ||
- dict_object: 'recipe:recipes' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
recipes = {"test_1": "test_1", "test_2": "test_2"} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pangeo-forge-recipes>=0.10.0,<0.11 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
title: "Global Precipitation Climatology Project" | ||
description: > | ||
Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty | ||
satellite/gauge precipitation Climate data Record (CDR) from 1996 to present. | ||
pangeo_forge_version: "0.9.0" | ||
pangeo_notebook_version: "2022.06.02" | ||
recipes: | ||
- id: gpcp-from-gcs | ||
object: "recipe:recipe" | ||
provenance: | ||
providers: | ||
- name: "NOAA NCEI" | ||
description: "National Oceanographic & Atmospheric Administration National Centers for Environmental Information" | ||
roles: | ||
- host | ||
- licensor | ||
url: https://www.ncei.noaa.gov/products/global-precipitation-climatology-project | ||
- name: "University of Maryland" | ||
description: > | ||
University of Maryland College Park Earth System Science Interdisciplinary Center | ||
(ESSIC) and Cooperative Institute for Climate and Satellites (CICS). | ||
roles: | ||
- producer | ||
url: http://gpcp.umd.edu/ | ||
license: "No constraints on data access or use." | ||
maintainers: | ||
- name: "Charles Stern" | ||
orcid: "0000-0002-4078-0852" | ||
github: cisaacstern | ||
bakery: | ||
id: "pangeo-ldeo-nsf-earthcube" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import apache_beam as beam | ||
import pandas as pd | ||
from pangeo_forge_recipes.patterns import ConcatDim, FilePattern | ||
from pangeo_forge_recipes.transforms import ( | ||
OpenURLWithFSSpec, | ||
OpenWithXarray, | ||
StoreToZarr, | ||
) | ||
|
||
dates = [ | ||
d.to_pydatetime().strftime("%Y%m%d") | ||
for d in pd.date_range("1996-10-01", "1999-02-01", freq="D") | ||
] | ||
|
||
|
||
def make_url(time): | ||
url_base = "https://storage.googleapis.com/pforge-test-data" | ||
return f"{url_base}/gpcp/v01r03_daily_d{time}.nc" | ||
|
||
|
||
concat_dim = ConcatDim("time", dates, nitems_per_file=1) | ||
pattern = FilePattern(make_url, concat_dim) | ||
|
||
|
||
recipe = ( | ||
beam.Create(pattern.items()) | ||
| OpenURLWithFSSpec() | ||
| OpenWithXarray( | ||
file_type=pattern.file_type, xarray_open_kwargs={"decode_coords": "all"} | ||
) | ||
| StoreToZarr( | ||
store_name="gpcp", | ||
combine_dims=pattern.combine_dim_keys, | ||
) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
gcsfs | ||
pangeo-forge-recipes>=0.10.0,<0.11 | ||
s3fs |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
title: "Global Precipitation Climatology Project" | ||
description: > | ||
Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty | ||
satellite/gauge precipitation Climate data Record (CDR) from 1996 to present. | ||
pangeo_forge_version: "0.9.0" | ||
pangeo_notebook_version: "2022.06.02" | ||
recipes: | ||
- id: gpcp-from-gcs | ||
object: "recipe:recipe" | ||
provenance: | ||
providers: | ||
- name: "NOAA NCEI" | ||
description: "National Oceanographic & Atmospheric Administration National Centers for Environmental Information" | ||
roles: | ||
- host | ||
- licensor | ||
url: https://www.ncei.noaa.gov/products/global-precipitation-climatology-project | ||
- name: "University of Maryland" | ||
description: > | ||
University of Maryland College Park Earth System Science Interdisciplinary Center | ||
(ESSIC) and Cooperative Institute for Climate and Satellites (CICS). | ||
roles: | ||
- producer | ||
url: http://gpcp.umd.edu/ | ||
license: "No constraints on data access or use." | ||
maintainers: | ||
- name: "Charles Stern" | ||
orcid: "0000-0002-4078-0852" | ||
github: cisaacstern | ||
bakery: | ||
id: "pangeo-ldeo-nsf-earthcube" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pandas as pd | ||
from pangeo_forge_recipes.patterns import ConcatDim, FilePattern | ||
from pangeo_forge_recipes.recipes import XarrayZarrRecipe | ||
|
||
dates = [ | ||
d.to_pydatetime().strftime("%Y%m%d") | ||
for d in pd.date_range("1996-10-01", "1999-02-01", freq="D") | ||
] | ||
|
||
|
||
def make_url(time): | ||
url_base = "https://storage.googleapis.com/pforge-test-data" | ||
return f"{url_base}/gpcp/v01r03_daily_d{time}.nc" | ||
|
||
|
||
concat_dim = ConcatDim("time", dates, nitems_per_file=1) | ||
pattern = FilePattern(make_url, concat_dim) | ||
|
||
recipe = XarrayZarrRecipe( | ||
pattern, inputs_per_chunk=200, xarray_open_kwargs={"decode_coords": "all"} | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,13 +5,16 @@ | |
import sys | ||
import tempfile | ||
from importlib.metadata import distributions, version | ||
from pathlib import Path | ||
|
||
import pytest | ||
import xarray as xr | ||
from packaging.version import parse as parse_version | ||
|
||
from pangeo_forge_runner.commands.bake import Bake | ||
|
||
TEST_DATA_DIR = Path(__file__).parent.parent / "test-data" | ||
|
||
|
||
@pytest.fixture | ||
def recipes_uninstalled(): | ||
|
@@ -141,10 +144,9 @@ def test_gpcp_bake( | |
no_input_cache, | ||
recipes_version_ref, | ||
): | ||
if recipes_version_ref == "0.9.x-dictobj" or ( | ||
if ( | ||
recipes_version_ref == "0.10.x-dictobj" and recipe_id | ||
): | ||
# TODO: clarify fixturing story to avoid this hackiness | ||
pytest.skip( | ||
"We only test dictobjs for recipes >0.10.0, and without recipe_id's" | ||
) | ||
|
@@ -190,11 +192,9 @@ def test_gpcp_bake( | |
"pangeo-forge-runner", | ||
"bake", | ||
"--repo", | ||
"https://github.com/pforgetest/gpcp-from-gcs-feedstock.git", | ||
"--ref", | ||
# in the test feedstock, tags are named for the recipes version | ||
# which was used to write the recipe module | ||
recipes_version_ref, | ||
str(TEST_DATA_DIR / "gpcp-from-gcs"), | ||
"--feedstock-subdir", | ||
f"feedstock-{recipes_version_ref}", | ||
"--json", | ||
"-f", | ||
f.name, | ||
|
@@ -216,7 +216,7 @@ def test_gpcp_bake( | |
if custom_job_name: | ||
assert job_name.startswith(custom_job_name) | ||
else: | ||
assert job_name.startswith("gh-pforgetest-gpcp-from-gcs-") | ||
assert job_name.startswith("local-gpcp-2dfrom-2dgcs-feedstock-") | ||
|
||
if "dictobj" in recipes_version_ref: | ||
assert job_name.endswith( | ||
|
@@ -240,6 +240,11 @@ def test_gpcp_bake( | |
else: | ||
zarr_store_full_paths = [config["TargetStorage"]["root_path"]] | ||
|
||
# dictobj runs do not generate any datasets b/c they are not recipes | ||
# so we've asserted what we can already, just move on | ||
if recipes_version_ref.endswith("dictobj"): | ||
return | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reviewer note: this couldn't have been actually producing output for |
||
|
||
# Open the generated datasets with xarray! | ||
for path in zarr_store_full_paths: | ||
print(f"Opening dataset for {path = }") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note to reviewer: zero idea how unit tests for
dict-object
are passing onmain
b/c we need the conditional here or they fail like they do on this PR 🤷There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
another question @cisaacstern: we have a lot of special logic for
dict_object
recipes where we seem to not test list recipes. Is the list version deprecated in favor of the dict version?