Skip to content

Commit

Permalink
Issue #5: benchmarks: initial implementation of comparing with refere…
Browse files Browse the repository at this point in the history
…nce data
  • Loading branch information
soxofaan committed Jul 16, 2024
1 parent 4f3d8c0 commit bf11993
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 4 deletions.
33 changes: 33 additions & 0 deletions benchmark_scenarios/max_ndvi.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,39 @@
},
"result": true
}
},
"reference_data": {
"job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
"openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
}
},
{
"id": "max_ndvi_fail",
"type": "openeo",
"description": "max_ndvi example, intentionally failing",
"backend": "openeofed.dataspace.copernicus.eu",
"process_graph": {
"maxndvi1": {
"process_id": "max_ndvi",
"namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/f99f351d74d291d628e3aaa07fd078527a0cb631/openeo_udp/examples/max_ndvi/max_ndvi.json",
"arguments": {
"bbox": {
"west": 6.07,
"east": 6.09,
"south": 51.21,
"north": 51.23
},
"temporal_extent": [
"2023-08-01",
"2023-09-30"
]
},
"result": true
}
},
"reference_data": {
"job-results.json": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:job-results.json",
"openEO.tif": "https://s3.waw3-1.cloudferro.com/APEx-benchmarks/max_ndvi.json:max_ndvi:reference:openEO.tif"
}
}
]
7 changes: 6 additions & 1 deletion qa/benchmarks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# TODO: get rid of artifactory extra index url once openeo 0.31.0 is released
--extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-openeo/simple
apex-algorithm-qa-tools
openeo>=0.30.0
openeo>=0.31.0.a2.dev
pytest>=8.2.0
requests>=2.32.0
xarray>=2024.6.0
netCDF4>=1.7.1
rioxarray>=0.15.7
27 changes: 24 additions & 3 deletions qa/benchmarks/tests/test_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from pathlib import Path

import openeo
import pytest
from apex_algorithm_qa_tools.scenarios import BenchmarkScenario, get_benchmark_scenarios
from apex_algorithm_qa_tools.scenarios import (
BenchmarkScenario,
download_reference_data,
get_benchmark_scenarios,
)
from openeo.testing.results import assert_job_results_allclose


@pytest.mark.parametrize(
Expand All @@ -11,7 +18,7 @@
for uc in get_benchmark_scenarios()
],
)
def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
def test_run_benchmark(scenario: BenchmarkScenario, connection_factory, tmp_path: Path):
connection: openeo.Connection = connection_factory(url=scenario.backend)

# TODO #14 scenario option to use synchronous instead of batch job mode?
Expand All @@ -20,6 +27,20 @@ def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
title=f"APEx benchmark {scenario.id}",
)

# TODO: monitor timing and progress
# TODO: abort excessively long batch jobs? https://github.com/Open-EO/openeo-python-client/issues/589
job.start_and_wait()

# TODO #5 download job results and inspect
# Download actual results
actual_dir = tmp_path / "actual"
job.get_results().download_files(target=actual_dir, include_stac_metadata=True)
# TODO: upload actual results to somewhere?

# Compare actual results with reference data
reference_dir = download_reference_data(
scenario=scenario, reference_dir=tmp_path / "reference"
)
# TODO: allow to override rtol/atol options of assert_job_results_allclose
assert_job_results_allclose(
actual=actual_dir, expected=reference_dir, tmp_path=tmp_path
)
29 changes: 29 additions & 0 deletions qa/tools/apex_algorithm_qa_tools/scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import json
import logging
import re
from pathlib import Path
from typing import List

import jsonschema
import requests
from apex_algorithm_qa_tools.common import get_project_root
from openeo.util import TimingLogger

_log = logging.getLogger(__name__)

Expand All @@ -28,6 +30,7 @@ class BenchmarkScenario:
description: str | None = None
backend: str
process_graph: dict
reference_data: dict | None

@classmethod
def from_dict(cls, data: dict) -> BenchmarkScenario:
Expand All @@ -41,6 +44,7 @@ def from_dict(cls, data: dict) -> BenchmarkScenario:
description=data.get("description"),
backend=data["backend"],
process_graph=data["process_graph"],
reference_data=data.get("reference_data"),
)


Expand Down Expand Up @@ -92,3 +96,28 @@ def lint_benchmark_scenario(scenario: BenchmarkScenario):
assert resp.json()["id"] == node["process_id"]
# TODO: check that github URL is a "pinned" reference
# TODO: check that provided parameters match expected process parameters


def download_reference_data(scenario: BenchmarkScenario, reference_dir: Path) -> Path:
    """
    Download the reference data files of a benchmark scenario.

    Each entry of ``scenario.reference_data`` maps a relative file path
    (the key) to the URL it is fetched from (the value). Every file is
    stored under ``reference_dir`` at its relative path; parent folders
    are created as needed.

    :param scenario: benchmark scenario whose ``reference_data`` mapping is
        downloaded. A missing (``None``) or empty mapping downloads nothing.
    :param reference_dir: folder to store the downloaded files in.
    :return: ``reference_dir``, unchanged.
    :raises ValueError: if a relative path would end up outside ``reference_dir``.
    :raises requests.HTTPError: if a source responds with an HTTP error status.
    """
    # (connect, read) timeout in seconds: fail instead of hanging forever
    # on an unresponsive server. The read timeout applies between received
    # bytes, not to the download as a whole.
    timeout = (30, 300)
    chunk_size = 64 * 1024

    with TimingLogger(
        title=f"Downloading reference data for {scenario.id=} to {reference_dir=}",
        logger=_log.info,
    ):
        # `reference_data` is optional on the scenario: treat None as "no files".
        for path, source in (scenario.reference_data or {}).items():
            path = reference_dir / path
            # `is_relative_to` is a purely lexical check, so resolve both
            # sides first: otherwise a key like "../x" would pass the guard.
            if not path.resolve().is_relative_to(reference_dir.resolve()):
                raise ValueError(
                    f"Resolved {path=} is not relative to {reference_dir=} ({scenario.id=})"
                )
            path.parent.mkdir(parents=True, exist_ok=True)

            with TimingLogger(
                title=f"Downloading {source=} to {path=}", logger=_log.info
            ):
                # TODO: support other sources than HTTP?
                # Context manager: release the connection, also on failure.
                with requests.get(source, stream=True, timeout=timeout) as resp:
                    # Check the status before opening the target file, so an
                    # error page is never stored as reference data.
                    resp.raise_for_status()
                    with path.open("wb") as f:
                        for chunk in resp.iter_content(chunk_size=chunk_size):
                            f.write(chunk)

    return reference_dir

0 comments on commit bf11993

Please sign in to comment.