Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include DEA's new offshore territories tag #335

Merged
merged 6 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ exclude: tests/integration/data
repos:
# Normalise all Python code. (Black + isort + pyupgrade + autoflake)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.2
rev: v0.3.1
hooks:
- id: ruff
args: [--fix, --show-fixes, --output-format, grouped]
Expand Down
3 changes: 1 addition & 2 deletions eodatasets3/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ def __init__(
def __enter__(self) -> "DatasetPrepare":
return self

def __exit__(self, exc_type, exc_val, exc_tb):
...
def __exit__(self, exc_type, exc_val, exc_tb): ...

@property
def collection_location(self) -> Path:
Expand Down
4 changes: 3 additions & 1 deletion eodatasets3/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,14 +430,16 @@ def as_geo_docs(self) -> Tuple[CRS, Dict[str, GridDoc], Dict[str, MeasurementDoc
grid_docs: Dict[str, GridDoc] = {}
measurement_docs: Dict[str, MeasurementDoc] = {}
crs = None

for grid_name, (grid, measurements) in self._as_named_grids().items():
# Validate assumption: All grids should have same CRS
if crs is None:
crs = grid.crs

# TODO: CRS equality is tricky. This may not work.
# We're assuming a group of measurements specify their CRS
# the same way if they are the same.
elif grid.crs != crs:
elif (grid.crs is not None) and grid.crs != crs:
raise ValueError(
f"Measurements have different CRSes in the same dataset:\n"
f"\t{crs.to_string()!r}\n"
Expand Down
3 changes: 1 addition & 2 deletions eodatasets3/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,7 @@ def __get__(self, c: "NamingConventions", owner) -> str:
return f"{c.collection_prefix}/{offset}/"


class MissingRequiredFieldsError(ValueError):
...
class MissingRequiredFieldsError(ValueError): ...


class RequiredPropertyDict(Eo3Dict):
Expand Down
2 changes: 1 addition & 1 deletion eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def parse_xml(filepath: Path):
Extracts metadata attributes from the xml document distributed
alongside the MCD43A1 tiles.
"""
root = ElementTree.parse(str(filepath), forbid_dtd=True).getroot()
root = ElementTree.parse(str(filepath), forbid_dtd=False).getroot()

granule_id = root.find("*//ECSDataGranule/LocalGranuleID").text
instrument = root.find("*//Platform/Instrument/InstrumentShortName").text
Expand Down
3 changes: 3 additions & 0 deletions eodatasets3/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ class Eo3Dict(collections.abc.MutableMapping):
"datetime": datetime_type,
"dea:dataset_maturity": of_enum_type(("final", "interim", "nrt"), lower=True),
"dea:product_maturity": of_enum_type(("stable", "provisional"), lower=True),
"dea:processing_region": of_enum_type(
("continental_australia", "offshore_territories"), lower=True, strict=False
),
"dtr:end_datetime": datetime_type,
"dtr:start_datetime": datetime_type,
"eo:azimuth": float,
Expand Down
1 change: 1 addition & 0 deletions eodatasets3/scripts/recompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
We compress the inner tiffs and store them in an uncompressed tar. This allows random reads within the files.
We also append a checksum file at the end of the tar.
"""

import copy
import io
import socket
Expand Down
4 changes: 4 additions & 0 deletions eodatasets3/wagl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,10 +1102,14 @@ def _read_wagl_metadata(granule_group: h5py.Group):

def _apply_wagl_metadata(p: DatasetAssembler, wagl_doc: Dict):
source = wagl_doc["source_datasets"]

p.datetime = source["acquisition_datetime"]
p.platform = source["platform_id"]
p.instrument = source["sensor_id"]

if "processing_region" in wagl_doc:
p.properties["dea:processing_region"] = wagl_doc["processing_region"]

try:
p.processed = get_path(wagl_doc, ("system_information", "time_processed"))
except PathAccessError:
Expand Down
9 changes: 7 additions & 2 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ def assert_same_as_file(expected_doc: Dict, generated_file: Path, ignore_fields=

assert generated_file.exists(), f"Expected file to exist {generated_file.name}"

with generated_file.open("r") as f:
generated_doc = yaml.YAML(typ="safe").load(f)
generated_doc = load_yaml(generated_file)

expected_doc = dict(expected_doc)
for field in ignore_fields:
Expand All @@ -151,6 +150,12 @@ def assert_same_as_file(expected_doc: Dict, generated_file: Path, ignore_fields=
assert_same(generated_doc, expected_doc)


def load_yaml(generated_file):
    """
    Read a YAML document from disk using the "safe" loader.

    :param generated_file: path-like object exposing ``open()`` (the caller
                           in this module passes a ``pathlib.Path``).
    :return: whatever the safe YAML loader produces for the file's contents.
    """
    with generated_file.open("r") as stream:
        # The "safe" loader type is requested explicitly when building the parser.
        return yaml.YAML(typ="safe").load(stream)


def run_prepare_cli(invoke_script, *args, expect_success=True) -> Result:
"""Run the prepare script as a command-line command"""
__tracebackhide__ = True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
---
# Dataset
$schema: https://schemas.opendatacube.org/dataset
id: a2931343-f172-5f70-9df5-dfe97d3f8bda

label: esa_s2am_level1_0-0-20210713_48LWP_2021-07-13
product:
name: esa_s2am_level1_0

crs: epsg:32748
geometry:
type: Polygon
coordinates: [[[501898.7824760385, 8790220.0], [501569.24658963777, 8790220.0],
[501531.5307222482, 8790220.0], [501462.95175615326, 8790234.148955643], [500208.2330318917,
8790401.165159458], [500030.1913737005, 8790529.750715956], [500027.85592705157,
8790530.232556568], [500005.25674122636, 8790547.759049654], [499980.0, 8790566.000017295],
[499980.0, 8790567.346573906], [499980.0, 8790657.667389687], [499980.0, 8884400.0],
[499980.0, 8884460.0], [499980.0, 8884540.0], [512831.52069638314, 8882138.883559292],
[534132.6698497859, 8877878.647036638], [576613.122687415, 8868638.547374621],
[609780.0, 8860961.31288806], [609780.0, 8860900.553567395], [609780.0, 8860870.855807938],
[609780.0, 8790220.0], [501898.7824760385, 8790220.0]]]
grids:
default:
shape: [5490, 5490]
transform: [20.0, 0.0, 499980.0, 0.0, -20.0, 8900020.0, 0.0, 0.0, 1.0]
'10':
shape: [10980, 10980]
transform: [10.0, 0.0, 499980.0, 0.0, -10.0, 8900020.0, 0.0, 0.0, 1.0]
'60':
shape: [1830, 1830]
transform: [60.0, 0.0, 499980.0, 0.0, -60.0, 8900020.0, 0.0, 0.0, 1.0]

properties:
datetime: 2021-07-13 03:21:04.397174Z
eo:cloud_cover: 98.9175
eo:constellation: sentinel-2
eo:gsd: 10 # Ground sample distance (m)
eo:instrument: MSI
eo:platform: sentinel-2a
eo:sun_azimuth: 37.8903447301577
eo:sun_elevation: 40.9276677669808
odc:dataset_version: 0.0.20210713
odc:file_format: JPEG2000
odc:processing_datetime: 2021-07-13 05:53:03.913897Z
odc:producer: esa.int
odc:product_family: level1
odc:region_code: 48LWP
sat:orbit_state: descending
sat:relative_orbit: 32
sentinel:datastrip_id: S2A_OPER_MSI_L1C_DS_VGS4_20210713T054224_S20210713T032054_N03.01
sentinel:datatake_start_datetime: 2021-07-13 05:42:24Z
sentinel:datatake_type: INS-NOBS
sentinel:processing_baseline: '03.01'
sentinel:processing_center: VGS4
sentinel:product_name: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224
sentinel:reception_station: EDRS
sentinel:sentinel_tile_id: S2A_OPER_MSI_L1C_TL_VGS4_20210713T054224_A031632_T48LWP_N03.01

measurements:
blue:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B02.jp2
grid: '10'
coastal_aerosol:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B01.jp2
grid: '60'
green:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B03.jp2
grid: '10'
nir_1:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B08.jp2
grid: '10'
nir_2:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B8A.jp2
red:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B04.jp2
grid: '10'
red_edge_1:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B05.jp2
red_edge_2:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B06.jp2
red_edge_3:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B07.jp2
swir_1_cirrus:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B10.jp2
grid: '60'
swir_2:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B11.jp2
swir_3:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B12.jp2
water_vapour:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/IMG_DATA/T48LWP_20210713T032051_B09.jp2
grid: '60'

accessories:
metadata:s2_datastrip:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/DATASTRIP/DS_VGS4_20210713T054224_S20210713T032054/MTD_DS.xml
metadata:s2_tile:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/GRANULE/L1C_T48LWP_A031632_20210713T032054/MTD_TL.xml
metadata:s2_user_product:
path: S2A_MSIL1C_20210713T032051_N0301_R032_T48LWP_20210713T054224.SAFE/MTD_MSIL1C.xml

lineage: {}
...
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
parameters:
cloud_buffer_distance_metres: 0.0
cloud_shadow_buffer_distance_metres: 0.0
frantz_parallax_sentinel_2: false
percent_class_distribution:
clear: 0.011040622935796248
cloud: 90.0387662717726
cloud_shadow: 0.4480671143142754
snow: 0.0013641263890381973
water: 9.500761864588277
software_versions:
eugl:
version: embedded
fmask:
repo_url: https://www.pythonfmask.org/
version: 0.5.7
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
colors:
blue: 0.0
green: 0.0
red: 14.0
teal: 0.0
yellow: 0.0
error_message: no errors
final_qa_count: 5
granule: S2A_OPER_MSI_L1C_TL_VGS4_20210713T054224_A031632_T48LWP_N03.01
ref_date: '2018-12-20T00:00:00+00:00'
ref_source: GQA_v3
ref_source_path: /g/data/v10/eoancillarydata-2/GCP/GQA_v3/wrs2/123/067/LC81230672018354LGN00_B6.TIF
residual:
abs:
x: 4.15
xy: 16.41
y: 15.87
abs_iterative_mean:
x: 2.49
xy: 15.21
y: 15.0
cep90: 16.81
iterative_mean:
x: 1.19
xy: 15.05
y: 15.0
iterative_stddev:
x: 3.2
xy: 3.51
y: 1.44
mean:
x: 2.23
xy: 16.03
y: 15.87
stddev:
x: 4.48
xy: 6.19
y: 4.27
software_versions:
eugl:
version: embedded
gverify:
version: v0.25c
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
parameters:
average_over: 4
dilation_size: 2
threshold: 0.4
percent_class_distribution:
clear: 27.831355574173696
cloud: 72.1686444258263
software_versions:
eugl:
version: embedded
s2cloudless:
repo_url: null
version: 1.7.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
software_versions:
tesp:
version: embedded
Binary file not shown.
6 changes: 3 additions & 3 deletions tests/integration/prepare/test_prepare_sentinel_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,9 +448,9 @@ def test_filter_folder_structure_info(
# Our output metadata is in a different place than the data, so we expect it to
# embed the true location in the metadata (by default)
if input_dataset_path.is_dir():
expected_metadata_doc[
"location"
] = f"file://{input_dataset_path.as_posix()}/tileInfo.json"
expected_metadata_doc["location"] = (
f"file://{input_dataset_path.as_posix()}/tileInfo.json"
)
else:
expected_metadata_doc["location"] = f"zip:{input_dataset_path}!/"

Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_naming_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ def test_minimal_s2_dataset_normal(tmp_path: Path):
p.datetime = datetime(2018, 11, 4)
p.product_family = "blueberries"
p.processed = "2018-11-05T12:23:23"
p.properties[
"sentinel:sentinel_tile_id"
] = "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
p.properties["sentinel:sentinel_tile_id"] = (
"S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
)

dataset_id, metadata_path = p.done()

Expand All @@ -139,9 +139,9 @@ def test_s2_naming_conventions(tmp_path: Path):
p.dataset_version = "1.0.0"
p.region_code = "Oz"
p.properties["odc:file_format"] = "GeoTIFF"
p.properties[
"sentinel:sentinel_tile_id"
] = "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
p.properties["sentinel:sentinel_tile_id"] = (
"S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
)

p.note_source_datasets(
"telemetry",
Expand Down
Loading
Loading