diff --git a/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py b/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py
index 045ded88..c6bf5aa0 100644
--- a/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py
+++ b/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py
@@ -24,13 +24,28 @@ def parse_xml(filepath: Path):
root = ElementTree.parse(str(filepath), forbid_dtd=False).getroot()
granule_id = root.find("*//ECSDataGranule/LocalGranuleID").text
- instrument = root.find("*//Platform/Instrument/InstrumentShortName").text
- platform = "+".join(
- sorted(
- (ele.text for ele in root.findall("*//Platform/PlatformShortName")),
- reverse=True,
+
+ collection_name = root.find("*//CollectionMetaData/ShortName").text
+ collection_version = root.find("*//CollectionMetaData/VersionID").text
+
+ instrument_node = root.find("*//Platform/Instrument/InstrumentShortName")
+
+ if instrument_node is not None:
+ instrument = instrument_node.text
+ platform = "+".join(
+ sorted(
+ (ele.text for ele in root.findall("*//Platform/PlatformShortName")),
+ reverse=True,
+ )
)
- )
+ elif collection_name.startswith("MCD43"):
+ instrument = "MODIS"
+ platform = "Terra+Aqua"
+ else:
+ raise ValueError(
+ f"Could not determine instrument and platform from collection name {collection_name}"
+ )
+
start_date = root.find("*//RangeDateTime/RangeBeginningDate").text
start_time = root.find("*//RangeDateTime/RangeBeginningTime").text
end_date = root.find("*//RangeDateTime/RangeEndingDate").text
@@ -57,6 +72,7 @@ def parse_xml(filepath: Path):
creation_dt = root.find("*//InsertTime").text
return {
+ "collection_version": collection_version,
"granule_id": granule_id,
"instrument": instrument,
"platform": platform,
@@ -131,38 +147,42 @@ def process_datasets(input_path: Path, xml_file: Path) -> Iterable[Dict]:
xml_md = parse_xml(xml_file)
ds_props = _get_dataset_properties(datasets[0])
- md = {}
- md["id"] = str(uuid.uuid5(MCD43A1_NS, xml_md["granule_id"]))
- md["product"] = {"href": "https://collections.dea.ga.gov.au/nasa_c_m_mcd43a1_6"}
- md["crs"] = ds_props.pop("crs")
- md["geometry"] = valid_region(datasets)
- md["grids"] = ds_props.pop("grids")
- md["lineage"] = {}
- md["measurements"] = band_info
- md["properties"] = {
- "dtr:start_datetime": xml_md["from_dt"].isoformat(),
- "dtr:end_datetime": xml_md["to_dt"].isoformat(),
- "eo:instrument": xml_md["instrument"],
- "eo:platform": xml_md["platform"],
- "eo:gsd": ds_props.pop("eo:gsd"),
- "eo:epsg": None,
- "item:providers": [
- {
- "name": "National Aeronautics and Space Administration",
- "roles": [ItemProvider.PRODUCER.value, ItemProvider.PROCESSOR.value],
- "url": "https://modis.gsfc.nasa.gov/data/dataprod/mod43.php",
- },
- {
- "name": "United States Geological Society",
- "roles": [ItemProvider.PROCESSOR.value],
- "url": "https://lpdaac.usgs.gov/products/mcd43a1v006/",
- },
- ],
- "odc:creation_datetime": xml_md["creation_dt"].isoformat(),
- "odc:file_format": "HDF4_EOS:EOS_GRID",
- "odc:region_code": "h{}v{}".format(
- xml_md["horizontal_tile"], xml_md["vertical_tile"]
- ),
+ md = {
+ "id": str(uuid.uuid5(MCD43A1_NS, xml_md["granule_id"])),
+ "product": {"href": "https://collections.dea.ga.gov.au/nasa_c_m_mcd43a1_6"},
+ "crs": ds_props.pop("crs"),
+ "geometry": valid_region(datasets),
+ "grids": ds_props.pop("grids"),
+ "lineage": {},
+ "measurements": band_info,
+ "properties": {
+ "dtr:start_datetime": xml_md["from_dt"].isoformat(),
+ "dtr:end_datetime": xml_md["to_dt"].isoformat(),
+ "eo:instrument": xml_md["instrument"],
+ "eo:platform": xml_md["platform"],
+ "eo:gsd": ds_props.pop("eo:gsd"),
+ "eo:epsg": None,
+ "item:providers": [
+ {
+ "name": "National Aeronautics and Space Administration",
+ "roles": [
+ ItemProvider.PRODUCER.value,
+ ItemProvider.PROCESSOR.value,
+ ],
+ "url": "https://modis.gsfc.nasa.gov/data/dataprod/mod43.php",
+ },
+ {
+ "name": "United States Geological Society",
+ "roles": [ItemProvider.PROCESSOR.value],
+ "url": "https://lpdaac.usgs.gov/products/mcd43a1v006/",
+ },
+ ],
+ "odc:creation_datetime": xml_md["creation_dt"].isoformat(),
+ "odc:file_format": "HDF4_EOS:EOS_GRID",
+ "odc:region_code": "h{}v{}".format(
+ xml_md["horizontal_tile"], xml_md["vertical_tile"]
+ ),
+ },
}
return [md]
diff --git a/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml b/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml
new file mode 100644
index 00000000..216adc12
--- /dev/null
+++ b/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml
@@ -0,0 +1,213 @@
+
+
+ 1.0
+ EDC
+
+ SC:MCD43A1.061:2679459485
+ 2679459485
+ 2024-03-18 22:52:25.228
+ 2024-03-18 22:52:25.228
+
+ MCD43A1
+ 61
+
+
+
+ MCD43A1.A2024070.h19v16.061.2024079033215.hdf
+ 166175778
+ MD5
+ 564edefa30e95cd6d7b5f682d30c8aa0
+ DPLIngst
+
+
+
+ 158.478
+ further update is anticipated
+ processed once
+ MCD43A1.A2024070.h19v16.061.2024079033215.hdf
+ Day
+ 2024-03-19 03:39:40.000
+ 6.1.34
+
+
+ 6.1.13
+
+
+ 23:59:59.999999
+ 2024-03-17
+ 00:00:00.000000
+ 2024-03-02
+
+
+
+
+
+
+ 58.0861219214408
+ -80.1214779421689
+
+
+ 28.4645734312001
+ -69.3533470211469
+
+
+ 61.2685062171821
+ -69.0789424401956
+
+
+ 125.319334242246
+ -78.7435199798004
+
+
+
+
+
+
+
+ NOT SET
+
+ Passed
+ Passed was set as a default value. More algorithm will be developed
+ Passed
+ Passed
+ Not Investigated
+ See http://landweb.nascom/nasa.gov/cgi-bin/QA_WWW/qaFlagPage.cgi?sat=aqua the product Science Quality status.
+
+
+
+
+ Terra
+
+ MODIS
+
+ MODIS
+
+
+
+
+ Aqua
+
+ MODIS
+
+ MODIS
+
+
+
+
+
+ QAPERCENTGOODQUALITY
+ 19
+
+
+ QAPERCENTOTHERQUALITY
+ 68
+
+
+ QAPERCENTNOTPRODUCEDCLOUD
+ 11
+
+
+ QAPERCENTNOTPRODUCEDOTHER
+ 0
+
+
+ HORIZONTALTILENUMBER
+ 19
+
+
+ VERTICALTILENUMBER
+ 16
+
+
+ TileID
+ 51019016
+
+
+ BRDFCODEID
+ AMBRALS_V4.0R1
+
+
+ SETUPFILEID
+ 06121997
+
+
+ ALBEDOFILEID
+ 06121997
+
+
+ BRDFDATABASEVERSION
+ v1.0(500m)
+
+
+ PERCENTLANDINTILE
+ 92
+
+
+ PERCENTPROCESSEDINTILE
+ 90
+
+
+ PERCENTNEWBRDFS
+ 22
+
+
+ PERCENTSHAPEFIXEDBRDFS
+ 77
+
+
+ PERCENTSUBSTITUTEBRDFS
+ 0
+
+
+ AVERAGENUMBEROBS
+ 17
+
+
+ identifier_product_doi
+ 10.5067/MODIS/MCD43A1.061
+
+
+ identifier_product_doi_authority
+ http://dx.doi.org
+
+
+
+ MYD09GA.A2024062.h19v16.061.2024064054054.hdf
+ MYD09GA.A2024063.h19v16.061.2024065123316.hdf
+ MYD09GA.A2024064.h19v16.061.2024066070025.hdf
+ MYD09GA.A2024065.h19v16.061.2024067045627.hdf
+ MYD09GA.A2024066.h19v16.061.2024068054232.hdf
+ MYD09GA.A2024067.h19v16.061.2024069033053.hdf
+ MYD09GA.A2024068.h19v16.061.2024070032630.hdf
+ MYD09GA.A2024069.h19v16.061.2024071031737.hdf
+ MYD09GA.A2024070.h19v16.061.2024072044438.hdf
+ MYD09GA.A2024071.h19v16.061.2024073030417.hdf
+ MYD09GA.A2024072.h19v16.061.2024074033105.hdf
+ MYD09GA.A2024073.h19v16.061.2024075035410.hdf
+ MYD09GA.A2024074.h19v16.061.2024076071755.hdf
+ MYD09GA.A2024075.h19v16.061.2024077061041.hdf
+ MYD09GA.A2024076.h19v16.061.2024078230426.hdf
+ MYD09GA.A2024077.h19v16.061.2024079031724.hdf
+ MOD09GA.A2024062.h19v16.061.2024064043216.hdf
+ MOD09GA.A2024063.h19v16.061.2024065104338.hdf
+ MOD09GA.A2024064.h19v16.061.2024066062058.hdf
+ MOD09GA.A2024065.h19v16.061.2024067035936.hdf
+ MOD09GA.A2024066.h19v16.061.2024068063239.hdf
+ MOD09GA.A2024067.h19v16.061.2024069030844.hdf
+ MOD09GA.A2024068.h19v16.061.2024070031548.hdf
+ MOD09GA.A2024069.h19v16.061.2024071030858.hdf
+ MOD09GA.A2024070.h19v16.061.2024073195456.hdf
+ MOD09GA.A2024071.h19v16.061.2024073193826.hdf
+ MOD09GA.A2024072.h19v16.061.2024074025416.hdf
+ MOD09GA.A2024073.h19v16.061.2024075033805.hdf
+ MOD09GA.A2024074.h19v16.061.2024076064841.hdf
+ MOD09GA.A2024075.h19v16.061.2024077061417.hdf
+ MOD09GA.A2024076.h19v16.061.2024078060346.hdf
+ MOD09GA.A2024077.h19v16.061.2024079031018.hdf
+ MCD43DB.A2024069.61.h19v16.hdf
+
+
+ UR:10:DsShESDTUR:UR:15:DsShSciServerUR:13:[EDC:DSSDSRV]:24:BR:Browse.001:2679459492
+
+
+
diff --git a/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml b/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml
new file mode 100644
index 00000000..35975de0
--- /dev/null
+++ b/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml
@@ -0,0 +1,91 @@
+
+
+ 1.0
+ EDC
+
+ SC:MCD43A1.061:2685748605
+ 2685748605
+ 2024-03-25 14:55:23.768
+ 2024-03-25 14:55:23.768
+
+ MCD43A1
+ 61
+
+
+
+ MCD43A1.A2024071.h16v14.061.2024085194442.hdf
+ 676452
+ MD5
+ 4818ec003af27c35be711c08e142dd7a
+ DPLIngst
+
+
+
+ 0.645114898681641
+ further update is anticipated
+ processed once
+ MCD43A1.A2024071.h16v14.061.2024085194442.hdf
+ Both
+ 2024-03-25 19:47:10
+ 6.1.34
+
+
+ 6.1.13
+
+
+ 23:59:59.999000
+ 2024-03-18
+ 00:00:00.000000
+ 2024-03-11
+
+
+
+
+
+
+ -30.9862
+ -49.7344
+
+
+ -15.4217
+ -49.7431
+
+
+ -19.897
+ -60.026
+
+
+ -40
+ -60
+
+
+
+
+
+
+
+ TileID
+ 51016014
+
+
+ HORIZONTALTILENUMBER
+ 16
+
+
+ VERTICALTILENUMBER
+ 14
+
+
+ identifier_product_doi
+ 10.5067/MODIS/MCD43A1.061
+
+
+ identifier_product_doi_authority
+ http://dx.doi.org
+
+
+
+ UR:10:DsShESDTUR:UR:15:DsShSciServerUR:13:[EDC:DSSDSRV]:24:BR:Browse.001:2685748666
+
+
+
diff --git a/tests/integration/prepare/test_mcd43a_prepare.py b/tests/integration/prepare/test_mcd43a_prepare.py
new file mode 100644
index 00000000..07c66c4d
--- /dev/null
+++ b/tests/integration/prepare/test_mcd43a_prepare.py
@@ -0,0 +1,56 @@
+import datetime
+from pathlib import Path
+from pprint import pprint
+
+from eodatasets3.prepare.nasa_c_m_mcd43a1_6_prepare import parse_xml
+
+# Constant names mirror the test names: the "pre24" granule still lists its
+# Terra/Aqua platforms and MODIS instrument, the "post24" granule omits them.
+# NOTE(review): presumably "24" refers to the 2024 metadata change - confirm.
+PRE_24_XML = Path(__file__).parent / "MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml"
+POST_24_XML = (
+    Path(__file__).parent / "MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml"
+)
+
+
+def test_parse_pre24_xml():
+    """Granule whose metadata names the platforms and instrument itself."""
+    result = parse_xml(PRE_24_XML)
+
+    pprint(result)
+    assert result == {
+        "collection_version": "61",
+        "granule_id": "MCD43A1.A2024070.h19v16.061.2024079033215.hdf",
+        "instrument": "MODIS",
+        "platform": "Terra+Aqua",
+        "horizontal_tile": 19,
+        "vertical_tile": 16,
+        "from_dt": datetime.datetime(2024, 3, 2, 0, 0, tzinfo=datetime.timezone.utc),
+        "to_dt": datetime.datetime(
+            2024, 3, 17, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+        ),
+        "creation_dt": datetime.datetime(
+            2024, 3, 18, 22, 52, 25, 228000, tzinfo=datetime.timezone.utc
+        ),
+    }
+
+
+def test_parse_post24_xml():
+    """Granule without platform metadata: MODIS/Terra+Aqua are inferred."""
+    result = parse_xml(POST_24_XML)
+    pprint(result)
+    assert result == {
+        "collection_version": "61",
+        "granule_id": "MCD43A1.A2024071.h16v14.061.2024085194442.hdf",
+        "instrument": "MODIS",
+        "platform": "Terra+Aqua",
+        "horizontal_tile": 16,
+        "vertical_tile": 14,
+        "from_dt": datetime.datetime(2024, 3, 11, 0, 0, tzinfo=datetime.timezone.utc),
+        "to_dt": datetime.datetime(
+            2024, 3, 18, 23, 59, 59, 999000, tzinfo=datetime.timezone.utc
+        ),
+        "creation_dt": datetime.datetime(
+            2024, 3, 25, 14, 55, 23, 768000, tzinfo=datetime.timezone.utc
+        ),
+    }