diff --git a/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py b/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py index 045ded88..c6bf5aa0 100644 --- a/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py +++ b/eodatasets3/prepare/nasa_c_m_mcd43a1_6_prepare.py @@ -24,13 +24,28 @@ def parse_xml(filepath: Path): root = ElementTree.parse(str(filepath), forbid_dtd=False).getroot() granule_id = root.find("*//ECSDataGranule/LocalGranuleID").text - instrument = root.find("*//Platform/Instrument/InstrumentShortName").text - platform = "+".join( - sorted( - (ele.text for ele in root.findall("*//Platform/PlatformShortName")), - reverse=True, + + collection_name = root.find("*//CollectionMetaData/ShortName").text + collection_version = root.find("*//CollectionMetaData/VersionID").text + + instrument_node = root.find("*//Platform/Instrument/InstrumentShortName") + + if instrument_node is not None: + instrument = instrument_node.text + platform = "+".join( + sorted( + (ele.text for ele in root.findall("*//Platform/PlatformShortName")), + reverse=True, + ) ) - ) + elif collection_name.startswith("MCD43"): + instrument = "MODIS" + platform = "Terra+Aqua" + else: + raise ValueError( + f"Could not determine instrument and platform from collection name {collection_name}" + ) + start_date = root.find("*//RangeDateTime/RangeBeginningDate").text start_time = root.find("*//RangeDateTime/RangeBeginningTime").text end_date = root.find("*//RangeDateTime/RangeEndingDate").text @@ -57,6 +72,7 @@ def parse_xml(filepath: Path): creation_dt = root.find("*//InsertTime").text return { + "collection_version": collection_version, "granule_id": granule_id, "instrument": instrument, "platform": platform, @@ -131,38 +147,42 @@ def process_datasets(input_path: Path, xml_file: Path) -> Iterable[Dict]: xml_md = parse_xml(xml_file) ds_props = _get_dataset_properties(datasets[0]) - md = {} - md["id"] = str(uuid.uuid5(MCD43A1_NS, xml_md["granule_id"])) - md["product"] = {"href": 
"https://collections.dea.ga.gov.au/nasa_c_m_mcd43a1_6"} - md["crs"] = ds_props.pop("crs") - md["geometry"] = valid_region(datasets) - md["grids"] = ds_props.pop("grids") - md["lineage"] = {} - md["measurements"] = band_info - md["properties"] = { - "dtr:start_datetime": xml_md["from_dt"].isoformat(), - "dtr:end_datetime": xml_md["to_dt"].isoformat(), - "eo:instrument": xml_md["instrument"], - "eo:platform": xml_md["platform"], - "eo:gsd": ds_props.pop("eo:gsd"), - "eo:epsg": None, - "item:providers": [ - { - "name": "National Aeronautics and Space Administration", - "roles": [ItemProvider.PRODUCER.value, ItemProvider.PROCESSOR.value], - "url": "https://modis.gsfc.nasa.gov/data/dataprod/mod43.php", - }, - { - "name": "United States Geological Society", - "roles": [ItemProvider.PROCESSOR.value], - "url": "https://lpdaac.usgs.gov/products/mcd43a1v006/", - }, - ], - "odc:creation_datetime": xml_md["creation_dt"].isoformat(), - "odc:file_format": "HDF4_EOS:EOS_GRID", - "odc:region_code": "h{}v{}".format( - xml_md["horizontal_tile"], xml_md["vertical_tile"] - ), + md = { + "id": str(uuid.uuid5(MCD43A1_NS, xml_md["granule_id"])), + "product": {"href": "https://collections.dea.ga.gov.au/nasa_c_m_mcd43a1_6"}, + "crs": ds_props.pop("crs"), + "geometry": valid_region(datasets), + "grids": ds_props.pop("grids"), + "lineage": {}, + "measurements": band_info, + "properties": { + "dtr:start_datetime": xml_md["from_dt"].isoformat(), + "dtr:end_datetime": xml_md["to_dt"].isoformat(), + "eo:instrument": xml_md["instrument"], + "eo:platform": xml_md["platform"], + "eo:gsd": ds_props.pop("eo:gsd"), + "eo:epsg": None, + "item:providers": [ + { + "name": "National Aeronautics and Space Administration", + "roles": [ + ItemProvider.PRODUCER.value, + ItemProvider.PROCESSOR.value, + ], + "url": "https://modis.gsfc.nasa.gov/data/dataprod/mod43.php", + }, + { + "name": "United States Geological Society", + "roles": [ItemProvider.PROCESSOR.value], + "url": 
"https://lpdaac.usgs.gov/products/mcd43a1v006/", + }, + ], + "odc:creation_datetime": xml_md["creation_dt"].isoformat(), + "odc:file_format": "HDF4_EOS:EOS_GRID", + "odc:region_code": "h{}v{}".format( + xml_md["horizontal_tile"], xml_md["vertical_tile"] + ), + }, } return [md] diff --git a/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml b/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml new file mode 100644 index 00000000..216adc12 --- /dev/null +++ b/tests/integration/prepare/MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml @@ -0,0 +1,213 @@ + + + 1.0 + EDC + + SC:MCD43A1.061:2679459485 + 2679459485 + 2024-03-18 22:52:25.228 + 2024-03-18 22:52:25.228 + + MCD43A1 + 61 + + + + MCD43A1.A2024070.h19v16.061.2024079033215.hdf + 166175778 + MD5 + 564edefa30e95cd6d7b5f682d30c8aa0 + DPLIngst + + + + 158.478 + further update is anticipated + processed once + MCD43A1.A2024070.h19v16.061.2024079033215.hdf + Day + 2024-03-19 03:39:40.000 + 6.1.34 + + + 6.1.13 + + + 23:59:59.999999 + 2024-03-17 + 00:00:00.000000 + 2024-03-02 + + + + + + + 58.0861219214408 + -80.1214779421689 + + + 28.4645734312001 + -69.3533470211469 + + + 61.2685062171821 + -69.0789424401956 + + + 125.319334242246 + -78.7435199798004 + + + + + + + + NOT SET + + Passed + Passed was set as a default value. More algorithm will be developed + Passed + Passed + Not Investigated + See http://landweb.nascom/nasa.gov/cgi-bin/QA_WWW/qaFlagPage.cgi?sat=aqua the product Science Quality status. 
+ + + + + Terra + + MODIS + + MODIS + + + + + Aqua + + MODIS + + MODIS + + + + + + QAPERCENTGOODQUALITY + 19 + + + QAPERCENTOTHERQUALITY + 68 + + + QAPERCENTNOTPRODUCEDCLOUD + 11 + + + QAPERCENTNOTPRODUCEDOTHER + 0 + + + HORIZONTALTILENUMBER + 19 + + + VERTICALTILENUMBER + 16 + + + TileID + 51019016 + + + BRDFCODEID + AMBRALS_V4.0R1 + + + SETUPFILEID + 06121997 + + + ALBEDOFILEID + 06121997 + + + BRDFDATABASEVERSION + v1.0(500m) + + + PERCENTLANDINTILE + 92 + + + PERCENTPROCESSEDINTILE + 90 + + + PERCENTNEWBRDFS + 22 + + + PERCENTSHAPEFIXEDBRDFS + 77 + + + PERCENTSUBSTITUTEBRDFS + 0 + + + AVERAGENUMBEROBS + 17 + + + identifier_product_doi + 10.5067/MODIS/MCD43A1.061 + + + identifier_product_doi_authority + http://dx.doi.org + + + + MYD09GA.A2024062.h19v16.061.2024064054054.hdf + MYD09GA.A2024063.h19v16.061.2024065123316.hdf + MYD09GA.A2024064.h19v16.061.2024066070025.hdf + MYD09GA.A2024065.h19v16.061.2024067045627.hdf + MYD09GA.A2024066.h19v16.061.2024068054232.hdf + MYD09GA.A2024067.h19v16.061.2024069033053.hdf + MYD09GA.A2024068.h19v16.061.2024070032630.hdf + MYD09GA.A2024069.h19v16.061.2024071031737.hdf + MYD09GA.A2024070.h19v16.061.2024072044438.hdf + MYD09GA.A2024071.h19v16.061.2024073030417.hdf + MYD09GA.A2024072.h19v16.061.2024074033105.hdf + MYD09GA.A2024073.h19v16.061.2024075035410.hdf + MYD09GA.A2024074.h19v16.061.2024076071755.hdf + MYD09GA.A2024075.h19v16.061.2024077061041.hdf + MYD09GA.A2024076.h19v16.061.2024078230426.hdf + MYD09GA.A2024077.h19v16.061.2024079031724.hdf + MOD09GA.A2024062.h19v16.061.2024064043216.hdf + MOD09GA.A2024063.h19v16.061.2024065104338.hdf + MOD09GA.A2024064.h19v16.061.2024066062058.hdf + MOD09GA.A2024065.h19v16.061.2024067035936.hdf + MOD09GA.A2024066.h19v16.061.2024068063239.hdf + MOD09GA.A2024067.h19v16.061.2024069030844.hdf + MOD09GA.A2024068.h19v16.061.2024070031548.hdf + MOD09GA.A2024069.h19v16.061.2024071030858.hdf + MOD09GA.A2024070.h19v16.061.2024073195456.hdf + MOD09GA.A2024071.h19v16.061.2024073193826.hdf + 
MOD09GA.A2024072.h19v16.061.2024074025416.hdf + MOD09GA.A2024073.h19v16.061.2024075033805.hdf + MOD09GA.A2024074.h19v16.061.2024076064841.hdf + MOD09GA.A2024075.h19v16.061.2024077061417.hdf + MOD09GA.A2024076.h19v16.061.2024078060346.hdf + MOD09GA.A2024077.h19v16.061.2024079031018.hdf + MCD43DB.A2024069.61.h19v16.hdf + + + UR:10:DsShESDTUR:UR:15:DsShSciServerUR:13:[EDC:DSSDSRV]:24:BR:Browse.001:2679459492 + + + diff --git a/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml b/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml new file mode 100644 index 00000000..35975de0 --- /dev/null +++ b/tests/integration/prepare/MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml @@ -0,0 +1,91 @@ + + + 1.0 + EDC + + SC:MCD43A1.061:2685748605 + 2685748605 + 2024-03-25 14:55:23.768 + 2024-03-25 14:55:23.768 + + MCD43A1 + 61 + + + + MCD43A1.A2024071.h16v14.061.2024085194442.hdf + 676452 + MD5 + 4818ec003af27c35be711c08e142dd7a + DPLIngst + + + + 0.645114898681641 + further update is anticipated + processed once + MCD43A1.A2024071.h16v14.061.2024085194442.hdf + Both + 2024-03-25 19:47:10 + 6.1.34 + + + 6.1.13 + + + 23:59:59.999000 + 2024-03-18 + 00:00:00.000000 + 2024-03-11 + + + + + + + -30.9862 + -49.7344 + + + -15.4217 + -49.7431 + + + -19.897 + -60.026 + + + -40 + -60 + + + + + + + + TileID + 51016014 + + + HORIZONTALTILENUMBER + 16 + + + VERTICALTILENUMBER + 14 + + + identifier_product_doi + 10.5067/MODIS/MCD43A1.061 + + + identifier_product_doi_authority + http://dx.doi.org + + + + UR:10:DsShESDTUR:UR:15:DsShSciServerUR:13:[EDC:DSSDSRV]:24:BR:Browse.001:2685748666 + + + diff --git a/tests/integration/prepare/test_mcd43a_prepare.py b/tests/integration/prepare/test_mcd43a_prepare.py new file mode 100644 index 00000000..07c66c4d --- /dev/null +++ b/tests/integration/prepare/test_mcd43a_prepare.py @@ -0,0 +1,51 @@ +import datetime +from pathlib import Path +from pprint import pprint + +from 
eodatasets3.prepare.nasa_c_m_mcd43a1_6_prepare import parse_xml
+
+PRE_24_XML = Path(__file__).parent / "MCD43A1.A2024070.h19v16.061.2024079033215.hdf.xml"
+POST_24_XML = (
+    Path(__file__).parent / "MCD43A1.A2024071.h16v14.061.2024085194442.hdf.xml"
+)
+
+
+def test_parse_pre24_xml():
+    result = parse_xml(PRE_24_XML)  # granule XML that still lists Platform/Instrument
+    # NOTE(review): PRE = the earlier-produced granule (2024079 vs 2024085) -- confirm.
+    pprint(result)
+    assert result == {
+        "collection_version": "61",
+        "granule_id": "MCD43A1.A2024070.h19v16.061.2024079033215.hdf",
+        "instrument": "MODIS",
+        "platform": "Terra+Aqua",
+        "horizontal_tile": 19,
+        "vertical_tile": 16,
+        "from_dt": datetime.datetime(2024, 3, 2, 0, 0, tzinfo=datetime.timezone.utc),
+        "to_dt": datetime.datetime(
+            2024, 3, 17, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+        ),
+        "creation_dt": datetime.datetime(
+            2024, 3, 18, 22, 52, 25, 228000, tzinfo=datetime.timezone.utc
+        ),
+    }
+
+
+def test_parse_post24_xml():
+    result = parse_xml(POST_24_XML)  # no Platform block: exercises the MCD43 fallback
+    pprint(result)
+    assert result == {
+        "collection_version": "61",
+        "granule_id": "MCD43A1.A2024071.h16v14.061.2024085194442.hdf",
+        "instrument": "MODIS",
+        "platform": "Terra+Aqua",
+        "horizontal_tile": 16,
+        "vertical_tile": 14,
+        "from_dt": datetime.datetime(2024, 3, 11, 0, 0, tzinfo=datetime.timezone.utc),
+        "to_dt": datetime.datetime(
+            2024, 3, 18, 23, 59, 59, 999000, tzinfo=datetime.timezone.utc
+        ),
+        "creation_dt": datetime.datetime(
+            2024, 3, 25, 14, 55, 23, 768000, tzinfo=datetime.timezone.utc
+        ),
+    }