Skip to content

Commit

Permalink
Merge pull request #913 from BENR0/modis_l2_available_datasets
Browse files Browse the repository at this point in the history
Modis l2 available datasets
  • Loading branch information
mraspaud authored Jan 21, 2025
2 parents 8082f99 + 27b53c3 commit d96b46f
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 43 deletions.
9 changes: 7 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,15 @@ def __getattr__(cls, name):
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.todo", "sphinx.ext.coverage",
"sphinx.ext.doctest", "sphinx.ext.napoleon", "sphinx.ext.autosummary", "doi_role",
"sphinx.ext.viewcode", "sphinxcontrib.apidoc",
"sphinx.ext.doctest", "sphinx.ext.napoleon", "sphinx.ext.autosummary", "sphinx.ext.autosectionlabel",
"doi_role", "sphinx.ext.viewcode", "sphinxcontrib.apidoc",
"sphinx.ext.mathjax"]

# Autosectionlabel
# Make sure target is unique
autosectionlabel_prefix_document = True
autosectionlabel_maxdepth = 3

# API docs
apidoc_module_dir = "../../satpy"
apidoc_output_dir = "api"
Expand Down
3 changes: 3 additions & 0 deletions doc/source/reading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ load the datasets using e.g.::
:meth:`scn.missing_datasets <satpy.scene.Scene.missing_datasets>`
property for any ``DataID`` that could not be loaded.

Available datasets
------------------

To find out what datasets are available from a reader from the files that were
provided to the ``Scene`` use
:meth:`~satpy.scene.Scene.available_dataset_ids`::
Expand Down
58 changes: 31 additions & 27 deletions satpy/etc/readers/modis_l2.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
reader:
name: modis_l2
short_name: MODIS l2
long_name: MODIS Level 2 (mod35) data in HDF-EOS format
long_name: Terra and Aqua MODIS Level 2 (mod35) data in HDF-EOS format
description: MODIS HDF-EOS L2 Reader
status: Beta
supports_fsspec: false
Expand All @@ -11,77 +11,81 @@ reader:
file_types:
mod05_hdf:
file_patterns:
- 'M{platform_indicator:1s}D05_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf'
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod05.hdf'
- "M{platform_indicator:1s}D05_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf"
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod05.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mod35_hdf:
file_patterns:
- 'M{platform_indicator:1s}D35_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf'
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod35.hdf'
- "M{platform_indicator:1s}D35_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf"
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod35.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mod06_hdf:
file_patterns:
- 'M{platform_indicator:1s}D06_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf'
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod06.hdf'
- "M{platform_indicator:1s}D06_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf"
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod06.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mod06ct_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod06ct.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod06ct.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
modis_l2_product:
file_patterns:
- "M{platform_indicator:1s}D{product:2s}_L2.A{acquisition_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
hdf_eos_geo:
file_patterns:
- 'M{platform_indicator:1s}D03_A{start_time:%y%j_%H%M%S}_{processing_time:%Y%j%H%M%S}.hdf'
- 'M{platform_indicator:1s}D03.A{start_time:%Y%j.%H%M}.{collection:03d}.{processing_time:%Y%j%H%M%S}.hdf'
- 'M{platform_indicator:1s}D03.A{start_time:%Y%j.%H%M}.{collection:03d}{suffix}.hdf'
- 'M{platform_indicator:1s}D03.{start_time:%y%j%H%M%S}.hdf'
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.geo.hdf'
- "M{platform_indicator:1s}D03_A{start_time:%y%j_%H%M%S}_{processing_time:%Y%j%H%M%S}.hdf"
- "M{platform_indicator:1s}D03.A{start_time:%Y%j.%H%M}.{collection:03d}.{processing_time:%Y%j%H%M%S}.hdf"
- "M{platform_indicator:1s}D03.A{start_time:%Y%j.%H%M}.{collection:03d}{suffix}.hdf"
- "M{platform_indicator:1s}D03.{start_time:%y%j%H%M%S}.hdf"
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.geo.hdf"
file_reader: !!python/name:satpy.readers.modis_l1b.HDFEOSGeoReader
icecon_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.icecon.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.icecon.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
inversion_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.inversion.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.inversion.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
ist_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.ist.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.ist.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mask_byte1_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mask_byte1.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mask_byte1.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mod07_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod07.hdf'
- 'M{platform_indicator:1s}D07_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod07.hdf"
- "M{platform_indicator:1s}D07_L2.A{start_time:%Y%j.%H%M}.{collection:03d}.{production_time:%Y%j%H%M%S}.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
mod28_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod28.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.mod28.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
modlst_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.modlst.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.modlst.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
ndvi_1000m_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.ndvi.1000m.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.ndvi.1000m.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler
snowmask_hdf:
file_patterns:
- '{platform_indicator:1s}1.{start_time:%y%j.%H%M}.snowmask.hdf'
- "{platform_indicator:1s}1.{start_time:%y%j.%H%M}.snowmask.hdf"
file_reader: !!python/name:satpy.readers.modis_l2.ModisL2HDFFileHandler

datasets:
longitude:
name: longitude
resolution:
5000:
file_type: [mod35_hdf, mod06_hdf, mod06ct_hdf, mod07_hdf, mod05_hdf]
file_type: [mod35_hdf, mod06_hdf, mod06ct_hdf, mod07_hdf, mod05_hdf, modis_l2_product]
1000:
file_type: [hdf_eos_geo, mod35_hdf, mod06_hdf, mod05_hdf]
file_type: [hdf_eos_geo, mod35_hdf, mod06_hdf, mod05_hdf, modis_l2_product]
500:
file_type: hdf_eos_geo
250:
Expand All @@ -94,9 +98,9 @@ datasets:
resolution:
5000:
# For EUM reduced (thinned) files
file_type: [mod35_hdf, mod06_hdf, mod06ct_hdf, mod07_hdf, mod05_hdf]
file_type: [mod35_hdf, mod06_hdf, mod06ct_hdf, mod07_hdf, mod05_hdf, modis_l2_product]
1000:
file_type: [hdf_eos_geo, mod35_hdf, mod06_hdf, mod05_hdf]
file_type: [hdf_eos_geo, mod35_hdf, mod06_hdf, mod05_hdf, modis_l2_product]
500:
file_type: hdf_eos_geo
250:
Expand Down
10 changes: 8 additions & 2 deletions satpy/readers/file_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,16 @@ def available_datasets(self, configured_datasets=None):
Example 2 - Add dynamic datasets from the file::
def available_datasets(self, configured_datasets=None):
"Add information to configured datasets."
"Add datasets dynamically determined from the file."
# pass along existing datasets
for is_avail, ds_info in (configured_datasets or []):
yield is_avail, ds_info
if is_avail is not None:
# some other file handler said it has this dataset
# we don't know any more information than the previous
# file handler so let's yield early
yield is_avail, ds_info
continue
yield self.file_type_matches(ds_info["file_type"]), ds_info
# get dynamic variables known to this file (that we created)
for var_name, val in self.dynamic_variables.items():
Expand Down
2 changes: 1 addition & 1 deletion satpy/readers/hdfeos_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def _get_good_data_mask(self, data_arr, is_category=False):
def _add_satpy_metadata(self, data_id: DataID, data_arr: xr.DataArray):
"""Add metadata that is specific to Satpy."""
new_attrs = {
"platform_name": "EOS-" + self.metadata_platform_name,
"platform_name": self.metadata_platform_name,
"sensor": "modis",
}

Expand Down
46 changes: 45 additions & 1 deletion satpy/readers/modis_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
- m[o/y]d35_l2: cloud_mask dataset
- some datasets in m[o/y]d06 files
To get a list of the available datasets for a given file refer to the "Load data" section in :doc:`../reading`.
Additionally the reader tries to add non yaml configured 2D datasets dynamically. As mentioned above there are a lot
of different level 2 datasets so this might not work in every case (for example bit encoded datasets similar to the
supported m[o/y]d35_l2 cloud mask are not decoded).
To get a list of the available datasets for a given file refer to the :ref:`reading:available datasets` section.
Geolocation files
Expand Down Expand Up @@ -145,6 +149,46 @@ def get_dataset(self, dataset_id, dataset_info):
self._add_satpy_metadata(dataset_id, dataset)
return dataset

def available_datasets(self, configured_datasets):
    """Dynamically discover 2D variables in arbitrary MODIS level 2 files.

    Passes the yaml-configured datasets through unchanged (recording their
    file keys) and then advertises every additional two-dimensional variable
    found in the underlying HDF file that is not already configured.

    Notes:
        Currently only adds 2D datasets and does not decode bit encoded information.

    """
    # Size of the trailing (across-swath) dimension mapped to its resolution in meters.
    known_resolutions = {5416: 250, 2708: 500, 1354: 1000, 270: 5000, 135: 10000}

    # Pass along yaml configured (handled) datasets, remembering their file
    # keys so dynamically discovered variables do not duplicate them.
    seen_file_keys = set()
    for is_avail, ds_info in (configured_datasets or []):
        seen_file_keys.add(ds_info.get("file_key", ds_info["name"]))
        if is_avail is None:
            # No earlier file handler claimed it; availability depends on
            # whether this handler matches the dataset's file type.
            yield self.file_type_matches(ds_info["file_type"]), ds_info
        else:
            yield is_avail, ds_info

    # Inspect the file itself and add any not-yet-configured 2D variable.
    for var_name, var_meta in self.sd.datasets().items():
        dim_names, shape = var_meta[0], var_meta[1]
        if var_name in seen_file_keys or len(dim_names) != 2:
            continue
        resolution = known_resolutions.get(shape[-1])
        if resolution is None:
            # Unrecognized swath width; cannot assign a resolution.
            continue
        yield True, {
            "file_type": self.filetype_info["file_type"],
            "resolution": resolution,
            "name": var_name,
            "file_key": var_name,
            "coordinates": ["longitude", "latitude"],
        }

def _extract_and_mask_category_dataset(self, dataset_id, dataset_info, var_name):
# what dimension is per-byte
byte_dimension = None if self.is_imapp_mask_byte1 else dataset_info["byte_dimension"]
Expand Down
5 changes: 3 additions & 2 deletions satpy/readers/yaml_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,9 @@ def create_filehandlers(self, filenames, fh_kwargs=None):
self.file_handlers.get(filetype, []) + filehandlers,
key=lambda fhd: (fhd.start_time, fhd.filename))

# load any additional dataset IDs determined dynamically from the file
# and update any missing metadata that only the file knows
# Update dataset IDs with IDs determined dynamically from the file
# and/or update any missing metadata that only the file knows.
# Check if the dataset ID is loadable from that file.
self.update_ds_ids_from_file_handlers()
return created_fhs

Expand Down
37 changes: 32 additions & 5 deletions satpy/tests/reader_tests/modis_tests/_modis_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from __future__ import annotations

import datetime as dt
from typing import Optional
from typing import Literal, Optional

import numpy as np
import pytest
Expand Down Expand Up @@ -496,20 +496,28 @@ def modis_l1b_nasa_1km_mod03_files(modis_l1b_nasa_mod021km_file, modis_l1b_nasa_
# Level 2 Fixtures


def _get_basic_variable_info(var_name: str, resolution: int) -> dict:
def _get_basic_variable_info(var_name: str, resolution: int, dim_size: Literal[2, 3]=2) -> dict:
shape = _shape_for_resolution(resolution)
data = np.ones((shape[0], shape[1]), dtype=np.uint16)

row_dim_name = f"Cell_Along_Swath_{resolution}m:modl2"
col_dim_name = f"Cell_Across_Swath_{resolution}m:modl2"

if dim_size == 3:
data = np.ones((1, shape[0], shape[1]), dtype=np.uint16)
dim_labels = ["channel", row_dim_name, col_dim_name]
elif dim_size == 2:
data = np.ones((shape[0], shape[1]), dtype=np.uint16)
dim_labels = [row_dim_name, col_dim_name]


return {
var_name: {
"data": data,
"type": SDC.UINT16,
"fill_value": 0,
"attrs": {
# dim_labels are just unique dimension names, may not match exactly with real world files
"dim_labels": [row_dim_name,
col_dim_name],
"dim_labels": dim_labels,
"valid_range": (0, 32767),
"scale_factor": 2.0,
"add_offset": -1.0,
Expand Down Expand Up @@ -728,13 +736,32 @@ def modis_l2_nasa_mod06_file(tmpdir_factory) -> list[str]:
full_path = str(tmpdir_factory.mktemp("modis_l2").join(filename))
variable_infos = _get_l1b_geo_variable_info(filename, 5000, include_angles=True)
variable_infos.update(_get_basic_variable_info("Surface_Pressure", 5000))
variable_infos.update(_get_basic_variable_info("non_yaml_configured_2D_var", 5000))
variable_infos.update(_get_basic_variable_info("non_yaml_configured_3D_var", 5000, dim_size=3))
create_hdfeos_test_file(full_path,
variable_infos,
_create_struct_metadata(5000),
_create_core_metadata("MOD06"),
_create_header_metadata())
return [full_path]

@pytest.fixture(scope="session")
def modis_l2_nasa_mod99_file(tmpdir_factory) -> list[str]:
    """Create an "artificial" MOD99 L2 HDF4 file with headers.

    No real MOD99 level 2 product exists; this file only exercises the
    dynamic available-datasets discovery for arbitrary level 2 files.
    """
    filename = generate_nasa_l2_filename("MOD99")
    output_path = str(tmpdir_factory.mktemp("modis_l2").join(filename))
    # 5 km geolocation plus one 1 km variable that is not configured in yaml.
    var_infos = _get_l1b_geo_variable_info(filename, 5000, include_angles=True)
    var_infos.update(_get_basic_variable_info("non_yaml_configured_2D_var", 1000))
    create_hdfeos_test_file(
        output_path,
        var_infos,
        _create_struct_metadata(5000),
        _create_core_metadata("MOD99"),
        _create_header_metadata(),
    )
    return [output_path]

@pytest.fixture(scope="session")
def modis_l2_imapp_snowmask_file(tmpdir_factory) -> list[str]:
Expand Down
1 change: 1 addition & 0 deletions satpy/tests/reader_tests/modis_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
modis_l2_nasa_mod06_file,
modis_l2_nasa_mod35_file,
modis_l2_nasa_mod35_mod03_files,
modis_l2_nasa_mod99_file,
modis_l3_nasa_mcd12q1_file,
modis_l3_nasa_mod09_file,
modis_l3_nasa_mod43_file,
Expand Down
2 changes: 1 addition & 1 deletion satpy/tests/reader_tests/modis_tests/test_modis_l1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

def _check_shared_metadata(data_arr):
assert data_arr.attrs["sensor"] == "modis"
assert data_arr.attrs["platform_name"] == "EOS-Terra"
assert data_arr.attrs["platform_name"] == "Terra"
assert "rows_per_scan" in data_arr.attrs
assert isinstance(data_arr.attrs["rows_per_scan"], int)
assert data_arr.attrs["reader"] == "modis_l1b"
Expand Down
21 changes: 19 additions & 2 deletions satpy/tests/reader_tests/modis_tests/test_modis_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@
# - modis_l2_imapp_snowmask_geo_files
# - modis_l2_nasa_mod06_file
# - modis_l2_nasa_mod35_file
# - modis_l2_nasa_mod99_file
# - modis_l2_nasa_mod35_mod03_files


def _check_shared_metadata(data_arr, expect_area=False):
assert data_arr.attrs["sensor"] == "modis"
assert data_arr.attrs["platform_name"] == "EOS-Terra"
assert data_arr.attrs["platform_name"] == "Terra"
assert "rows_per_scan" in data_arr.attrs
assert isinstance(data_arr.attrs["rows_per_scan"], int)
assert data_arr.attrs["reader"] == "modis_l2"
Expand Down Expand Up @@ -162,7 +163,9 @@ def test_load_250m_cloud_mask_dataset(self, input_files, exp_area):
@pytest.mark.parametrize(
("input_files", "loadables", "exp_resolution", "exp_area", "exp_value"),
[
(lazy_fixture("modis_l2_nasa_mod06_file"), ["surface_pressure"], 5000, True, 4.0),
(lazy_fixture("modis_l2_nasa_mod06_file"), ["surface_pressure", "non_yaml_configured_2D_var"],
5000, True, 4.0),
(lazy_fixture("modis_l2_nasa_mod99_file"), ["non_yaml_configured_2D_var"], 1000, True, 4.0),
# snow mask is considered a category product, factor/offset ignored
(lazy_fixture("modis_l2_imapp_snowmask_file"), ["snow_mask"], 1000, False, 1.0),
(lazy_fixture("modis_l2_imapp_snowmask_geo_files"), ["snow_mask"], 1000, True, 1.0),
Expand All @@ -181,3 +184,17 @@ def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area,
assert data_arr.shape == _shape_for_resolution(exp_resolution)
assert data_arr.attrs.get("resolution") == exp_resolution
_check_shared_metadata(data_arr, expect_area=exp_area)

def test_scene_dynamic_available_datasets(self, modis_l2_nasa_mod06_file):
    """Test available datasets method to dynamically add non configured datasets."""
    import xarray as xr
    scene = Scene(reader="modis_l2", filenames=modis_l2_nasa_mod06_file)
    available_datasets = scene.all_dataset_names()
    assert len(available_datasets) > 0
    assert "surface_pressure" in available_datasets
    # make sure configured datasets are not duplicated by dynamic discovery
    assert available_datasets.count("surface_pressure") == 1
    assert "non_yaml_configured_2D_var" in available_datasets
    # the 3D variable exists in the file but must not be advertised,
    # since only 2D datasets are added dynamically
    file_ds = xr.open_dataset(modis_l2_nasa_mod06_file[0], engine="netcdf4")
    assert "non_yaml_configured_3D_var" in file_ds
    assert "non_yaml_configured_3D_var" not in available_datasets

0 comments on commit d96b46f

Please sign in to comment.