From b08cf030b88191207ecf8ff0007e9369f956dc6a Mon Sep 17 00:00:00 2001 From: Daniel da Silva Date: Mon, 25 Nov 2024 07:32:18 -0500 Subject: [PATCH 1/4] Write out netCDF files with attributes complete --- suncet_processing_pipeline/make_level3.py | 94 ++++++++++++++++------ suncet_processing_pipeline/metadata_mgr.py | 30 ++++++- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/suncet_processing_pipeline/make_level3.py b/suncet_processing_pipeline/make_level3.py index 9d6a601..4ef6c74 100644 --- a/suncet_processing_pipeline/make_level3.py +++ b/suncet_processing_pipeline/make_level3.py @@ -5,12 +5,12 @@ from pathlib import Path from pprint import pprint -import h5netcdf +import netCDF4 import numpy as np from termcolor import cprint -from . import config_parser -from . import metadata_mgr +import config_parser +import metadata_mgr class Level3: @@ -38,9 +38,16 @@ def run(self): nc_output_path = self.run_dir / 'level3' / 'suncet_level3.nc' nc = Level3NetCDFWriter(nc_output_path, metadata) + # placeholder, TODO check real image size + image_shape = (16, 16) + image = np.random.rand(*image_shape) + image_height = np.arange(image_shape[0]) + image_width = np.arange(image_shape[1]) + # Write some blank values - nc.write_variable('carring_lat', np.zeros(100)) - nc.write_variable('carring_long', np.ones(100)) + nc.write_dimension('image_height', image_height) + nc.write_dimension('image_width', image_width) + nc.write_variable('image', image) nc.close() @@ -49,9 +56,43 @@ class Level3NetCDFWriter: def __init__(self, output_path, metadata): self._output_path = output_path self._metadata = metadata - self._nc_file = h5netcdf.File(self._output_path, 'w') - - def write_variable(self, internal_name, variable_value): + self._nc_file = netCDF4.Dataset( + self._output_path, 'w', format="NETCDF4" + ) + + def write_dimension(self, internal_name, dim_value): + """Write a dimension and its associated metadata to the file + + + This function is passed the internal name of 
the dimension, and uses + the metadata manager to look up the NetCDF4 name and associated + attributes. + + Args + internal_name: Internal name of dimension (within code) + dim_value: Value for the dimension in the file + """ + # Create dimension in file + dim_name = self._metadata.get_netcdf4_variable_name(internal_name) + + self._nc_file.createDimension(dim_name, dim_value.size) + + # Write variable for dimension data (will be created automatically + # if we don't) + nc_dim_data = self._nc_file.createVariable( + dim_name, + dim_value.dtype, + (dim_name,) + ) + + nc_dim_data[:] = dim_value + + # Write attributes + attrs = self._metadata.get_netcdf4_attrs(internal_name) + for key, value in attrs.items(): + setattr(nc_dim_data, key, value) + + def write_variable(self, internal_name, var_value): """Write a variable and its associated metadata to the file. This function is passed the internal name of the variable, and uses @@ -60,38 +101,40 @@ def write_variable(self, internal_name, variable_value): Args internal_name: Internal name of variable (within code) - variable_value: Value for the variable in the file + var_value: Value for the variable in the file """ - variable_name = self._metadata.get_netcdf4_variable_name(internal_name) + var_name = self._metadata.get_netcdf4_variable_name(internal_name) + dim_names = self._metadata.get_netcdf4_dimension_names(internal_name) # Wrote variable data print(f'Writing internal variable ', end='') cprint(internal_name, 'yellow', end='') print(f' NetCDF variable ', end='') - cprint(variable_name, 'yellow') - - # TODO: this is broken - self._nc_file.dimensions[variable_name + '_dim'] = variable_value.shape + cprint(var_name, 'yellow') - nc_variable = self._nc_file.create_variable( - name=variable_name, - dimensions=(variable_name + '_dim',), - dtype=variable_value.dtype + # Add dimensions for this variable + print('Dimensions ', end='') + cprint(dim_names, 'yellow') + + # Write variable to file + nc_variable = 
self._nc_file.createVariable( + var_name, + var_value.dtype, + dim_names, ) - nc_variable[:] = variable_value + nc_variable[:] = var_value # Write variable attributes attrs = self._metadata.get_netcdf4_attrs(internal_name) print('attributes:') - pprint(attrs) - + cprint(attrs, 'yellow') for key, value in attrs.items(): - nc_variable.attrs[key] = value - + setattr(nc_variable, key, value) + print() - + def close(self): """Close the NetCDF file, commiting all changes.""" self._nc_file.close() @@ -134,8 +177,7 @@ def main(): # Load config config_filename = Path('processing_runs') / args.run_name / 'config.ini' - config = config_parser.ConfigParser() - config.read(config_filename) + config = config_parser.Config(config_filename) # Call run() method on Level3 class level3 = Level3(args.run_name, config) diff --git a/suncet_processing_pipeline/metadata_mgr.py b/suncet_processing_pipeline/metadata_mgr.py index ea9db87..44deed2 100644 --- a/suncet_processing_pipeline/metadata_mgr.py +++ b/suncet_processing_pipeline/metadata_mgr.py @@ -55,7 +55,8 @@ def get_netcdf4_variable_name(self, internal_name): # Ensure variable is in the metadata dictionary if internal_name not in self._metadata_dict: raise RuntimeError( - f"Could not find metadata for variable with internal name '{internal_name}'" + f"Could not find metadata for variable with " + f"internal name '{internal_name}'" ) # Get the variable name, raising Exception if its not filled out in the @@ -64,12 +65,37 @@ def get_netcdf4_variable_name(self, internal_name): if not var_name: raise RuntimeError( - 'Needed NetCDF variable name for internal name "{internal_name}", but missing' + f"Needed NetCDF variable name for internal name " + f"{internal_name}\", but missing" ) # Return good result return var_name + + def get_netcdf4_dimension_names(self, internal_name): + """Get tuple of dimension names for the given variable. 
+ + Args + internal_name: Internal name of variable (within code) + Returns + tuple of dimension names + """ + # Ensure variable is in the metadata dictionary + if internal_name not in self._metadata_dict: + raise RuntimeError( + f"Could not find metadata for variable with internal name " + f"'{internal_name}'." + ) + # Load variable dict and return subset of keys that are relevant + var_dict = self._metadata_dict[internal_name] + dim_csv = var_dict['netCDF dimensions'] + if dim_csv: + return tuple(dim_csv.split(',')) + else: + return tuple() # empty tuple + + def get_netcdf4_attrs(self, internal_name): """Get dictionary of static NetCDF4 attributes for a given variable. From 36d4898564acbfdf475850bd85ac2a7c5779f877 Mon Sep 17 00:00:00 2001 From: Daniel da Silva Date: Thu, 5 Dec 2024 13:28:06 -0500 Subject: [PATCH 2/4] Remove dependency on h5netcdf --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 2716fdf..5c1529f 100644 --- a/environment.yml +++ b/environment.yml @@ -14,7 +14,6 @@ dependencies: - sunpy=4.0 - xarray=2022.6.0 - netCDF4=1.6.0 - - h5netcdf=1.1.0 - bottleneck=1.3.7 - gnuradio=3.10.5.1 - gnuradio-satellites=5.2.0 From b3bed65c350b4e2c2f13ee075482c01a38469252 Mon Sep 17 00:00:00 2001 From: Daniel da Silva Date: Thu, 5 Dec 2024 13:47:15 -0500 Subject: [PATCH 3/4] Fix imports for tests --- suncet_processing_pipeline/make_level3.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/suncet_processing_pipeline/make_level3.py b/suncet_processing_pipeline/make_level3.py index 4ef6c74..041f38c 100644 --- a/suncet_processing_pipeline/make_level3.py +++ b/suncet_processing_pipeline/make_level3.py @@ -9,8 +9,13 @@ import numpy as np from termcolor import cprint -import config_parser -import metadata_mgr +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent)) + +from suncet_processing_pipeline import ( + config_parser, metadata_mgr +) class Level3: 
From a28f390c36f075ad8d68e2ef9ce12e286d1359c7 Mon Sep 17 00:00:00 2001 From: Daniel da Silva Date: Thu, 5 Dec 2024 13:48:27 -0500 Subject: [PATCH 4/4] Remove h5netcdf from requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6ef0481..7984a6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,6 @@ sunpy==4.0 seaborn==0.11 xarray==2022.6.0 netCDF4==1.6.0 -h5netcdf==1.1.0 bottleneck==1.3.7 termcolor==2.4.0 pytest==7.1.3