diff --git a/.ci/scripts/disp_s1/disp_s1_compare.py b/.ci/scripts/disp_s1/disp_s1_compare.py
index 97f55bda..8c194392 100755
--- a/.ci/scripts/disp_s1/disp_s1_compare.py
+++ b/.ci/scripts/disp_s1/disp_s1_compare.py
@@ -1,13 +1,16 @@
 #!/usr/bin/env python
+"""Compare DISP-S1 products"""
 import argparse
 import logging
 import sys
 from pathlib import Path
 
-import h5py
-import numpy as np
 from dolphin import io
 from dolphin._types import Filename
+
+import h5py
+
+import numpy as np
 from numpy.typing import ArrayLike
 
 logging.basicConfig(level=logging.INFO)
@@ -29,21 +32,25 @@ class ComparisonError(ValidationError):
 
 
 def validation_failed():
+    """Set flag to indicate validation failure"""
     global validation_match
     validation_match = False
 
 
 def ValidationError(msg):
+    """Handler function for validation failure"""
     logger.error(msg)
     validation_failed()
 
 
 def ComparisonError(msg):
+    """Handler function for comparison failure"""
    logger.error(msg)
    validation_failed()
 
 
 def ValueError(msg):
+    """Handler function for value error"""
     logger.error(msg)
     validation_failed()
 
diff --git a/.ci/scripts/dswx_hls/dswx_hls_compare.py b/.ci/scripts/dswx_hls/dswx_hls_compare.py
index a6893188..7618b4af 100755
--- a/.ci/scripts/dswx_hls/dswx_hls_compare.py
+++ b/.ci/scripts/dswx_hls/dswx_hls_compare.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
-
+"""Compare DSWX-HLS products"""
 import argparse
-import numpy as np
 import os
+
+import numpy as np
+
 from osgeo import gdal
 
 COMPARE_DSWX_HLS_PRODUCTS_ERROR_TOLERANCE_ATOL = 1e-6
@@ -16,7 +18,7 @@
 
 
 def _get_prefix_str(current_flag, flag_all_ok):
-    """ Return an updated cumulative flag status and an OK/FAIL string for the current flag
+    """Return an updated cumulative flag status and an OK/FAIL string for the current flag
 
     Parameters
     ----------
@@ -56,7 +58,6 @@ def compare_dswx_hls_products(file_1, file_2, metadata_exclude_list):
        flag_all_ok: bool
            Overall comparison status
     """
-
     if not os.path.isfile(file_1):
         print(f'ERROR file not found: {file_1}')
         return False
diff --git a/.ci/scripts/dswx_s1/diff_dswx_files.py b/.ci/scripts/dswx_s1/diff_dswx_files.py
index ebc52ccf..039bb8ac 100644
--- a/.ci/scripts/dswx_s1/diff_dswx_files.py
+++ b/.ci/scripts/dswx_s1/diff_dswx_files.py
@@ -25,7 +25,7 @@ def _parse_args():
     if only one argument is given, it gives a warning message and aborts.
 
     Returns
-    --------
+    -------
     result : <-1 if FAIL>
              <0 if HELP>
 
@@ -61,16 +61,16 @@ def get_files(options):
     compares them file by file.
 
     Notes
-    ------
+    -----
     Calls external python script, dswx_comparison.py, to perform
     the file comparison.
 
     Parameters
-    ------------
+    ----------
     options : Directory names of expected_dir and output
 
     Returns
-    --------
+    -------
     result : <-1 if FAIL>
-             FAILS if number of files in 2 directories are 0, or unequal.
+             FAILS if the number of files in the 2 directories is 0, or unequal.
 
@@ -126,6 +126,7 @@ def get_files(options):
 
 
 def main():
+    """Parse command-line options and run the directory comparison"""
     options = _parse_args()
     get_files(options)
 
diff --git a/.ci/scripts/dswx_s1/dswx_comparison.py b/.ci/scripts/dswx_s1/dswx_comparison.py
index 8e03b99e..64ae6dd1 100644
--- a/.ci/scripts/dswx_s1/dswx_comparison.py
+++ b/.ci/scripts/dswx_s1/dswx_comparison.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
-#
-
+"""Compare DSWX products"""
 import argparse
 import os
-import sys
+
 import numpy as np
+
 from osgeo import gdal
 
 COMPARE_DSWX_SAR_PRODUCTS_ERROR_TOLERANCE = 1e-6
@@ -44,14 +44,14 @@ def _print_first_value_diff(image_1, image_2, prefix):
     """
     Print first value difference between two images.
 
-    Parameters
-    ----------
+    Parameters
+    ----------
     image_1 : numpy.ndarray
-        First input image
+        First input image
     image_2: numpy.ndarray
-        Second input image
+        Second input image
     prefix: str
-        Prefix to the message printed to the user
+        Prefix to the message printed to the user
     """
     flag_error_found = False
     for i in range(image_1.shape[0]):
@@ -74,8 +74,8 @@ def _compare_dswx_sar_metadata(metadata_1, metadata_2):
     """
     Compare DSWx-SAR products' metadata
 
-    Parameters
-    ----------
+    Parameters
+    ----------
     metadata_1 : dict
         Metadata of the first DSWx-SAR product
     metadata_2: dict
@@ -122,7 +122,16 @@ def _compare_dswx_sar_metadata(metadata_1, metadata_2):
 
 
 def compare_dswx_sar_products(file_1, file_2):
-
+    """
+    Compare DSWx-SAR products
+
+    Parameters
+    ----------
+    file_1 : str
+        Path to the first DSWx-SAR product
+    file_2: str
+        Path to the second DSWx-SAR product
+    """
     if not os.path.isfile(file_1):
         print(f'ERROR file not found: {file_1}')
         return False
@@ -150,7 +159,7 @@ def compare_dswx_sar_products(file_1, file_2):
     nbands_2 = layer_gdal_dataset_2.RasterCount
 
     # compare number of bands
-    flag_same_nbands = nbands_1 == nbands_2
+    flag_same_nbands = nbands_1 == nbands_2
     flag_same_nbands_str = _get_prefix_str(flag_same_nbands, flag_all_ok)
     prefix = ' ' * 7
     print(f'{flag_same_nbands_str}Comparing number of bands')
@@ -167,7 +176,7 @@ def compare_dswx_sar_products(file_1, file_2):
             image_1 = gdal_band_1.ReadAsArray()
             image_2 = gdal_band_2.ReadAsArray()
             flag_bands_are_equal = np.allclose(
-                image_1, image_2, atol = COMPARE_DSWX_SAR_PRODUCTS_ERROR_TOLERANCE,
+                image_1, image_2, atol=COMPARE_DSWX_SAR_PRODUCTS_ERROR_TOLERANCE,
                 equal_nan=True)
             flag_bands_are_equal_str = _get_prefix_str(flag_bands_are_equal,
                                                        flag_all_ok)
@@ -211,6 +220,7 @@
 
 
 def main():
+    """Parse arguments and compare 2 files"""
     parser = _get_parser()
     args = parser.parse_args()
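[Editor's note — not part of the patch] Every comparison script touched in this changeset funnels into the same numpy idiom: read two rasters band by band and declare them equal within an absolute tolerance, treating co-located NaN fill pixels as matching. A minimal self-contained sketch of that idiom, with made-up array values standing in for `gdal_band.ReadAsArray()` output:

```python
import numpy as np

# Tolerance mirroring the scripts' COMPARE_*_ERROR_TOLERANCE constants
ATOL = 1e-6

# Stand-in rasters; the real scripts read these from GeoTIFF bands
image_1 = np.array([[0.5, np.nan], [1.0, 2.0]])
image_2 = np.array([[0.5 + 5e-7, np.nan], [1.0, 2.0]])

# equal_nan=True treats co-located NaNs as equal, so only genuine data
# differences beyond the absolute tolerance fail the comparison.
bands_equal = np.allclose(image_1, image_2, atol=ATOL, equal_nan=True)
print('[PASS]' if bands_equal else '[FAIL]')
```

Without `equal_nan=True`, `np.allclose` would report any NaN pixel as a mismatch, since NaN never compares equal to itself.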
""" - columns = "SECONDS,Name,PIDs,CPU,Memory,MemoryP,NetSend,NetRecv,DiskRead,DiskWrite,Disk,Swap,Threads" - convert = {'SECONDS':int(),'Name':str(),'PIDs':int(),'CPU':float(),'Memory':float(), - 'MemoryP':float(),'NetSend':float(),'NetRecv':float(),'DiskRead':float(), - 'DiskWrite':float(),'Disk':int(),'Swap':int(),'Threads':int()} + convert = {'SECONDS': int(), 'Name': str(), 'PIDs': int(), 'CPU': float(), 'Memory': float(), + 'MemoryP': float(), 'NetSend': float(), 'NetRecv': float(), 'DiskRead': float(), + 'DiskWrite': float(), 'Disk': int(), 'Swap': int(), 'Threads': int()} # read in the new data and make lists out of the columns for analysis - colnames = columns.split(',') + colnames: list[str] = columns.split(',') data = pandas.read_csv(metrics_csv_file, header=1, names=colnames, converters=convert) @@ -46,95 +48,95 @@ def generate_plots_from_csv_file(metrics_csv_file, metrics_plot_file): max_pids = max(pids) max_cpu = max(cpu) max_mem = max(mem) - max_mem_p = max(mem_p) - max_net_s = max(net_s) - max_net_r = max(net_r) - max_disk_r = max(disk_r) - max_disk_w = max(disk_w) + # max_mem_p = max(mem_p) + # max_net_s = max(net_s) + # max_net_r = max(net_r) + # max_disk_r = max(disk_r) + # max_disk_w = max(disk_w) max_disk = round(max(disk), 2) min_disk = min(disk) max_swap = max(swap) max_threads = max(threads) - duration_s = secs[-1] - duration_hms = str(datetime.timedelta(seconds=duration_s)) + # duration_s = secs[-1] + # duration_hms = str(datetime.timedelta(seconds=duration_s)) disk_used = round(max_disk - min_disk, 2) # create list of plots to create pl = [ { - 'y' : pids, - 'title' : 'Container Process IDs (max {})'.format(max_pids), - 'xlabel' : 'Seconds', - 'ylabel' : '# Processes' + 'y': pids, + 'title': 'Container Process IDs (max {})'.format(max_pids), + 'xlabel': 'Seconds', + 'ylabel': '# Processes' }, { - 'y' : threads, - 'title' : 'Host System Threads (max {})'.format(max_threads), - 'xlabel' : 'Seconds', - 'ylabel' : '# Threads' + 'y': threads, + 'title': 'Host System Threads (max {})'.format(max_threads), + 'xlabel': 'Seconds', + 'ylabel': '# Threads' }, { - 'y' : cpu, - 'title' : 'Container CPU % (max {})'.format(max_cpu), - 'xlabel' : 'Seconds', - 'ylabel' : 'CPU % Usage' + 'y': cpu, + 'title': 'Container CPU % (max {})'.format(max_cpu), + 'xlabel': 'Seconds', + 'ylabel': 'CPU % Usage' }, { - 'y' : mem, - 'title' : 'Container Memory (max {:.2f} GB)'.format(max_mem), - 'xlabel' : 'Seconds', - 'ylabel' : 'Memory GB' + 'y': mem, + 'title': 'Container Memory (max {:.2f} GB)'.format(max_mem), + 'xlabel': 'Seconds', + 'ylabel': 'Memory GB' }, { - 'y' : mem_p, - 'title' : 'Container Memory %', - 'xlabel' : 'Seconds', - 'ylabel' : 'Memory %' + 'y': mem_p, + 'title': 'Container Memory %', + 'xlabel': 'Seconds', + 'ylabel': 'Memory %' }, { - 'y' : swap, - 'title' : 'Host System Swap Used (max {} GB)'.format(max_swap), - 'xlabel' : 'Seconds', - 'ylabel' : 'Swap Used GB' + 'y': swap, + 'title': 'Host System Swap Used (max {} GB)'.format(max_swap), + 'xlabel': 'Seconds', + 'ylabel': 'Swap Used GB' }, { - 'y' : disk, - 'title' : 'Host System Disk, max {} GB (Container start/end delta {} GB)'.format(max_disk, disk_used), - 'xlabel' : 'Seconds', - 'ylabel' : 'Disk GB' + 'y': disk, + 'title': 'Host System Disk, max {} GB (Container start/end delta {} GB)'.format(max_disk, disk_used), + 'xlabel': 'Seconds', + 'ylabel': 'Disk GB' }, { - 'y' : disk_r, - 'title' : 'Container Disk Read', - 'xlabel' : 'Seconds', - 'ylabel' : 'Disk Read GB' + 'y': disk_r, + 'title': 'Container Disk Read', + 
        {
-            'y' : disk_w,
-            'title' : 'Container Disk Write',
-            'xlabel' : 'Seconds',
-            'ylabel' : 'Disk Write GB'
+            'y': disk_w,
+            'title': 'Container Disk Write',
+            'xlabel': 'Seconds',
+            'ylabel': 'Disk Write GB'
        },
        {
-            'y' : net_r,
-            'title' : 'Container Net Recv',
-            'xlabel' : 'Seconds',
-            'ylabel' : 'Net Recv GB'
+            'y': net_r,
+            'title': 'Container Net Recv',
+            'xlabel': 'Seconds',
+            'ylabel': 'Net Recv GB'
        },
        {
-            'y' : net_s,
-            'title' : 'Container Net Send',
-            'xlabel' : 'Seconds',
-            'ylabel' : 'Net Send GB'
+            'y': net_s,
+            'title': 'Container Net Send',
+            'xlabel': 'Seconds',
+            'ylabel': 'Net Send GB'
        }
     ]
 
     # create figure with plots of data
     plot_width = 12
     plot_height = 5
-    fig, axs = plt.subplots(len(pl), figsize=(plot_width, plot_height*(len(pl))))
+    fig, axs = plt.subplots(len(pl), figsize=(plot_width, plot_height * (len(pl))))
     fig.suptitle(os.path.basename(metrics_csv_file))
 
     x = secs
@@ -142,8 +145,8 @@ def generate_plots_from_csv_file(metrics_csv_file, metrics_plot_file):
         y = pl[i]['y']
         axs[i].set_title(pl[i]['title'])
         axs[i].grid(True)
-        axs[i].plot(x,y,'.-')
-        axs[i].set(xlabel=pl[i]['xlabel'],ylabel=pl[i]['ylabel'])
+        axs[i].plot(x, y, '.-')
+        axs[i].set(xlabel=pl[i]['xlabel'], ylabel=pl[i]['ylabel'])
 
     plt.tight_layout(rect=[0, 0.03, 1, 0.95])
     plt.savefig(metrics_plot_file)
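[Editor's note — not part of the patch] The `converters` mapping this hunk reformats is what lets `pandas.read_csv` cast each docker-stats column on ingest (the values must be callables such as `int`, not instances like `int()` — hence the fix folded into the hunk above). A minimal sketch of the same read-and-plot pattern; the two-column CSV content here is invented for illustration:

```python
from io import StringIO

import matplotlib.pyplot as plt
import pandas

# Hypothetical stand-in for the docker-stats CSV the script reads
csv_text = "collection started,\nSECONDS,CPU\n1,12.5\n2,80.0\n3,42.1\n"
colnames = ['SECONDS', 'CPU']
convert = {'SECONDS': int, 'CPU': float}  # per-column casts; callables, not int()/float()

# header=1 skips the leading status row, names= relabels the columns
data = pandas.read_csv(StringIO(csv_text), header=1, names=colnames, converters=convert)

# One stacked subplot per metric, mirroring the pl-list loop in the script
fig, axs = plt.subplots(2, figsize=(12, 10))
axs[0].plot(data['SECONDS'], data['CPU'], '.-')
axs[0].set(xlabel='Seconds', ylabel='CPU % Usage')
axs[1].plot(data['SECONDS'], data['CPU'].cummax(), '.-')
axs[1].set(xlabel='Seconds', ylabel='Running max CPU %')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig('metrics_plot.png')
```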
diff --git a/.ci/scripts/metrics/process_metric_data.py b/.ci/scripts/metrics/process_metric_data.py
index 4d10f72f..408bfe3f 100644
--- a/.ci/scripts/metrics/process_metric_data.py
+++ b/.ci/scripts/metrics/process_metric_data.py
@@ -173,15 +173,16 @@ def make_lists(csv_file):
 
 def main():
     """Main program in process_metric_data.py"""
-    container_info = sys.argv[1]
-    container_name = sys.argv[2]
+    # container_info = sys.argv[1]
+    # container_name = sys.argv[2]
     stats_file = sys.argv[3]
     output_file = sys.argv[4]
 
     temp_stats = "temp_opera_docker_stats.csv"
 
-    # Remove lines that may have been recorded before Docker stated.
-    stats_columns = "SECONDS,{{.Name}},CPU,{{.CPUPerc}},MEM,{{.MemUsage}},MEM_PERC,{{.MemPerc}},NET,{{.NetIO}},BLOCK,{{.BlockIO}},PIDS,{{.PIDs}},disk_used,swap_used,total_threads"
+    # Remove lines that may have been recorded before Docker started.
+    stats_columns = "SECONDS,{{.Name}},CPU,{{.CPUPerc}},MEM,{{.MemUsage}},MEM_PERC,{{.MemPerc}},NET,{{.NetIO}},BLOCK," \
+                    "{{.BlockIO}},PIDS,{{.PIDs}},disk_used,swap_used,total_threads"
 
     expected_column_count = len(stats_columns.split(','))
     remove_unwanted_lines(stats_file, temp_stats, expected_column_count)
@@ -190,7 +191,8 @@ def main():
 
     if stats_list:
         # Write out the docker stats file
-        output_columns = "Seconds, Name, PIDs, CPU, Memory, MemoryP, NetSend, NetRecv, DiskRead, DiskWrite, Disk, Swap, Threads, LastLogLine"
+        output_columns = "Seconds, Name, PIDs, CPU, Memory, MemoryP, NetSend, NetRecv, DiskRead, DiskWrite, Disk, " \
+                         "Swap, Threads, LastLogLine"
         with open(output_file, 'w') as out_file:
             out_file.write(f"{output_columns}\n")
             for stats_row in stats_list:
@@ -202,5 +204,6 @@ def main():
     # Remove temporary files
     os.remove(temp_stats)
 
+
 if __name__ == "__main__":
     main()
diff --git a/.ci/scripts/rtc_s1/rtc_s1_compare.py b/.ci/scripts/rtc_s1/rtc_s1_compare.py
index 9b484e80..b5502e37 100755
--- a/.ci/scripts/rtc_s1/rtc_s1_compare.py
+++ b/.ci/scripts/rtc_s1/rtc_s1_compare.py
@@ -1,12 +1,15 @@
 #!/usr/bin/env python
-
-import os
-from osgeo import gdal
+"""Compare RTC-S1 products"""
 import argparse
+import glob
 import itertools
+import os
+
 import h5py
+
 import numpy as np
-import glob
+
+from osgeo import gdal
 
 PASSED_STR = '[PASS] '
 FAILED_STR = '[FAIL]'
@@ -65,24 +68,25 @@ def _get_parser():
 
 
 def _unpack_array(val_in, hdf5_obj_in):
-    '''
+    """
     Unpack the array of array into ordinary numpy array.
     Convert an HDF5 object reference into the path it is pointing to.
 
     For internal use in this script.
 
-    Parameter:
-    -----------
+    Parameter
+    ---------
     val_in: np.ndarray
         numpy array to unpack
     hdf5_obj_in:
         Source HDF5 object of `val_in`
 
-    Return:
+    Return
+    ------
     val_out: np.ndarray
         unpacked array
 
-    '''
+    """
 
     list_val_in = list(itertools.chain.from_iterable(val_in))
     list_val_out = [None] * len(list_val_in)
@@ -97,17 +101,16 @@ def _unpack_array(val_in, hdf5_obj_in):
 
 
 def print_data_difference(val_1, val_2, indent=4):
-    '''
+    """
     Print out the difference of the data whose dimension is >= 1
 
     Parameters
-    -----------
+    ----------
     val_1, val_2: np.array
         Data that has difference to each other
     indent: int
         Number of spaces for indentation
-    '''
-
+    """
     str_indent = ' ' * indent + '-'
 
     # printout the difference
@@ -147,13 +150,13 @@ def print_data_difference(val_1, val_2, indent=4):
         num_pixel_nan_discrepancy = mask_nan_discrepancy.sum()
         index_pixel_nan_discrepancy = np.where(mask_nan_discrepancy)
         print(f'{str_indent} Found {num_pixel_nan_discrepancy} '
-            'NaN inconsistencies between input arrays. '
-            'First index of the discrepancy: '
-            f'[{index_pixel_nan_discrepancy[0][0]}]')
+              'NaN inconsistencies between input arrays. '
+              'First index of the discrepancy: '
+              f'[{index_pixel_nan_discrepancy[0][0]}]')
         print(f'{str_indent} val_1[{index_pixel_nan_discrepancy[0][0]}] = '
-            f'{val_1[index_pixel_nan_discrepancy[0][0]]}')
+              f'{val_1[index_pixel_nan_discrepancy[0][0]]}')
         print(f'{str_indent} val_2[{index_pixel_nan_discrepancy[0][0]}] = '
-            f'{val_2[index_pixel_nan_discrepancy[0][0]]}')
+              f'{val_2[index_pixel_nan_discrepancy[0][0]]}')
 
-        # Operations to print out further info regarding the discrapancy
+        # Operations to print out further info regarding the discrepancy
         num_nan_both = np.logical_and(mask_nan_val_1, mask_nan_val_2).sum()
@@ -167,11 +170,10 @@ def print_data_difference(val_1, val_2, indent=4):
 
 
 def get_list_dataset_attrs_keys(hdf_obj_1: h5py.Group,
-                                key_in: str='/',
-                                list_dataset_so_far: list=None,
-                                list_attrs_so_far: list=None):
-
-    '''
+                                key_in: str = '/',
+                                list_dataset_so_far: list = None,
+                                list_attrs_so_far: list = None):
+    r"""
     Recursively traverse the datasets and attributes within the input HDF5 group.
     Returns the list of keys for datasets and attributes.
@@ -190,16 +192,15 @@ def get_list_dataset_attrs_keys(hdf_obj_1: h5py.Group,
     list_attrs_so_far: list
-        list of the attribute path/keys that have found so far
+        list of the attribute path/keys that have been found so far
 
-    Return:
-    -------
+    Return
+    ------
     list_dataset_so_far : list
         List of datasets keys found for given HDF5 group
     list_attrs_so_far : list
         List of attributes found for given HDF5 group.
         Each attribute is identified by its path and key (attribute name).
 
-    '''
-
+    """
     # default values for the lists
     if list_dataset_so_far is None:
         list_dataset_so_far = []
@@ -227,14 +228,14 @@ def get_list_dataset_attrs_keys(hdf_obj_1: h5py.Group,
 
 def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
                           id_key=None, total_key=None, print_passed_element=True,
-                          list_exclude: list=None):
-    '''
+                          list_exclude: list = None):
+    r"""
     Compare the dataset or attribute defined by `str_key`
 
     NOTE: For attributes, the path and the key are separated by newline character ('\n')
 
     Parameters
-    -----------
+    ----------
     hdf5_obj_1: h5py.Group
         The 1st HDF5 object to compare
     hdf5_obj_2: h5py.Group
@@ -245,6 +246,8 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
         Designate if `str_key` is for dataset or attribute
     id_key: int
         index of the key in the list. Optional for printout purpose.
+    total_key: int
+        The total number of unique dataset keys that are common between the 2 files.
     id_key: int
         total number of the list. Optional for printout purpose.
     print_passed_element: bool, default = True
@@ -252,13 +255,10 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
         If True, print out the path of the passed dataset/attribute
     list_exclude: list(str)
         Absolute paths of the elements to be excluded from the comparison
 
-
-    Return:
-    -------
-    _: True when the dataset / attribute are equivalent; False otherwise
-    '''
-
-
+    Return
+    ------
+    True when the dataset / attribute are equivalent; False otherwise
+    """
     if id_key is None or total_key is None:
         str_order = ''
     else:
@@ -338,7 +338,7 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
             print(f'{PASSED_STR} ', str_message_data_location)
         else:
             print(f'{FAILED_STR} ', str_message_data_location)
-            print( ' - numerical scalar. Failed to pass the test. '
+            print(' - numerical scalar. Failed to pass the test. '
                  f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                  f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
             print(f'    - 1st value: {val_1}')
@@ -352,7 +352,7 @@
             print(f'{PASSED_STR} ', str_message_data_location)
         else:
             print(f'{FAILED_STR} ', str_message_data_location)
-            print( ' - non-numerical scalar. Failed to pass the test.')
+            print(' - non-numerical scalar. Failed to pass the test.')
             print(f'    - 1st value: {val_1}')
             print(f'    - 2nd value: {val_2}\n')
         return return_val
@@ -390,7 +390,6 @@
                 print_data_difference(val_1, val_2)
             return return_val
 
-
         if len(shape_val_1) >= 2:
             return_val = np.allclose(val_1,
                                      val_2,
@@ -415,26 +414,25 @@
 
 
 def compare_rtc_hdf5_files(file_1: str, file_2: str,
-                           list_elements_to_exclude: list=None):
-    '''
+                           list_elements_to_exclude: list = None):
+    """
     Compare the two RTC products (in HDF5) if they are equivalent
     within acceptable difference
 
     Parameters
-    -----------
+    ----------
     file_1, file_2: str
         Path to the RTC products (in HDF5)
     list_elements_to_exclude: list(str)
         Absolute paths to the elements to be excluded from the comparison
 
-    Return:
-    -------
-    _: bool
-        `True` if the two products are equivalent; `False` otherwise
+    Return
+    ------
+    bool
+        `True` if the two products are equivalent; `False` otherwise
 
-    '''
-
-    with h5py.File(file_1,'r') as hdf5_in_1, h5py.File(file_2,'r') as hdf5_in_2:
+    """
+    with h5py.File(file_1, 'r') as hdf5_in_1, h5py.File(file_2, 'r') as hdf5_in_2:
         list_dataset_1, list_attrs_1 = get_list_dataset_attrs_keys(hdf5_in_1)
         set_dataset_1 = set(list_dataset_1)
         set_attrs_1 = set(list_attrs_1)
@@ -503,9 +501,9 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
         list_dataset_1st_only.sort()
         list_dataset_2nd_only = list(set_dataset_2 - set_dataset_1)
         list_dataset_2nd_only.sort()
-        print('    '+'\n    '.join(list_dataset_1st_only))
-        print('\nIn the 2st HDF5, not in the 1nd data:')
-        print('    '+'\n    '.join(list_dataset_2nd_only))
+        print('    ' + '\n    '.join(list_dataset_1st_only))
+        print('\nIn the 2nd HDF5, not in the 1st data:')
+        print('    ' + '\n    '.join(list_dataset_2nd_only))
 
         # Print out the attribute structure discrepancy if there are any.
         # Omitting the print out when the dataset structure is not identical
@@ -518,12 +516,12 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
                   'Attribute structure not identical.')
             print('In the 1st HDF5, not in the 2nd data:')
             print('\r    ' +
-                  '\r    '.join(list_attrs_1st_only).\
-                      replace('\n', ',\tattr: ').replace('\r', '\n'))
+                  '\r    '.join(list_attrs_1st_only).
+                      replace('\n', ',\tattr: ').replace('\r', '\n'))
             print('\nIn the 2nd HDF5, not in the 1st data:')
             print('\r    ' +
-                  '\r    '.join(list_attrs_2nd_only).\
+                  '\r    '.join(list_attrs_2nd_only).
                      replace('\n', ',\tattr: ').replace('\r', '\n'))
 
     # Print the test summary
@@ -533,11 +531,11 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
     if flag_identical_dataset_structure:
         print(f'{PASSED_STR} Same dataset structure confirmed.')
     else:
-        print( f'{FAILED_STR} '
+        print(f'{FAILED_STR} '
              f'{len(list_dataset_1st_only)} datasets from the 1st HDF are'
-             ' not found in the 2nd file.\n'
+              ' not found in the 2nd file.\n'
             f' {len(list_dataset_2nd_only)} datasets from the 2nd HDF are'
-             ' not found in the 1st file.')
+              ' not found in the 1st file.')
 
     # Attributes structure
     if flag_identical_attrs_structure:
@@ -545,9 +543,9 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
         print(f'{PASSED_STR} Same attribute structure confirmed.')
     else:
         print(f'{FAILED_STR} '
             f'{len(list_attrs_1st_only)} attributes from the 1st HDF are'
-             ' not found in the 2nd file.\n'
+              ' not found in the 2nd file.\n'
            f' {len(list_attrs_2nd_only)} attributes from the 2nd HDF are'
-             ' not found in the 1st file.')
+              ' not found in the 1st file.')
 
     # Closeness of the common dataset
     if all(list_flag_identical_dataset):
@@ -581,28 +579,29 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
 
 def _get_prefix_str(flag_same, flag_all_ok):
-    '''
+    """
     Returns the prefix string for a comparison test,
     either the contents of PASSED_STR or the FAILED_STR.
 
     Parameters
-    -----------
+    ----------
     flag_same: bool
         Result of the comparison test
     flag_all_ok: list(bool)
         Mutable list of booleans that will hold the overall test status
 
-    Return:
-    -------
-    _: str
-        Prefix string for the given comparison test
+    Return
+    ------
+    str
+        Prefix string for the given comparison test
 
-    '''
+    """
     flag_all_ok[0] = flag_all_ok[0] and flag_same
 
     return f'{PASSED_STR} ' if flag_same else f'{FAILED_STR} '
 
 
 def compare_rtc_s1_products(file_1, file_2):
+    """Compare two GeoTIFF files for consistency."""
     if not os.path.isfile(file_1):
         print(f'ERROR file not found: {file_1}')
         return False
@@ -625,7 +624,7 @@ def compare_rtc_s1_products(file_1, file_2):
     nbands_2 = layer_gdal_dataset_2.RasterCount
 
     # compare number of bands
-    flag_same_nbands = nbands_1 == nbands_2
+    flag_same_nbands = nbands_1 == nbands_2
     flag_same_nbands_str = _get_prefix_str(flag_same_nbands, flag_all_ok)
     prefix = ' ' * 7
     print(f'{flag_same_nbands_str}Comparing number of bands')
@@ -694,8 +693,8 @@ def _compare_rtc_s1_metadata(metadata_1, metadata_2):
     """
     Compare RTC-S1 products' metadata
 
-    Parameters
-    ----------
+    Parameters
+    ----------
     metadata_1 : dict
         Metadata of the first RTC-S1 product
     metadata_2: dict
@@ -749,8 +748,8 @@ def _print_first_value_diff(image_1, image_2, prefix):
     """
     Print first value difference between two images.
 
-    Parameters
-    ----------
+    Parameters
+    ----------
     image_1 : numpy.ndarray
         First input image
     image_2: numpy.ndarray
@@ -782,9 +781,7 @@ def _print_first_value_diff(image_1, image_2, prefix):
 
 
 def main():
-    '''
-    main function of the RTC product comparison script
-    '''
+    """Main function of the RTC product comparison script"""
     parser = _get_parser()
     args = parser.parse_args()
 
@@ -819,7 +816,7 @@ def main():
         print('*** file 1:', file_1)
         print('*** file 2:', file_2)
         print('-------------------------------------------------------')
-        basename= os.path.basename(file_1)
+        basename = os.path.basename(file_1)
         results_dict[basename] = compare_rtc_s1_products(file_1, file_2)
 
     file_list_1 = glob.glob(os.path.join(args.input_dirs[0], '*h5'))
diff --git a/src/opera/pge/disp_s1/disp_s1_pge.py b/src/opera/pge/disp_s1/disp_s1_pge.py
index 7157b160..8a3b740c 100644
--- a/src/opera/pge/disp_s1/disp_s1_pge.py
+++ b/src/opera/pge/disp_s1/disp_s1_pge.py
@@ -70,7 +70,7 @@ def run_preprocessor(self, **kwargs):
                                     self.logger)
 
         # TODO gamma version of SAS seems to only support .grb format files,
-        # reenable NetCDF format is ever supported/desired
-        #self.convert_troposphere_model_files()
+        # re-enable if NetCDF format is ever supported/desired
+        # self.convert_troposphere_model_files()
 
     def convert_troposphere_model_files(self):
         """
@@ -81,7 +81,8 @@ def convert_troposphere_model_files(self):
         """
         # Retrieve the troposphere weather model file group (if provided) from
         # the run config file
-        troposphere_model_files_list = self.runconfig.sas_config['dynamic_ancillary_file_group'].get('troposphere_files', {})
+        troposphere_model_files_list = \
+            self.runconfig.sas_config['dynamic_ancillary_file_group'].get('troposphere_files', {})
 
         # Converted files will be stored in the scratch directory.
         scratch_dir = self.runconfig.sas_config['product_path_group']['scratch_path']
@@ -395,10 +396,10 @@ def _compressed_cslc_filename(self, inter_filename):
         level = "L2"
         name = "COMPRESSED-CSLC-S1"
 
-        ccslc_regex = ("compressed_(?P<burst_id>\w{4}_\w{6}_\w{3})_"
-                       "(?P<ref_date>\d{8})_"
-                       "(?P<start_date>\d{8})_"
-                       "(?P<stop_date>\d{8})[.](?P<ext>h5)$")
+        ccslc_regex = (r'compressed_(?P<burst_id>\w{4}_\w{6}_\w{3})_'
+                       r'(?P<ref_date>\d{8})_'
+                       r'(?P<start_date>\d{8})_'
+                       r'(?P<stop_date>\d{8})[.](?P<ext>h5)$')
 
         result = re.match(ccslc_regex, os.path.basename(inter_filename))
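[Editor's note — not part of the patch] The angle-bracketed names inside the `(?P<...>)` groups above were stripped when this diff was extracted; `burst_id`, `ref_date`, `start_date`, `stop_date` and `ext` are editorial reconstructions, not necessarily the patch's original names. The same caveat applies to the filename pattern in dswx_ni_pge.py just below. A sketch of how such a named-group pattern picks a compressed CSLC filename apart, using a filename that appears verbatim in the unit tests later in this patch:

```python
import re

# Group names mirror the reconstructed placeholders above; the real ones may differ.
CCSLC_REGEX = (r'compressed_(?P<burst_id>\w{4}_\w{6}_\w{3})_'
               r'(?P<ref_date>\d{8})_'
               r'(?P<start_date>\d{8})_'
               r'(?P<stop_date>\d{8})[.](?P<ext>h5)$')

# Filename taken from test_disp_s1_pge.py below
result = re.match(CCSLC_REGEX, 'compressed_t042_088905_iw1_20221107_20221119_20221213.h5')
assert result is not None
print(result.group('burst_id'))   # -> t042_088905_iw1
print(result.group('stop_date'))  # -> 20221213
```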
""" - validated_product_filenames = [] pattern = re.compile( r'(?POPERA)_(?PL3)_(?PDSWx)-(?PNI)_(?PT[^\W_]{5})_' r'(?P\d{8}T\d{6}Z)_(?P\d{8}T\d{6}Z)_(?PLSAR)_(?P30)_' diff --git a/src/opera/pge/rtc_s1/rtc_s1_pge.py b/src/opera/pge/rtc_s1/rtc_s1_pge.py index f0294578..8bf98989 100644 --- a/src/opera/pge/rtc_s1/rtc_s1_pge.py +++ b/src/opera/pge/rtc_s1/rtc_s1_pge.py @@ -15,14 +15,9 @@ from os import walk from os.path import basename, getsize, join -import h5py - -import numpy as np - from opera.pge.base.base_pge import PgeExecutor from opera.pge.base.base_pge import PostProcessorMixin from opera.pge.base.base_pge import PreProcessorMixin -from opera.util.dataset_utils import get_burst_id_from_file_name from opera.util.dataset_utils import get_sensor_from_spacecraft_name from opera.util.dataset_utils import parse_bounding_polygon_from_wkt from opera.util.error_codes import ErrorCode @@ -30,7 +25,6 @@ from opera.util.h5_utils import get_rtc_s1_product_metadata from opera.util.input_validation import validate_slc_s1_inputs from opera.util.render_jinja2 import render_jinja2 -from opera.util.tiff_utils import set_geotiff_metadata from opera.util.time import get_time_for_filename diff --git a/src/opera/test/pge/disp_s1/test_disp_s1_pge.py b/src/opera/test/pge/disp_s1/test_disp_s1_pge.py index cfc41468..22080038 100644 --- a/src/opera/test/pge/disp_s1/test_disp_s1_pge.py +++ b/src/opera/test/pge/disp_s1/test_disp_s1_pge.py @@ -17,9 +17,10 @@ from subprocess import CompletedProcess, Popen from unittest.mock import patch -import pytest from pkg_resources import resource_filename +import pytest + import yaml import opera.pge.disp_s1.disp_s1_pge @@ -157,7 +158,8 @@ def _compare_algorithm_parameters_runconfig_to_expected(self, runconfig): self.assertEqual(runconfig['output_options']['hdf5_creation_options']['compression_opts'], 4) self.assertEqual(runconfig['output_options']['hdf5_creation_options']['shuffle'], True) self.assertListEqual(runconfig['output_options']['gtiff_creation_options'], - ['COMPRESS=DEFLATE', 'ZLEVEL=4', 'BIGTIFF=YES', 'TILED=YES', 'BLOCKXSIZE=128', 'BLOCKYSIZE=128']) + ['COMPRESS=DEFLATE', 'ZLEVEL=4', 'BIGTIFF=YES', + 'TILED=YES', 'BLOCKXSIZE=128', 'BLOCKYSIZE=128']) self.assertEqual(runconfig['output_options']['add_overviews'], True) self.assertListEqual(runconfig['output_options']['overview_levels'], [4, 8, 16, 32, 64]) self.assertEqual(runconfig['subdataset'], '/data/VV') @@ -236,9 +238,8 @@ def test_disp_s1_pge_execution(self): ) self.assertTrue(os.path.exists(expected_browse_product)) - for compressed_cslc in [ - 'compressed_t042_088905_iw1_20221107_20221119_20221213.h5', - 'compressed_t042_088906_iw1_20221107_20221119_20221213.h5']: + for compressed_cslc in ['compressed_t042_088905_iw1_20221107_20221119_20221213.h5', + 'compressed_t042_088906_iw1_20221107_20221119_20221213.h5']: expected_compressed_cslc_product = join( pge.runconfig.output_product_path, pge._compressed_cslc_filename(compressed_cslc) @@ -282,9 +283,6 @@ def test_filename_application(self): rf'\d{{8}}T\d{{6}}Z.nc' ) - png_files = glob.glob(join(output_dir, '*.png')) - png_file = png_files[0] - expected_browse_filename = pge._browse_filename( inter_filename=abspath("disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png") ) @@ -303,7 +301,8 @@ def test_filename_application(self): h5_file = sorted(h5_files)[0] expected_ccslc_filename = pge._compressed_cslc_filename( - inter_filename="disp_s1_pge_test/output_dir/compressed_slcs/compressed_t042_088905_iw1_20221107_20221119_20221213.h5" + 
+                           "compressed_t042_088905_iw1_20221107_20221119_20221213.h5"
         )
 
         self.assertEqual(os.path.basename(h5_file), expected_ccslc_filename)
@@ -569,6 +568,7 @@ def test_get_cslc_input_burst_id_set(self):
         cslc_input_files, amplitude_dispersion_files, amplitude_mean_files,
         and geometry_files.
         """
+
         def get_sample_input_files(file_type: str) -> list:
             """Helper function for test_get_cslc_input_burst_id_set()"""
             if file_type == 'compressed':
@@ -838,15 +838,16 @@ def test_disp_s1_pge_validate_product_output(self):
         with open(expected_log_file, 'r', encoding='utf-8') as infile:
             log_contents = infile.read()
 
-        self.assertIn("SAS output file 20180101_20180330.unw.unwrapped_phase.png exists, but is empty", log_contents)
+        self.assertIn("SAS output file 20180101_20180330.unw.unwrapped_phase.png exists, but is empty",
+                      log_contents)
 
         shutil.rmtree(pge.runconfig.output_product_path)
 
         # compressed_slc directory does not exist
         runconfig_dict['RunConfig']['Groups']['PGE']['PrimaryExecutable']['ProgramOptions'] = \
             ['-p disp_s1_pge_test/output_dir/not_compressed_slcs;',
              'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.nc bs=1M count=1;',
-             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png bs=1M count=1;',
-             '/bin/echo DISP-S1 invoked with RunConfig']
+             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png '
+             'bs=1M count=1;', '/bin/echo DISP-S1 invoked with RunConfig']
 
         with open(test_runconfig_path, 'w', encoding='utf-8') as outfile:
             yaml.safe_dump(runconfig_dict, outfile, sort_keys=False)
@@ -868,8 +869,8 @@ def test_disp_s1_pge_validate_product_output(self):
         runconfig_dict['RunConfig']['Groups']['PGE']['PrimaryExecutable']['ProgramOptions'] = \
             ['-p disp_s1_pge_test/output_dir/compressed_slcs;',
              'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.nc bs=1M count=1;',
-             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png bs=1M count=1;',
-             '/bin/echo DISP-S1 invoked with RunConfig']
+             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png '
+             'bs=1M count=1;', '/bin/echo DISP-S1 invoked with RunConfig']
 
         with open(test_runconfig_path, 'w', encoding='utf-8') as outfile:
             yaml.safe_dump(runconfig_dict, outfile, sort_keys=False)
@@ -891,8 +892,10 @@ def test_disp_s1_pge_validate_product_output(self):
         runconfig_dict['RunConfig']['Groups']['PGE']['PrimaryExecutable']['ProgramOptions'] = \
             ['-p disp_s1_pge_test/output_dir/compressed_slcs;',
              'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.nc bs=1M count=1;',
-             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png bs=1M count=1;',
-             'touch disp_s1_pge_test/output_dir/compressed_slcs/compressed_slc_t087_185684_iw2_20180222_20180330.h5;',  # noqa E501
+             'dd if=/dev/urandom of=disp_s1_pge_test/output_dir/20180101_20180330.unw.unwrapped_phase.png '
+             'bs=1M count=1;',
+             'touch disp_s1_pge_test/output_dir/compressed_slcs/'
+             'compressed_slc_t087_185684_iw2_20180222_20180330.h5;',
              '/bin/echo DISP-S1 invoked with RunConfig']
 
         with open(test_runconfig_path, 'w', encoding='utf-8') as outfile:
             yaml.safe_dump(runconfig_dict, outfile, sort_keys=False)
@@ -963,7 +966,7 @@ def test_scratch_sas_runconfig_for_grib_to_netcdf_files(self):
             ending_file_name = os.path.splitext(os.path.basename(ending_path))[0]
             if ending_file_name in starting_grb_file_names:
                 self.assertIn('scratch_dir', ending_path)
-                self.assertTrue(exists(ending_path))   # verify the files exist on disk
+                self.assertTrue(exists(ending_path))  # verify the files exist on disk
                 ending_grb_file_names.append(ending_file_name)
 
         self.assertEqual(starting_grb_file_names, ending_grb_file_names)
diff --git a/src/opera/test/pge/dswx_s1/test_dswx_s1_pge.py b/src/opera/test/pge/dswx_s1/test_dswx_s1_pge.py
index 4f339be6..9c5cad90 100644
--- a/src/opera/test/pge/dswx_s1/test_dswx_s1_pge.py
+++ b/src/opera/test/pge/dswx_s1/test_dswx_s1_pge.py
@@ -156,7 +156,8 @@ def _compare_algorithm_parameters_runconfig_to_expected(self, runconfig):
                          ['Bare sparse vegetation', 'Urban', 'Moss and lichen'])
         self.assertEqual(runconfig['processing']['masking_ancillary']['land_cover_darkland_extension_list'],
                          ['Grassland', 'Shrubs'])
-        self.assertListEqual(runconfig['processing']['masking_ancillary']['land_cover_water_label'], ['Permanent water bodies'])
+        self.assertListEqual(runconfig['processing']['masking_ancillary']['land_cover_water_label'],
+                             ['Permanent water bodies'])
         self.assertEqual(runconfig['processing']['masking_ancillary']['co_pol_threshold'], -14.6)
         self.assertEqual(runconfig['processing']['masking_ancillary']['cross_pol_threshold'], -22.8)
         self.assertEqual(runconfig['processing']['masking_ancillary']['water_threshold'], 0.05)
diff --git a/src/opera/test/pge/rtc_s1/test_rtc_s1_pge.py b/src/opera/test/pge/rtc_s1/test_rtc_s1_pge.py
index 19cf924d..8c22ead9 100644
--- a/src/opera/test/pge/rtc_s1/test_rtc_s1_pge.py
+++ b/src/opera/test/pge/rtc_s1/test_rtc_s1_pge.py
@@ -17,9 +17,10 @@
 from io import StringIO
 from os.path import abspath, join
 
-import yaml
 from pkg_resources import resource_filename
 
+import yaml
+
 from opera.pge import RunConfig
 from opera.pge.rtc_s1.rtc_s1_pge import RtcS1Executor
 from opera.util import PgeLogger
diff --git a/src/opera/util/input_validation.py b/src/opera/util/input_validation.py
index 69a678d2..57afa8aa 100644
--- a/src/opera/util/input_validation.py
+++ b/src/opera/util/input_validation.py
@@ -139,7 +139,7 @@ def validate_slc_s1_inputs(runconfig, logger, name):
             logger.critical(name, ErrorCode.INVALID_INPUT, error_msg)
 
 
-def get_burst_id_set(input_file_group : list, logger, name) -> set:
+def get_burst_id_set(input_file_group: list, logger, name) -> set:
     """
     Compiles a set of burst_ids from a list of files defined in the runconfig
     file. Each file in the list should have a burst_id in the file name.
@@ -182,8 +182,8 @@ def get_burst_id_set(input_file_group : list, logger, name) -> set:
     return burst_ids
 
 
-def check_disp_s1_ancillary_burst_ids(cslc_input_burst_ids : set,
-                                      ancillary_file_list : list, logger, name):
+def check_disp_s1_ancillary_burst_ids(cslc_input_burst_ids: set,
+                                      ancillary_file_list: list, logger, name):
     # pylint: disable=C0103
     """
     Verify burst_ids from the ancillary input files:
@@ -214,7 +214,7 @@ def check_disp_s1_ancillary_burst_ids(cslc_input_burst_ids : set,
     """
     nl, tab, dtab = '\n', '\t', '\t\t'  # used to format log output in fstrings.
 
-    ancillary_burst_ids : set = get_burst_id_set(ancillary_file_list, logger, name)
+    ancillary_burst_ids: set = get_burst_id_set(ancillary_file_list, logger, name)
 
     # Test none of the ancillary inputs have the same burst ID
     if len(ancillary_burst_ids) != len(ancillary_file_list):
@@ -271,8 +271,8 @@ def get_cslc_input_burst_id_set(cslc_input_file_list, logger, name):
                                              cslc_input_file_list))
     single_input_file_list = list(set(cslc_input_file_list) - set(compressed_input_file_list))
 
-    compressed_file_burst_id_set : set = get_burst_id_set(compressed_input_file_list, logger, name)
-    single_file_burst_id_set : set = get_burst_id_set(single_input_file_list, logger, name)
+    compressed_file_burst_id_set: set = get_burst_id_set(compressed_input_file_list, logger, name)
+    single_file_burst_id_set: set = get_burst_id_set(single_input_file_list, logger, name)
 
     # Case 1: uncompressed files only in cslc inputs
     if len(compressed_file_burst_id_set) == 0:
diff --git a/src/opera/util/mock_utils.py b/src/opera/util/mock_utils.py
index 9cfa7f11..9c9a85b5 100644
--- a/src/opera/util/mock_utils.py
+++ b/src/opera/util/mock_utils.py
@@ -163,10 +163,11 @@ def __init__(self):
             'RTC_BURST_ID': 't114_243013_iw1, t114_243014_iw1, t114_243015_iw1, '
                             't114_243016_iw1',
             'RTC_INPUT_L1_SLC_GRANULES': 'S1A_IW_SLC__1SDV_20231213T121214_20231213T121243_051636_063C28_8D5C.zip',
-            'RTC_INPUT_LIST': "['OPERA_L2_RTC-S1_T114-243016-IW1_20231213T121235Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
-                              "'OPERA_L2_RTC-S1_T114-243015-IW1_20231213T121233Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
-                              "'OPERA_L2_RTC-S1_T114-243013-IW1_20231213T121227Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
-                              "'OPERA_L2_RTC-S1_T114-243014-IW1_20231213T121230Z_20231213T184607Z_S1A_30_v1.0_VV.tif']",
+            'RTC_INPUT_LIST':
+                "['OPERA_L2_RTC-S1_T114-243016-IW1_20231213T121235Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
+                "'OPERA_L2_RTC-S1_T114-243015-IW1_20231213T121233Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
+                "'OPERA_L2_RTC-S1_T114-243013-IW1_20231213T121227Z_20231213T184607Z_S1A_30_v1.0_VV.tif', "
+                "'OPERA_L2_RTC-S1_T114-243014-IW1_20231213T121230Z_20231213T184607Z_S1A_30_v1.0_VV.tif']",
             'RTC_ORBIT_PASS_DIRECTION': 'ascending',
             'RTC_PRODUCT_VERSION': '1.0',
             'RTC_QA_RFI_INFO_AVAILABLE': 'True',
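[Editor's note — not part of the patch] rtc_s1_compare.py walks both HDF5 trees with a hand-rolled recursion (`get_list_dataset_attrs_keys`) before comparing the keys common to both files. An equivalent, shorter traversal is possible with h5py's `visititems`; this is a sketch of the idea, not the script's actual implementation, and the demo file it creates is invented for illustration:

```python
import h5py
import numpy as np


def collect_dataset_keys(hdf_obj):
    """Gather all dataset paths, like get_list_dataset_attrs_keys in rtc_s1_compare."""
    found = []
    # visititems calls the function once per member, depth-first;
    # returning None keeps the traversal going.
    hdf_obj.visititems(
        lambda name, obj: found.append(f'/{name}') if isinstance(obj, h5py.Dataset) else None)
    return found


# Tiny self-contained demo file
with h5py.File('demo.h5', 'w') as f:
    f.create_dataset('data/VV', data=np.zeros((2, 2)))
    f['data'].attrs['units'] = 'gamma0'

with h5py.File('demo.h5', 'r') as f:
    print(collect_dataset_keys(f))  # ['/data/VV']
```

Once both files' key lists are in hand, set differences give the structural discrepancies and the intersection gives the keys to compare element-wise, which is exactly how `compare_rtc_hdf5_files` proceeds.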