From 753bcb31dafc1ecb9c932d32cf02a711b4a1b090 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 5 Dec 2023 15:28:54 -0800 Subject: [PATCH 1/5] Fixing and testing UTF-8-SIG for the criteria table. RE:#1460 --- src/natcap/invest/hra.py | 2 +- tests/test_hra.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/natcap/invest/hra.py b/src/natcap/invest/hra.py index 99a8f035c9..2360618028 100644 --- a/src/natcap/invest/hra.py +++ b/src/natcap/invest/hra.py @@ -1830,7 +1830,7 @@ def _parse_criteria_table(criteria_table_path, target_composite_csv_path): # This function requires that the table is read as a numpy array, so it's # easiest to read the table directly. table = pandas.read_csv(criteria_table_path, header=None, sep=None, - engine='python').to_numpy() + engine='python', encoding='utf-8-sig').to_numpy() # clean up any leading or trailing whitespace. for row_num in range(table.shape[0]): diff --git a/tests/test_hra.py b/tests/test_hra.py index 9e0a7ead45..391642e0f7 100644 --- a/tests/test_hra.py +++ b/tests/test_hra.py @@ -326,6 +326,36 @@ def test_criteria_table_parsing(self): pandas.testing.assert_frame_equal( expected_composite_dataframe, composite_dataframe) + def test_criteria_table_parsing_with_bom(self): + """HRA: criteria table - parse a BOM.""" + from natcap.invest import hra + + criteria_table_path = os.path.join(self.workspace_dir, 'criteria.csv') + with open(criteria_table_path, 'w') as criteria_table: + bom_char = "\uFEFF" # byte-order marker in 16-bit hex value + criteria_table.write( + textwrap.dedent( + f"""\ + {bom_char}HABITAT NAME,eelgrass,,,hardbottom,,,CRITERIA TYPE + HABITAT RESILIENCE ATTRIBUTES,RATING,DQ,WEIGHT,RATING,DQ,WEIGHT,E/C + recruitment rate,2,2,2,2,2,2,C + connectivity rate,2,2,2,2,2,2,C + ,,,,,,, + HABITAT STRESSOR OVERLAP PROPERTIES,,,,,,, + oil,RATING,DQ,WEIGHT,RATING,DQ,WEIGHT,E/C + frequency of disturbance,2,2,3,2,2,3,C + management effectiveness,2,2,1,2,2,1,E + ,,,,,,, + fishing,RATING,DQ,WEIGHT,RATING,DQ,WEIGHT,E/C + frequency of disturbance,2,2,3,2,2,3,C + management effectiveness,2,2,1,2,2,1,E + """ + )) + target_composite_csv_path = os.path.join(self.workspace_dir, + 'composite.csv') + hra._parse_criteria_table(criteria_table_path, + target_composite_csv_path) + def test_criteria_table_file_not_found(self): """HRA: criteria table - spatial file not found.""" from natcap.invest import hra From dcf897b7e893b46d17e4a55ee55ccc3cc0a6d6de Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 5 Dec 2023 15:32:11 -0800 Subject: [PATCH 2/5] Noting change in HISTORY. RE:#1460 --- HISTORY.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 3608338b41..bec64e02da 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -62,6 +62,10 @@ Unreleased Changes * Fixed a bug where the model would crash when processing a float type bathymetry raster with no nodata value. https://github.com/natcap/invest/issues/992 +* HRA + * Fixed an issue preventing the HRA criteria table from loading when the + table was UTF-8 encoded with a Byte-Order Marker. + https://github.com/natcap/invest/issues/1460 * NDR * Fixing an issue where minor geometric issues in the watersheds input (such as a ring self-intersection) would raise an error in the model. From b19b946bbde7cc9552981e7769893bfd3edafd4d Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 6 Dec 2023 18:27:22 -0800 Subject: [PATCH 3/5] Using utf-8-sig encoding directly. RE:#1460 --- tests/test_hra.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_hra.py b/tests/test_hra.py index 391642e0f7..57cf837ad7 100644 --- a/tests/test_hra.py +++ b/tests/test_hra.py @@ -331,12 +331,11 @@ def test_criteria_table_parsing_with_bom(self): from natcap.invest import hra criteria_table_path = os.path.join(self.workspace_dir, 'criteria.csv') - with open(criteria_table_path, 'w') as criteria_table: - bom_char = "\uFEFF" # byte-order marker in 16-bit hex value + with open(criteria_table_path, 'w', encoding='utf-8-sig') as criteria_table: criteria_table.write( textwrap.dedent( - f"""\ - {bom_char}HABITAT NAME,eelgrass,,,hardbottom,,,CRITERIA TYPE + """\ + HABITAT NAME,eelgrass,,,hardbottom,,,CRITERIA TYPE HABITAT RESILIENCE ATTRIBUTES,RATING,DQ,WEIGHT,RATING,DQ,WEIGHT,E/C recruitment rate,2,2,2,2,2,2,C connectivity rate,2,2,2,2,2,2,C @@ -351,6 +350,11 @@ def test_criteria_table_parsing_with_bom(self): management effectiveness,2,2,1,2,2,1,E """ )) + + # Sanity check: make sure the file has the expected BOM + bom_char = "\uFEFF" # byte-order marker in 16-bit hex value + assert open(criteria_table_path).read().startswith(bom_char) + target_composite_csv_path = os.path.join(self.workspace_dir, 'composite.csv') hra._parse_criteria_table(criteria_table_path, From 4dbe7f5fc6742c2eb066bbe248af5b92161d1fbd Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 6 Dec 2023 19:43:51 -0800 Subject: [PATCH 4/5] Making sure the new file is closed after we're done with it. RE:#1460 --- tests/test_hra.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_hra.py b/tests/test_hra.py index 57cf837ad7..dff9811026 100644 --- a/tests/test_hra.py +++ b/tests/test_hra.py @@ -353,7 +353,8 @@ def test_criteria_table_parsing_with_bom(self): # Sanity check: make sure the file has the expected BOM bom_char = "\uFEFF" # byte-order marker in 16-bit hex value - assert open(criteria_table_path).read().startswith(bom_char) + with open(criteria_table_path) as criteria_table: + assert criteria_table.read().startswith(bom_char) target_composite_csv_path = os.path.join(self.workspace_dir, 'composite.csv') From 0bc66becdef7aa45166e6365d38220ff771ad7d5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 7 Dec 2023 20:39:21 -0800 Subject: [PATCH 5/5] Doing the test by opening the file in binary mode. RE:#1460 --- tests/test_hra.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_hra.py b/tests/test_hra.py index dff9811026..e30fd799c6 100644 --- a/tests/test_hra.py +++ b/tests/test_hra.py @@ -352,9 +352,9 @@ def test_criteria_table_parsing_with_bom(self): )) # Sanity check: make sure the file has the expected BOM - bom_char = "\uFEFF" # byte-order marker in 16-bit hex value - with open(criteria_table_path) as criteria_table: - assert criteria_table.read().startswith(bom_char) + # Gotta use binary mode so that python doesn't silently strip the BOM + with open(criteria_table_path, 'rb') as criteria_table: + self.assertTrue(criteria_table.read().startswith(b"\xef\xbb\xbf")) target_composite_csv_path = os.path.join(self.workspace_dir, 'composite.csv')