diff --git a/tests/assets/csv_row_filters_test/csv_row_filters_test.csv b/tests/assets/csv_row_filters_test/csv_row_filters_test.csv new file mode 100644 index 0000000..97675a9 --- /dev/null +++ b/tests/assets/csv_row_filters_test/csv_row_filters_test.csv @@ -0,0 +1,7 @@ +file,id,title,field_model,field_edtf_date +,issue_812_001,Issue 812 item 1,Image,2020-01-01 +foo.jpg,issue_812_002,Issue 812 item 2,Image, +noo.jpg,issue_812_003,Issue 812 item 3,Binary,1999-01-01|2000 +,issue_812_004,Issue 812 item 4,Digital document,2000|2001 +bar.jpg,issue_812_005,Issue 812 item 5,Digital document,2012-12-12|2001 +,issue_812_006,Issue 812 item 6,Compound object, diff --git a/tests/assets/csv_row_filters_test/csv_row_filters_test.yml b/tests/assets/csv_row_filters_test/csv_row_filters_test.yml new file mode 100644 index 0000000..a8d5d00 --- /dev/null +++ b/tests/assets/csv_row_filters_test/csv_row_filters_test.yml @@ -0,0 +1,12 @@ +task: create +host: https://islandora.traefik.me +username: admin +password: password +input_csv: csv_row_filters_test.csv +nodes_only: true +input_dir: tests/assets/csv_row_filters_test +secure_ssl_only: false +csv_row_filters: + - field_model:isnot:Digital document + - field_edtf_date:is:2020-01-01 + - field_edtf_date:is:2000 diff --git a/tests/islandora_tests_check.py b/tests/islandora_tests_check.py index 6b22478..856fe40 100644 --- a/tests/islandora_tests_check.py +++ b/tests/islandora_tests_check.py @@ -1565,5 +1565,44 @@ def tearDown(self): os.remove(self.true_log_file_path) +class TestCsvRowFilters(unittest.TestCase): + + def setUp(self): + self.current_dir = os.path.dirname(os.path.abspath(__file__)) + config_file_path = os.path.join( + self.current_dir, + "assets", + "csv_row_filters_test", + "csv_row_filters_test.yml", + ) + self.temp_dir = tempfile.gettempdir() + self.preprocessed_csv_file_path = os.path.join( + self.temp_dir, "csv_row_filters_test.csv.preprocessed" + ) + + cmd = ["./workbench", "--config", config_file_path, "--check"] + output = subprocess.check_output(cmd) + self.output = output.decode().strip() + + def test_update_check(self): + file = open(self.preprocessed_csv_file_path) + csv_rows = file.readlines() + file.close() + + self.assertEqual(len(csv_rows), 3, "") + self.assertEqual( + csv_rows[1].strip(), ",issue_812_001,Issue 812 item 1,Image,2020-01-01", "" + ) + self.assertEqual( + csv_rows[2].strip(), + "noo.jpg,issue_812_003,Issue 812 item 3,Binary,1999-01-01|2000", + "", + ) + + def tearDown(self): + if os.path.exists(self.preprocessed_csv_file_path): + os.remove(self.preprocessed_csv_file_path) + + if __name__ == "__main__": unittest.main() diff --git a/workbench_utils.py b/workbench_utils.py index c2047a2..c691382 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -5803,6 +5803,9 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None): # Then populate the lists of filter values. for filter_config in config["csv_row_filters"]: filter_group = filter_config.split(":", 2) + # Prepare the '' filter value. + if filter_group[2] == "''" or filter_group[2] == '""': + filter_group[2] = "" if filter_group[1] == "is": filter_group_field = filter_group[0] filter_group_value = filter_group[2] @@ -5837,41 +5840,40 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None): # WIP on #812. # Apply the "is" and "isnot" csv_row_filters defined defined above. # If the field/value combo is in the 'isnot' list, skip this row. + filter_out_this_csv_row = False if "csv_row_filters" in config and len(config["csv_row_filters"]) > 0: - filter_out_this_csv_row = False + # filter_out_this_csv_row = False if len(row_filters_isnot) > 0: for filter_field, filter_values in row_filters_isnot.items(): - if ( - len(filter_values) > 0 - and filter_field in row - and len(row[filter_field]) > 0 - ): + if len(filter_values) > 0 and filter_field in row: # Split out multiple field values to test each one. values_in_row_field = row[filter_field].split( config["subdelimiter"] ) for value_in_row_field in values_in_row_field: + filter_out_this_csv_row = False if value_in_row_field.strip() in filter_values: filter_out_this_csv_row = True + else: + break if filter_out_this_csv_row is True: continue # If the field/value combo is not in the 'is' list, skip this row. - filter_out_this_csv_row = False if len(row_filters_is) > 0: + # filter_out_this_csv_row = False for filter_field, filter_values in row_filters_is.items(): - if ( - len(filter_values) > 0 - and filter_field in row - and len(row[filter_field]) > 0 - ): + if len(filter_values) > 0 and filter_field in row: # Split out multiple field values to test each one. values_in_row_field = row[filter_field].split( config["subdelimiter"] ) for value_in_row_field in values_in_row_field: + filter_out_this_csv_row = False if value_in_row_field.strip() not in filter_values: filter_out_this_csv_row = True + else: + break if filter_out_this_csv_row is True: continue