From 5e5675f3129c3609eeb90cd824e2c5ccf12b8398 Mon Sep 17 00:00:00 2001 From: Mark Jordan Date: Mon, 6 May 2024 19:43:55 -0700 Subject: [PATCH 1/5] Resolves #773. --- workbench_fields.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/workbench_fields.py b/workbench_fields.py index 11cb2ab..974f250 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -87,6 +87,10 @@ def create(self, config, field_definitions, entity, row, field_name): ): field_values.append({"value": subvalue, "format": text_format}) else: + if field_definitions[field_name]["field_type"] == "integer": + subvalue = int(subvalue) + if field_definitions[field_name]["field_type"] == "float": + subvalue = float(subvalue) field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values @@ -160,6 +164,10 @@ def update( {"value": subvalue, "format": text_format} ) else: + if field_definitions[field_name]["field_type"] == "integer": + subvalue = int(subvalue) + if field_definitions[field_name]["field_type"] == "float": + subvalue = float(subvalue) entity[field_name].append({"value": subvalue}) entity[field_name] = self.dedupe_values(entity[field_name]) if -1 < cardinality < len(entity[field_name]): @@ -192,6 +200,11 @@ def update( ): field_values.append({"value": subvalue, "format": text_format}) else: + if field_definitions[field_name]["type"] == "integer": + subvalue = int(subvalue) + if field_definitions[field_name]["type"] == "float": + subvalue = float(subvalue) + entity[field_name].append({"value": subvalue}) field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values From 609e6b3c1e8954e5cec4b2c10bf31f8fdd3aac10 Mon Sep 17 00:00:00 2001 From: Mark Jordan Date: Mon, 6 May 2024 19:50:54 -0700 Subject: [PATCH 2/5] WIP on #774. --- workbench_fields.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index 974f250..9374d67 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -294,10 +294,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -498,10 +499,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -693,9 +695,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ------- string A string structured same as the Workbench CSV field data for this field type. + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -937,10 +940,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -1166,10 +1170,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -1370,10 +1375,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -1568,10 +1574,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return values + return None subvalues = list() for subvalue in field_data: @@ -1918,10 +1925,11 @@ def serialize(self, config, field_definitions, field_name, field_data): Returns ------- string - A string structured same as the Workbench CSV field data for this field type. + A string structured same as the Workbench CSV field data for this field type, + or None if there is nothing to return. """ if "field_type" not in field_definitions[field_name]: - return field_data + return None # We allow fields to overide the global subdelimiter. paragraph_configs = ( From fe70000ef468a53569b6f0c5a564d49fef08ff30 Mon Sep 17 00:00:00 2001 From: Mark Jordan Date: Mon, 6 May 2024 20:20:17 -0700 Subject: [PATCH 3/5] WIP on #773. --- tests/field_tests.py | 169 ++++++++++++++++++++++++++++++++++++++----- workbench_fields.py | 5 +- 2 files changed, 152 insertions(+), 22 deletions(-) diff --git a/tests/field_tests.py b/tests/field_tests.py index 473ff6b..dd8df5b 100644 --- a/tests/field_tests.py +++ b/tests/field_tests.py @@ -3,7 +3,6 @@ import sys import os -import io import unittest import collections @@ -34,7 +33,11 @@ def test_create_with_simple_field(self): # Create a node with a simple field of cardinality 1, no subdelimiters. self.field_definitions = { - "field_foo": {"cardinality": 1, "formatted_text": False} + "field_foo": { + "cardinality": 1, + "formatted_text": False, + "field_type": "text", + } } field = workbench_fields.SimpleField() @@ -79,7 +82,11 @@ def test_create_with_simple_field(self): # Create a node with a simple field of cardinality unlimited, no subdelimiters. self.field_definitions = { - "field_foo": {"cardinality": -1, "formatted_text": False} + "field_foo": { + "cardinality": -1, + "formatted_text": False, + "field_type": "text", + } } field = workbench_fields.SimpleField() @@ -115,7 +122,11 @@ def test_create_with_simple_field(self): # Create a node with a simple field of cardinality limited, no subdelimiters. self.field_definitions = { - "field_foo": {"cardinality": 2, "formatted_text": False} + "field_foo": { + "cardinality": 2, + "formatted_text": False, + "field_type": "text", + } } field = workbench_fields.SimpleField() @@ -164,6 +175,72 @@ def test_create_with_simple_field(self): r"simple_006 would exceed maximum number of allowed values \(2\)", ) + def test_create_with_simple_integer_field(self): + existing_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + } + + # Create a node with a simple integer field of cardinality 1, no subdelimiters. + self.field_definitions = { + "field_int": { + "cardinality": 1, + "formatted_text": False, + "field_type": "integer", + } + } + + field = workbench_fields.SimpleField() + csv_record = collections.OrderedDict() + csv_record["id"] = "simple_001_int" + csv_record["field_int"] = "5" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_int" + ) + expected_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_int": [{"value": 5}], + } + self.assertDictEqual(node, expected_node) + + def test_create_with_simple_float_field(self): + existing_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + } + + # Create a node with a simple integer field of cardinality 1, no subdelimiters. + self.field_definitions = { + "field_float": { + "cardinality": 1, + "formatted_text": False, + "field_type": "float", + } + } + + field = workbench_fields.SimpleField() + csv_record = collections.OrderedDict() + csv_record["id"] = "simple_001_int" + csv_record["field_float"] = "6.0" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_float", + ) + expected_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_float": [{"value": 6.0}], + } + self.assertDictEqual(node, expected_node) + def test_simple_field_title_update_replace(self): # Update the node title, first with an 'update_mode' of replace. existing_node = { @@ -172,7 +249,9 @@ def test_simple_field_title_update_replace(self): "status": [{"value": 1}], } - self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} + self.field_definitions = { + "title": {"cardinality": 1, "formatted_text": False, "field_type": "text"} + } self.config["task"] = "update" self.config["update_mode"] = "replace" @@ -204,7 +283,9 @@ def test_simple_field_title_update_append(self): "status": [{"value": 1}], } - self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} + self.field_definitions = { + "title": {"cardinality": 1, "formatted_text": False, "field_type": "text"} + } self.config["task"] = "update" self.config["update_mode"] = "append" @@ -242,7 +323,11 @@ def test_simple_field_update_replace_cardinality_1_no_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 1, "formatted_text": False} + "field_foo": { + "cardinality": 1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -277,7 +362,11 @@ def test_simple_field_update_append_cardinality_1_no_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 1, "formatted_text": False} + "field_foo": { + "cardinality": 1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -317,7 +406,11 @@ def test_simple_field_update_replace_cardinality_1_with_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 1, "formatted_text": False} + "field_foo": { + "cardinality": 1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -358,7 +451,11 @@ def test_simple_field_update_replace_cardinality_unlimited_no_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": -1, "formatted_text": False} + "field_foo": { + "cardinality": -1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -394,7 +491,11 @@ def test_simple_field_update_replace_cardinality_unlimited_with_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": -1, "formatted_text": False} + "field_foo": { + "cardinality": -1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -422,7 +523,11 @@ def test_simple_field_update_replace_cardinality_unlimited_with_subdelims(self): def test_simple_field_update_append_cardinality_unlimited_no_subdelims(self): self.field_definitions = { - "field_foo": {"cardinality": -1, "formatted_text": False} + "field_foo": { + "cardinality": -1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -500,7 +605,11 @@ def test_simple_field_update_append_cardinality_unlimited_with_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": -1, "formatted_text": False} + "field_foo": { + "cardinality": -1, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -539,7 +648,11 @@ def test_simple_field_update_replace_cardinality_limited_no_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 2, "formatted_text": False} + "field_foo": { + "cardinality": 2, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -577,7 +690,11 @@ def test_simple_field_update_append_cardinality_limited_no_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 2, "formatted_text": False} + "field_foo": { + "cardinality": 2, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -623,7 +740,11 @@ def test_simple_field_update_replace_cardinality_limited_with_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 2, "formatted_text": False} + "field_foo": { + "cardinality": 2, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -668,7 +789,11 @@ def test_simple_field_update_append_cardinality_limited_with_subdelims(self): } self.field_definitions = { - "field_foo": {"cardinality": 3, "formatted_text": False} + "field_foo": { + "cardinality": 3, + "formatted_text": False, + "field_type": "text", + } } self.config["task"] = "update" @@ -717,7 +842,11 @@ def test_simple_field_update_delete(self): } self.field_definitions = { - "field_foo": {"cardinality": 3, "formatted_text": False} + "field_foo": { + "cardinality": 3, + "formatted_text": False, + "field_type": "text", + } } self.config["update_mode"] = "delete" @@ -941,7 +1070,9 @@ def test_simple_field_title_update_replace(self): "status": [{"value": 1}], } - self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} + self.field_definitions = { + "title": {"cardinality": 1, "formatted_text": False, "field_type": "text"} + } self.config["task"] = "update" self.config["update_mode"] = "replace" diff --git a/workbench_fields.py b/workbench_fields.py index 9374d67..ff93224 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -18,7 +18,6 @@ """ import json -import copy from workbench_utils import * @@ -200,9 +199,9 @@ def update( ): field_values.append({"value": subvalue, "format": text_format}) else: - if field_definitions[field_name]["type"] == "integer": + if field_definitions[field_name]["field_type"] == "integer": subvalue = int(subvalue) - if field_definitions[field_name]["type"] == "float": + if field_definitions[field_name]["field_type"] == "float": subvalue = float(subvalue) entity[field_name].append({"value": subvalue}) field_values.append({"value": subvalue}) From 4eb35e7c111a20e73cc16edf038d6dc9080ed758 Mon Sep 17 00:00:00 2001 From: Mark Jordan Date: Tue, 14 May 2024 07:05:30 -0700 Subject: [PATCH 4/5] WIP on #773. --- tests/unit_tests.py | 5 ++++ workbench_fields.py | 56 ++++++++++++++++++++++++++++++++++++++++----- workbench_utils.py | 23 ++++++++++++++++--- 3 files changed, 75 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests.py b/tests/unit_tests.py index eed1b29..22c9019 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -844,6 +844,11 @@ def test_value_is_numeric(self): res = workbench_utils.value_is_numeric(value) self.assertTrue(res, "Value " + str(value) + " is not numeric.") + values = ["200.23", "0.5", 999.999] + for value in values: + res = workbench_utils.value_is_numeric(value, allow_decimals=True) + self.assertTrue(res, "Value " + str(value) + " is not numeric.") + def test_value_is_not_numeric(self): values = ["n200", False, "999-1000"] for value in values: diff --git a/workbench_fields.py b/workbench_fields.py index ff93224..09463bd 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -86,9 +86,13 @@ def create(self, config, field_definitions, entity, row, field_name): ): field_values.append({"value": subvalue, "format": text_format}) else: - if field_definitions[field_name]["field_type"] == "integer": + if field_definitions[field_name][ + "field_type" + ] == "integer" and value_is_numeric(subvalue): subvalue = int(subvalue) - if field_definitions[field_name]["field_type"] == "float": + if field_definitions[field_name][ + "field_type" + ] == "float" and value_is_numeric(subvalue, allow_decimals=True): subvalue = float(subvalue) field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) @@ -163,9 +167,13 @@ def update( {"value": subvalue, "format": text_format} ) else: - if field_definitions[field_name]["field_type"] == "integer": + if field_definitions[field_name][ + "field_type" + ] == "integer" and value_is_numeric(subvalue): subvalue = int(subvalue) - if field_definitions[field_name]["field_type"] == "float": + if field_definitions[field_name][ + "field_type" + ] == "float" and value_is_numeric(subvalue, allow_decimals=True): subvalue = float(subvalue) entity[field_name].append({"value": subvalue}) entity[field_name] = self.dedupe_values(entity[field_name]) @@ -199,9 +207,13 @@ def update( ): field_values.append({"value": subvalue, "format": text_format}) else: - if field_definitions[field_name]["field_type"] == "integer": + if field_definitions[field_name][ + "field_type" + ] == "integer" and value_is_numeric(subvalue): subvalue = int(subvalue) - if field_definitions[field_name]["field_type"] == "float": + if field_definitions[field_name][ + "field_type" + ] == "float" and value_is_numeric(subvalue, allow_decimals=True): subvalue = float(subvalue) entity[field_name].append({"value": subvalue}) field_values.append({"value": subvalue}) @@ -259,6 +271,38 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): ) logging.warning(message) return valid_values + elif field_definitions[field_name]["field_type"] == "integer": + valid_values = list() + for subvalue in values: + if value_is_numeric(subvalue) is True: + valid_values.append(subvalue) + else: + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid integer field value.' + ) + logging.warning(message) + return valid_values + elif field_definitions[field_name]["field_type"] in ["decimal", "float"]: + valid_values = list() + for subvalue in values: + if value_is_numeric(subvalue, allow_decimals=True) is True: + valid_values.append(subvalue) + else: + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid ' + + field_definitions[field_name]["field_type"] + + " field value." + ) + logging.warning(message) + return valid_values elif field_definitions[field_name]["field_type"] == "list_string": valid_values = list() for subvalue in values: diff --git a/workbench_utils.py b/workbench_utils.py index a2bd23f..0f64987 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -6483,9 +6483,26 @@ def get_field_vocabularies(config, field_definitions, field_name): return False -def value_is_numeric(value): - """Tests to see if value is numeric.""" - var = str(value) +def value_is_numeric(value, allow_decimals=False): + """Tests to see if value is numeric.""" + + """Parameters + ---------- + value : varies + The value to check. By design, we don't know what data type it is. + allow_decimals: boolean + Whether or not to allow '.' in the value. Decimal and float number types have decimals. + + Returns + ------- + boolean + """ + if allow_decimals is True: + if "." in str(value): + var = str(value).replace(".", "") + else: + var = str(value) + var = var.strip() if var.isnumeric(): return True From ff9eecd8d585407e308d9d5d9682a1a0d605d60c Mon Sep 17 00:00:00 2001 From: Mark Jordan Date: Tue, 14 May 2024 13:10:55 -0700 Subject: [PATCH 5/5] WIP on #773. --- workbench_utils.py | 70 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/workbench_utils.py b/workbench_utils.py index 0f64987..8ee078c 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -2868,6 +2868,10 @@ def check_input(config, args): "Warning: Issues detected with validating typed relation field values in the CSV file. See the log for more detail." ) + validate_numeric_fields_data = get_csv_data(config) + # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). + validate_numeric_fields(config, field_definitions, validate_numeric_fields_data) + validate_media_track_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_media_track_fields(config, validate_media_track_csv_data) @@ -6497,13 +6501,11 @@ def value_is_numeric(value, allow_decimals=False): ------- boolean """ - if allow_decimals is True: - if "." in str(value): - var = str(value).replace(".", "") + var = str(value) + if allow_decimals is True and "." in str(value): + var = str(value).replace(".", "") else: - var = str(value) - - var = var.strip() + var = var.strip() if var.isnumeric(): return True else: @@ -6765,6 +6767,62 @@ def validate_csv_field_length(config, field_definitions, csv_data): logging.warning(message + message_2) +def validate_numeric_fields(config, field_definitions, csv_data): + """Validate integer, decimal, and float fields.""" + numeric_fields_present = False + for count, row in enumerate(csv_data, start=1): + for field_name in field_definitions.keys(): + if field_definitions[field_name]["field_type"] == "integer": + if field_name in row: + numeric_fields_present = True + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) + for field_value in delimited_field_values: + if len(field_value.strip()): + if not value_is_numeric(field_value.strip()): + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " (" + + field_value + + ") is not a valid integer value." + ) + logging.error(message) + sys.exit("Error: " + message) + if field_definitions[field_name]["field_type"] in ["decimal", "float"]: + if field_name in row: + numeric_fields_present = True + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) + for field_value in delimited_field_values: + if len(field_value.strip()): + if not value_is_numeric( + field_value.strip(), allow_decimals=True + ): + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " (" + + field_value + + ") is not a valid " + + field_definitions[field_name]["field_type"] + + " value." + ) + logging.error(message) + sys.exit("Error: " + message) + + if numeric_fields_present is True: + message = "OK, numeric field values in the CSV file validate." + print(message) + logging.info(message) + + def validate_geolocation_fields(config, field_definitions, csv_data): """Validate lat,long values in fields that are of type 'geolocation'.""" geolocation_fields_present = False