From df1df23210149021d8d3b734229984552284fb6f Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 18 Mar 2024 12:53:33 -0400 Subject: [PATCH 1/2] Simplify fields code to always split on subdelimiter --- workbench_fields.py | 1250 +++++++++++++++---------------------------- 1 file changed, 441 insertions(+), 809 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index d002d99..8a0a51c 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -68,81 +68,52 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - else: - row[field_name] = truncate_csv_value( - field_name, id_field, field_definitions[field_name], row[field_name] + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = [{"value": row[field_name]}] - + field_values.append({"value": subvalue}) + entity[field_name] = field_values # Cardinality has a limit, including 1. else: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - else: - row[field_name] = truncate_csv_value( - field_name, id_field, field_definitions[field_name], row[field_name] + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = [{"value": row[field_name]}] + field_values.append({"value": subvalue}) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values return entity @@ -195,212 +166,117 @@ def update( # Cardinality has a limit. if field_definitions[field_name]["cardinality"] > 0: if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - entity[field_name].append( - {"value": subvalue, "format": text_format} - ) - else: - entity[field_name].append({"value": subvalue}) - entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = entity[field_name][ - : field_definitions[field_name]["cardinality"] - ] - else: - row[field_name] = self.remove_invalid_values( - config, field_definitions, field_name, row[field_name] - ) - row[field_name] = truncate_csv_value( + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): entity[field_name].append( - {"value": row[field_name], "format": text_format} + {"value": subvalue, "format": text_format} ) else: - entity[field_name].append({"value": row[field_name]}) - entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = entity[field_name][ - : field_definitions[field_name]["cardinality"] - ] - + entity[field_name].append({"value": subvalue}) + entity[field_name] = self.dedupe_values(entity[field_name]) + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = entity[field_name][ + : field_definitions[field_name]["cardinality"] + ] if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - else: - row[field_name] = truncate_csv_value( + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = [{"value": row[field_name]}] + field_values.append({"value": subvalue}) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values # Cardinatlity is unlimited. else: if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - entity[field_name] = entity_field_values + field_values - entity[field_name] = self.dedupe_values(entity[field_name]) - else: - row[field_name] = truncate_csv_value( + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = entity_field_values + [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = entity_field_values + [ - {"value": row[field_name]} - ] - entity[field_name] = self.dedupe_values(entity[field_name]) + field_values.append({"value": subvalue}) + entity[field_name] = entity_field_values + field_values + entity[field_name] = self.dedupe_values(entity[field_name]) if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - entity[field_name] = self.dedupe_values(entity[field_name]) - else: - row[field_name] = truncate_csv_value( + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = [{"value": row[field_name]}] + field_values.append({"value": subvalue}) + entity[field_name] = field_values + entity[field_name] = self.dedupe_values(entity[field_name]) return entity @@ -548,34 +424,24 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_geolocation_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values # Cardinality has a limit. else: - if config["subdelimiter"] in row[field_name]: - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues - else: - field_value = split_geolocation_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = subvalues return entity @@ -623,17 +489,13 @@ def update( # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - else: - field_value = split_geolocation_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append(subvalue) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values if config["update_mode"] == "append": field_values = split_geolocation_string(config, row[field_name]) if field_name in entity: @@ -645,56 +507,38 @@ def update( if config["update_mode"] == "replace": subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if config["subdelimiter"] in row[field_name]: - field_values = [] - for subvalue in subvalues: - field_values.append(subvalue) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - field_values = field_values[ - : field_definitions[field_name]["cardinality"] - ] - entity[field_name] = field_values - else: - entity[field_name] = subvalues + field_values = [] + for subvalue in subvalues: + field_values.append(subvalue) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + field_values = field_values[ + : field_definitions[field_name]["cardinality"] + ] + entity[field_name] = field_values if config["update_mode"] == "append": subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if config["subdelimiter"] in row[field_name]: - for subvalue in subvalues: - entity_field_values.append(subvalue) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - for subvalue in subvalues: - entity_field_values.append(subvalue) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) + for subvalue in subvalues: + entity_field_values.append(subvalue) + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) return entity @@ -810,31 +654,21 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - else: - field_value = split_link_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + entity[field_name] = subvalues # Cardinality has a limit, including 1. else: - if config["subdelimiter"] in row[field_name]: - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues - else: - field_value = split_link_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = subvalues return entity @@ -882,59 +716,41 @@ def update( # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_link_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_link_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = subvalues = self.dedupe_values( - entity_field_values - ) - entity[field_name] = entity_field_values - else: - field_value = split_link_string(config, row[field_name]) - if field_name in entity: - for field_subvalue in field_value: - entity_field_values.append(field_subvalue) - entity[field_name] = entity_field_values + field_values = [] + subvalues = split_link_string(config, row[field_name]) + for subvalue in subvalues: + field_values.append(subvalue) + if field_name in entity: + for field_subvalue in field_values: + entity_field_values.append(field_subvalue) + entity_field_values = subvalues = self.dedupe_values( + entity_field_values + ) + entity[field_name] = entity_field_values + # Cardinality has a limit. else: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_link_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": subvalues = split_link_string(config, row[field_name]) for subvalue in subvalues: @@ -1073,25 +889,16 @@ def create(self, config, field_definitions, entity, row, field_name): if field_definitions[field_name]["target_type"] == "taxonomy_term": target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - if config["subdelimiter"] in row[field_name]: - prepared_tids = [] - delimited_values = row[field_name].split(config["subdelimiter"]) - for delimited_value in delimited_values: - tid = prepare_term_id( - config, field_vocabs, field_name, delimited_value - ) - if value_is_numeric(tid): - tid = str(tid) - prepared_tids.append(tid) - else: - continue - row[field_name] = config["subdelimiter"].join(prepared_tids) - else: - row[field_name] = prepare_term_id( - config, field_vocabs, field_name, row[field_name] - ) - if value_is_numeric(row[field_name]): - row[field_name] = str(row[field_name]) + prepared_tids = [] + delimited_values = row[field_name].split(config["subdelimiter"]) + for delimited_value in delimited_values: + tid = prepare_term_id(config, field_vocabs, field_name, delimited_value) + if value_is_numeric(tid): + tid = str(tid) + prepared_tids.append(tid) + else: + continue + row[field_name] = config["subdelimiter"].join(prepared_tids) if field_definitions[field_name]["target_type"] == "node": target_type = "node_type" @@ -1101,48 +908,33 @@ def create(self, config, field_definitions, entity, row, field_name): # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in str(row[field_name]): - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = str(subvalue) - field_values.append( - {"target_id": subvalue, "target_type": target_type} - ) - entity[field_name] = field_values - else: - entity[field_name] = [ - {"target_id": str(row[field_name]), "target_type": target_type} - ] + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + subvalue = str(subvalue) + field_values.append({"target_id": subvalue, "target_type": target_type}) + entity[field_name] = field_values + # Cardinality has a limit. elif field_definitions[field_name]["cardinality"] > 0: - if config["subdelimiter"] in str(row[field_name]): - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = str(subvalue) - field_values.append( - {"target_id": subvalue, "target_type": target_type} - ) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = field_values - else: - entity[field_name] = [ - {"target_id": str(row[field_name]), "target_type": target_type} + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + subvalue = str(subvalue) + field_values.append({"target_id": subvalue, "target_type": target_type}) + if len(field_values) > int(field_definitions[field_name]["cardinality"]): + entity[field_name] = field_values[ + : field_definitions[field_name]["cardinality"] ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + else: + entity[field_name] = field_values # Cardinality is 1. else: subvalues = row[field_name].split(config["subdelimiter"]) @@ -1198,25 +990,16 @@ def update( if field_definitions[field_name]["target_type"] == "taxonomy_term": target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - if config["subdelimiter"] in str(row[field_name]): - prepared_tids = [] - delimited_values = row[field_name].split(config["subdelimiter"]) - for delimited_value in delimited_values: - tid = prepare_term_id( - config, field_vocabs, field_name, delimited_value - ) - if value_is_numeric(tid): - tid = str(tid) - prepared_tids.append(tid) - else: - continue - row[field_name] = config["subdelimiter"].join(prepared_tids) - else: - row[field_name] = prepare_term_id( - config, field_vocabs, field_name, row[field_name] - ) - if value_is_numeric(row[field_name]): - row[field_name] = str(row[field_name]) + prepared_tids = [] + delimited_values = row[field_name].split(config["subdelimiter"]) + for delimited_value in delimited_values: + tid = prepare_term_id(config, field_vocabs, field_name, delimited_value) + if value_is_numeric(tid): + tid = str(tid) + prepared_tids.append(tid) + else: + continue + row[field_name] = config["subdelimiter"].join(prepared_tids) if field_definitions[field_name]["target_type"] == "node": target_type = "node_type" @@ -1224,101 +1007,68 @@ def update( # Cardinality has a limit. if field_definitions[field_name]["cardinality"] > 0: if config["update_mode"] == "replace": - if config["subdelimiter"] in str(row[field_name]): - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = field_values - else: - entity[field_name] = [ - {"target_id": row[field_name], "target_type": target_type} + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = field_values[ + : field_definitions[field_name]["cardinality"] ] - if config["update_mode"] == "append": - if config["subdelimiter"] in str(row[field_name]): - subvalues = row[field_name].split(config["subdelimiter"]) - for subvalue in subvalues: - entity_field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: + entity[field_name] = field_values + + if config["update_mode"] == "append": + subvalues = row[field_name].split(config["subdelimiter"]) + for subvalue in subvalues: entity_field_values.append( - {"target_id": str(row[field_name]), "target_type": target_type} + {"target_id": str(subvalue), "target_type": target_type} ) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values + entity_field_values = self.dedupe_values(entity_field_values) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + else: + entity[field_name] = entity_field_values # Cardinality is unlimited. else: if config["update_mode"] == "replace": - if config["subdelimiter"] in str(row[field_name]): - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity[field_name] = field_values - else: - entity[field_name] = [ - {"target_id": str(row[field_name]), "target_type": target_type} - ] + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) + entity[field_name] = field_values + if config["update_mode"] == "append": - if config["subdelimiter"] in str(row[field_name]): - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - for subvalue in subvalues: - entity_field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity[field_name] = self.dedupe_values(entity_field_values) - else: + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + for subvalue in subvalues: entity_field_values.append( - {"target_id": str(row[field_name]), "target_type": target_type} + {"target_id": str(subvalue), "target_type": target_type} ) - entity[field_name] = self.dedupe_values(entity_field_values) + entity[field_name] = self.dedupe_values(entity_field_values) return entity @@ -1455,49 +1205,34 @@ def create(self, config, field_definitions, entity, row, field_name): config, row[field_name], target_type ) subvalues = self.dedupe_values(subvalues) - if config["subdelimiter"] in row[field_name]: - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - else: - subvalues[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalues[0]["target_id"] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity[field_name] = subvalues + field_values.append(subvalue) + entity[field_name] = field_values # Cardinality has a limit. elif field_definitions[field_name]["cardinality"] > 1: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_typed_relation_string( - config, row[field_name], target_type - ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > field_definitions[field_name]["cardinality"]: - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_typed_relation_string( - config, row[field_name], target_type + field_values = [] + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > field_definitions[field_name]["cardinality"]: + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], ) - field_value[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, field_value[0]["target_id"] + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity[field_name] = field_value + field_values.append(subvalue) + entity[field_name] = field_values # Cardinality is 1. else: subvalues = split_typed_relation_string( @@ -1566,80 +1301,48 @@ def update( config, row[field_name], target_type ) subvalues = self.dedupe_values(subvalues) - if config["subdelimiter"] in row[field_name]: - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - field_values = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = field_values - else: - subvalues[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalues[0]["target_id"] + field_values = [] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity[field_name] = subvalues - if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_typed_relation_string( - config, row[field_name], target_type + field_values.append(subvalue) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + field_values = field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], ) - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - entity_field_values.append(subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values - else: - csv_typed_relation_value = split_typed_relation_string( - config, row[field_name], target_type + entity[field_name] = field_values + if config["update_mode"] == "append": + field_values = [] + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - csv_typed_relation_value[0]["target_id"] = prepare_term_id( - config, - field_vocabs, + entity_field_values.append(subvalue) + entity_field_values = self.dedupe_values(entity_field_values) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( field_name, - csv_typed_relation_value[0]["target_id"], + row[entity_id_field], + field_definitions[field_name]["cardinality"], ) - entity_field_values.append(csv_typed_relation_value[0]) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values + else: + entity[field_name] = entity_field_values # Cardinality is unlimited. else: @@ -1648,37 +1351,24 @@ def update( config, row[field_name], target_type ) subvalues = self.dedupe_values(subvalues) - if config["subdelimiter"] in row[field_name]: - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - else: - subvalues[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalues[0]["target_id"] + field_values = [] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity[field_name] = subvalues + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": subvalues = split_typed_relation_string( config, row[field_name], target_type ) - if config["subdelimiter"] in row[field_name]: - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - entity_field_values.append(subvalue) - entity[field_name] = self.dedupe_values(entity_field_values) - else: - subvalues[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalues[0]["target_id"] + field_values = [] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity_field_values.append(subvalues[0]) - entity[field_name] = self.dedupe_values(entity_field_values) + entity_field_values.append(subvalue) + entity[field_name] = self.dedupe_values(entity_field_values) return entity @@ -1801,31 +1491,21 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - else: - field_value = split_authority_link_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + entity[field_name] = subvalues # Cardinality has a limit, including 1. else: - if config["subdelimiter"] in row[field_name]: - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues - else: - field_value = split_authority_link_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = subvalues return entity @@ -1873,57 +1553,38 @@ def update( # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_authority_link_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - entity[field_name] = entity_field_values - else: - field_value = split_authority_link_string(config, row[field_name]) - if field_name in entity: - for field_subvalue in field_value: - entity_field_values.append(field_subvalue) - entity[field_name] = entity_field_values + field_values = [] + subvalues = split_authority_link_string(config, row[field_name]) + for subvalue in subvalues: + field_values.append(subvalue) + if field_name in entity: + for field_subvalue in field_values: + entity_field_values.append(field_subvalue) + entity_field_values = self.dedupe_values(entity_field_values) + entity[field_name] = entity_field_values # Cardinality has a limit. else: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_authority_link_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": subvalues = split_authority_link_string(config, row[field_name]) for subvalue in subvalues: @@ -2068,31 +1729,21 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - else: - field_value = split_media_track_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + entity[field_name] = subvalues # Cardinality has a limit, including 1. else: - if config["subdelimiter"] in row[field_name]: - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues - else: - field_value = split_media_track_string(config, row[field_name]) - entity[field_name] = field_value + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = subvalues return entity @@ -2133,57 +1784,38 @@ def update( # Cardinality is unlimited. if field_definitions[field_name]["cardinality"] == -1: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_media_track_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - entity[field_name] = entity_field_values - else: - field_value = split_media_track_string(config, row[field_name]) - if field_name in entity: - for field_subvalue in field_value: - entity_field_values.append(field_subvalue) - entity[field_name] = entity_field_values + field_values = [] + subvalues = split_media_track_string(config, row[field_name]) + for subvalue in subvalues: + field_values.append(subvalue) + if field_name in entity: + for field_subvalue in field_values: + entity_field_values.append(field_subvalue) + entity_field_values = self.dedupe_values(entity_field_values) + entity[field_name] = entity_field_values # Cardinality has a limit. else: if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row["node_id"], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - else: - field_value = split_media_track_string(config, row[field_name]) - entity[field_name] = field_value + field_values = [] + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + row["node_id"], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values if config["update_mode"] == "append": subvalues = split_media_track_string(config, row[field_name]) for subvalue in subvalues: From cca4d9466827c5f60096b002ed3ac235dab5cd68 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 18 Mar 2024 14:04:05 -0400 Subject: [PATCH 2/2] Simplify cardinality checks --- workbench_fields.py | 954 ++++++++++++++------------------------------ 1 file changed, 303 insertions(+), 651 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index 8a0a51c..a0da14d 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -66,54 +66,30 @@ def create(self, config, field_definitions, entity, row, field_name): text_format = config["text_format_id"] id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - # Cardinality has a limit, including 1. - else: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation(field_name, id_field, str(cardinality)) + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append({"value": subvalue, "format": text_format}) + else: + field_values.append({"value": subvalue}) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values return entity @@ -163,120 +139,62 @@ def update( if config["task"] == "update_media": entity_id_field = "media_id" - # Cardinality has a limit. - if field_definitions[field_name]["cardinality"] > 0: - if config["update_mode"] == "append": - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "append": + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - entity[field_name].append( - {"value": subvalue, "format": text_format} - ) - else: - entity[field_name].append({"value": subvalue}) - entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], + entity[field_name].append( + {"value": subvalue, "format": text_format} ) - entity[field_name] = entity[field_name][ - : field_definitions[field_name]["cardinality"] - ] - if config["update_mode"] == "replace": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + else: + entity[field_name].append({"value": subvalue}) + entity[field_name] = self.dedupe_values(entity[field_name]) + if -1 < cardinality < len(entity[field_name]): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - - # Cardinatlity is unlimited. - else: - if config["update_mode"] == "append": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + entity[field_name] = entity[field_name][:cardinality] + if config["update_mode"] == "replace": + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - entity[field_name] = entity_field_values + field_values - entity[field_name] = self.dedupe_values(entity[field_name]) - if config["update_mode"] == "replace": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - entity[field_name] = self.dedupe_values(entity[field_name]) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append({"value": subvalue, "format": text_format}) + else: + field_values.append({"value": subvalue}) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values return entity @@ -422,26 +340,18 @@ def create(self, config, field_definitions, entity, row, field_name): return entity id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - field_values = [] - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - # Cardinality has a limit. - else: - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + subvalues = subvalues[:cardinality] + log_field_cardinality_violation( + field_name, + id_field, + cardinality, + ) + entity[field_name] = subvalues return entity @@ -486,59 +396,31 @@ def update( if config["task"] == "update_media": entity_id_field = "media_id" - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - if config["update_mode"] == "append": - field_values = split_geolocation_string(config, row[field_name]) - if field_name in entity: - for field_value in field_values: - entity_field_values.append(field_value) - entity[field_name] = self.dedupe_values(entity_field_values) - # Cardinality has a limit. - else: - if config["update_mode"] == "replace": - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - field_values = [] - for subvalue in subvalues: - field_values.append(subvalue) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - field_values = field_values[ - : field_definitions[field_name]["cardinality"] - ] - entity[field_name] = field_values + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + field_values = [] + for subvalue in subvalues: + field_values.append(subvalue) + if -1 < cardinality < len(field_values): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + field_values = field_values[:cardinality] + entity[field_name] = field_values - if config["update_mode"] == "append": - subvalues = split_geolocation_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - entity_field_values.append(subvalue) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) + if config["update_mode"] == "append": + subvalues = split_geolocation_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + entity_field_values.append(subvalue) + if -1 < cardinality < len(entity_field_values): + entity_field_values = entity_field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + entity[field_name] = entity_field_values return entity @@ -652,23 +534,19 @@ def create(self, config, field_definitions, entity, row, field_name): return entity id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - # Cardinality has a limit, including 1. - else: - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues + + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + subvalues = subvalues[:cardinality] + log_field_cardinality_violation( + field_name, + id_field, + cardinality, + ) + entity[field_name] = subvalues return entity @@ -713,59 +591,30 @@ def update( if config["task"] == "update_media": entity_id_field = "media_id" - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - field_values = [] - subvalues = split_link_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = subvalues = self.dedupe_values( - entity_field_values - ) - entity[field_name] = entity_field_values - - # Cardinality has a limit. - else: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - subvalues = split_link_string(config, row[field_name]) - for subvalue in subvalues: - entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + field_values = [] + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values + if config["update_mode"] == "append": + subvalues = split_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + entity_field_values.append(subvalue) + if -1 < cardinality < len(entity_field_values): + entity_field_values = entity_field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + entity[field_name] = entity_field_values return entity @@ -906,43 +755,19 @@ def create(self, config, field_definitions, entity, row, field_name): if field_definitions[field_name]["target_type"] == "media": target_type = "media_type" - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = str(subvalue) - field_values.append({"target_id": subvalue, "target_type": target_type}) - entity[field_name] = field_values + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + subvalue = str(subvalue) + field_values.append({"target_id": subvalue, "target_type": target_type}) - # Cardinality has a limit. - elif field_definitions[field_name]["cardinality"] > 0: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = str(subvalue) - field_values.append({"target_id": subvalue, "target_type": target_type}) - if len(field_values) > int(field_definitions[field_name]["cardinality"]): - entity[field_name] = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = field_values - # Cardinality is 1. + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(field_values): + entity[field_name] = field_values[:cardinality] + log_field_cardinality_violation(field_name, id_field, str(cardinality)) else: - subvalues = row[field_name].split(config["subdelimiter"]) - entity[field_name] = [ - {"target_id": str(subvalues[0]), "target_type": target_type} - ] - if len(subvalues) > 1: - log_field_cardinality_violation(field_name, id_field, "1") + entity[field_name] = field_values return entity @@ -1004,71 +829,37 @@ def update( if field_definitions[field_name]["target_type"] == "node": target_type = "node_type" - # Cardinality has a limit. - if field_definitions[field_name]["cardinality"] > 0: - if config["update_mode"] == "replace": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = field_values - - if config["update_mode"] == "append": - subvalues = row[field_name].split(config["subdelimiter"]) - for subvalue in subvalues: - entity_field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.dedupe_values(subvalues) + for subvalue in subvalues: + field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) + if -1 < cardinality < len(field_values): + entity[field_name] = field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + else: + entity[field_name] = field_values - # Cardinality is unlimited. - else: - if config["update_mode"] == "replace": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity[field_name] = field_values - - if config["update_mode"] == "append": - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - for subvalue in subvalues: - entity_field_values.append( - {"target_id": str(subvalue), "target_type": target_type} - ) - entity[field_name] = self.dedupe_values(entity_field_values) + if config["update_mode"] == "append": + subvalues = row[field_name].split(config["subdelimiter"]) + for subvalue in subvalues: + entity_field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) + entity_field_values = self.dedupe_values(entity_field_values) + if -1 < cardinality < len(entity_field_values): + entity[field_name] = entity_field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + else: + entity[field_name] = entity_field_values return entity @@ -1198,53 +989,21 @@ def create(self, config, field_definitions, entity, row, field_name): if field_definitions[field_name]["target_type"] == "taxonomy_term": target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - field_values = [] - subvalues = split_typed_relation_string( - config, row[field_name], target_type - ) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - # Cardinality has a limit. - elif field_definitions[field_name]["cardinality"] > 1: - field_values = [] - subvalues = split_typed_relation_string( - config, row[field_name], target_type - ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > field_definitions[field_name]["cardinality"]: - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - # Cardinality is 1. - else: - subvalues = split_typed_relation_string( - config, row[field_name], target_type - ) - subvalues = self.dedupe_values(subvalues) - subvalues[0]["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalues[0]["target_id"] + field_values = [] + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + subvalues = self.dedupe_values(subvalues) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation(field_name, id_field, str(cardinality)) + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - entity[field_name] = [subvalues[0]] - if len(subvalues) > 1: - log_field_cardinality_violation(field_name, id_field, "1") + field_values.append(subvalue) + entity[field_name] = field_values return entity @@ -1294,81 +1053,42 @@ def update( target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - # Cardinality has a limit. - if field_definitions[field_name]["cardinality"] > 0: - if config["update_mode"] == "replace": - subvalues = split_typed_relation_string( - config, row[field_name], target_type + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + subvalues = self.dedupe_values(subvalues) + field_values = [] + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - subvalues = self.dedupe_values(subvalues) - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - if len(field_values) > int( - field_definitions[field_name]["cardinality"] - ): - field_values = field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = field_values - if config["update_mode"] == "append": - field_values = [] - subvalues = split_typed_relation_string( - config, row[field_name], target_type + field_values.append(subvalue) + if -1 < cardinality < len(field_values): + field_values = field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) ) - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - entity_field_values.append(subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int( - field_definitions[field_name]["cardinality"] - ): - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - else: - entity[field_name] = entity_field_values - - # Cardinality is unlimited. - else: - if config["update_mode"] == "replace": - subvalues = split_typed_relation_string( - config, row[field_name], target_type + entity[field_name] = field_values + if config["update_mode"] == "append": + field_values = [] + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + for subvalue in subvalues: + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] ) - subvalues = self.dedupe_values(subvalues) - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - subvalues = split_typed_relation_string( - config, row[field_name], target_type + entity_field_values.append(subvalue) + entity_field_values = self.dedupe_values(entity_field_values) + if -1 < cardinality < len(entity_field_values): + entity[field_name] = entity_field_values[:cardinality] + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) ) - field_values = [] - for subvalue in subvalues: - subvalue["target_id"] = prepare_term_id( - config, field_vocabs, field_name, subvalue["target_id"] - ) - entity_field_values.append(subvalue) - entity[field_name] = self.dedupe_values(entity_field_values) + else: + entity[field_name] = entity_field_values return entity @@ -1489,23 +1209,14 @@ def create(self, config, field_definitions, entity, row, field_name): return entity id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - # Cardinality has a limit, including 1. - else: - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + subvalues = subvalues[:cardinality] + log_field_cardinality_violation(field_name, id_field, str(cardinality)) + entity[field_name] = subvalues return entity @@ -1550,56 +1261,30 @@ def update( if config["task"] == "update_media": entity_id_field = "media_id" - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - entity[field_name] = entity_field_values - # Cardinality has a limit. - else: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_authority_link_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - subvalues = split_authority_link_string(config, row[field_name]) - for subvalue in subvalues: - entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + field_values = [] + subvalues = split_authority_link_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values + if config["update_mode"] == "append": + subvalues = split_authority_link_string(config, row[field_name]) + for subvalue in subvalues: + entity_field_values.append(subvalue) + if -1 < cardinality < len(entity_field_values): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + entity[field_name] = entity_field_values[:cardinality] + else: + entity[field_name] = entity_field_values return entity @@ -1727,23 +1412,17 @@ def create(self, config, field_definitions, entity, row, field_name): return entity id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - entity[field_name] = subvalues - # Cardinality has a limit, including 1. - else: - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = subvalues + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(subvalues): + subvalues = subvalues[:cardinality] + log_field_cardinality_violation( + field_name, + id_field, + cardinality, + ) + entity[field_name] = subvalues return entity @@ -1781,57 +1460,30 @@ def update( if row[field_name] is None: return entity - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - for subvalue in subvalues: - field_values.append(subvalue) - if field_name in entity: - for field_subvalue in field_values: - entity_field_values.append(field_subvalue) - entity_field_values = self.dedupe_values(entity_field_values) - entity[field_name] = entity_field_values - # Cardinality has a limit. - else: - if config["update_mode"] == "replace": - field_values = [] - subvalues = split_media_track_string(config, row[field_name]) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - row["node_id"], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - field_values.append(subvalue) - entity[field_name] = field_values - if config["update_mode"] == "append": - subvalues = split_media_track_string(config, row[field_name]) - for subvalue in subvalues: - entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[ - : field_definitions[field_name]["cardinality"] - ] - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row["node_id"], - field_definitions[field_name]["cardinality"], - ) - + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "replace": + field_values = [] + subvalues = split_media_track_string(config, row[field_name]) + subvalues = self.dedupe_values(subvalues) + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation( + field_name, row["node_id"], str(cardinality) + ) + subvalues = subvalues[:cardinality] + for subvalue in subvalues: + field_values.append(subvalue) + entity[field_name] = field_values + if config["update_mode"] == "append": + subvalues = split_media_track_string(config, row[field_name]) + for subvalue in subvalues: + entity_field_values.append(subvalue) + if -1 < cardinality < len(entity_field_values): + entity[field_name] = entity_field_values[:cardinality] + log_field_cardinality_violation( + field_name, row["node_id"], str(cardinality) + ) + else: + entity[field_name] = entity_field_values return entity def dedupe_values(self, values): @@ -1986,10 +1638,10 @@ def create(self, config, field_definitions, entity, row, field_name): # @todo self.dedup_values # Enforce cardinality. - cardinality = field_definitions[field_name].get("cardinality", -1) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) if -1 < cardinality < len(subvalues): log_field_cardinality_violation(field_name, id_field, str(cardinality)) - subvalues = subvalues[slice(0, cardinality)] + subvalues = subvalues[:cardinality] # Paragraphs are essentially field bundles, like any other entity, # the difference is that this node "owns" the entity rather @@ -2211,7 +1863,7 @@ def update( entity = self.create(config, field_definitions, entity, row, field_name) entity[field_name] = entity_field_values + entity[field_name] # Enforce cardinality - cardinality = field_definitions[field_name].get("cardinality", -1) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) if -1 < cardinality < len(entity[field_name]): log_field_cardinality_violation( field_name,