From cb85d04754d622a0755c05660fee910a9910628f Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Tue, 20 Feb 2024 15:04:05 -0500 Subject: [PATCH 1/4] Add black linter GHA --- .github/workflows/black.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/workflows/black.yml diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 00000000..042c856a --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,12 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: psf/black@stable + with: + options: "--check --verbose" From 1828611f06a877bd835ee4d37f7b70737a8b14b7 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Tue, 20 Feb 2024 15:06:10 -0500 Subject: [PATCH 2/4] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index f9daf3b6..336d58cf 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,12 @@ Metadata, files, and Drupal configurations are, in the real world, extremly comp Using Workbench and reporting problems is the best way you can help make it better! +### Linting + +All code must be formatted using [black](https://black.readthedocs.io/en/stable/usage_and_configuration/the_basics.html) + +You can automatically style your code [using black in your IDE of choice](https://black.readthedocs.io/en/stable/integrations/editors.html). + ### Documentation and code * If you have a suggestion for improving the documentation, please open an issue on [this repository's queue](https://github.com/mjordan/islandora_workbench/issues) and tag your issue "documentation". From 868b06c45a8a54d3fd8712f7eaabb5979067d7b4 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Tue, 20 Feb 2024 15:08:15 -0500 Subject: [PATCH 3/4] Apply black linter to all python files --- WorkbenchConfig.py | 346 +- i7Import/get_islandora_7_content.py | 77 +- i7Import/i7ImportUtilities.py | 179 +- scripts/bootstrap_example.py | 11 +- scripts/entity_post_action_example.py | 16 +- scripts/generate_image_files.py | 60 +- scripts/manage_csv_to_node_id_map.py | 115 +- .../post_get_data_from_view_export_child.py | 16 +- scripts/shutdown_example.py | 11 +- .../execute_bootstrap_script_test/script.py | 4 +- .../script.py | 4 +- tests/csv_id_to_node_id_map_tests.py | 30 +- tests/field_tests.py | 6818 ++++++------- tests/field_tests_values.py | 45 +- tests/islandora_tests.py | 994 +- tests/islandora_tests_check.py | 1060 +- tests/islandora_tests_hooks.py | 76 +- tests/islandora_tests_paged_content.py | 93 +- tests/unit_tests.py | 1164 ++- tests/unit_tests_workbench_config.py | 162 +- workbench_fields.py | 2098 ++-- workbench_utils.py | 8846 +++++++++++------ 22 files changed, 13329 insertions(+), 8896 deletions(-) diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py index 960414a2..d2b05a07 100644 --- a/WorkbenchConfig.py +++ b/WorkbenchConfig.py @@ -19,11 +19,12 @@ def __init__(self, args): self.config = self.get_config() self.validate() logging.basicConfig( - filename=self.config['log_file_path'], + filename=self.config["log_file_path"], level=logging.INFO, - filemode='a', - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + filemode="a", + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) # Get fully constructed config dictionary. 
def get_config(self): @@ -31,112 +32,135 @@ def get_config(self): user_mods = self.get_user_config() # If the password is not set in the config file, or in the environment # variable, prompt the user for the password. - if 'password' not in user_mods: - if 'ISLANDORA_WORKBENCH_PASSWORD' in os.environ: - config['password'] = os.environ['ISLANDORA_WORKBENCH_PASSWORD'] + if "password" not in user_mods: + if "ISLANDORA_WORKBENCH_PASSWORD" in os.environ: + config["password"] = os.environ["ISLANDORA_WORKBENCH_PASSWORD"] else: - config['password'] = getpass(f"Password for Drupal user {user_mods['username']}:") + config["password"] = getpass( + f"Password for Drupal user {user_mods['username']}:" + ) # Blend defaults with user mods for key, value in user_mods.items(): config[key] = value # Modify some conditional values. - if 'temp_dir' not in user_mods.keys(): - config['temp_dir'] = tempfile.gettempdir() - if user_mods['task'] in ['add_media', 'update', 'delete', 'export_csv']: - config['id_field'] = 'node_id' - if 'task' == 'delete_media': - config['id_field'] = 'media_id' - if user_mods['task'] == 'create_terms': - config['id_field'] = 'term_name' - config['allow_adding_terms'] = True + if "temp_dir" not in user_mods.keys(): + config["temp_dir"] = tempfile.gettempdir() + if user_mods["task"] in ["add_media", "update", "delete", "export_csv"]: + config["id_field"] = "node_id" + if "task" == "delete_media": + config["id_field"] = "media_id" + if user_mods["task"] == "create_terms": + config["id_field"] = "term_name" + config["allow_adding_terms"] = True # @todo: These two overrides aren't working. For now, they are set within workbench.update_terms(). - if 'task' == 'update_terms': - config['id_field'] = 'term_id' - if 'paged_content_page_content_type' not in user_mods: - config['paged_content_page_content_type'] = config['content_type'] + if "task" == "update_terms": + config["id_field"] = "term_id" + if "paged_content_page_content_type" not in user_mods: + config["paged_content_page_content_type"] = config["content_type"] # Add preprocessor, if specified. 
- if 'preprocessors' in user_mods: - config['preprocessors'] = {} - for preprocessor in user_mods['preprocessors']: + if "preprocessors" in user_mods: + config["preprocessors"] = {} + for preprocessor in user_mods["preprocessors"]: for key, value in preprocessor.items(): - config['preprocessors'][key] = value + config["preprocessors"][key] = value - config['host'] = config['host'].rstrip('/') - config['current_config_file_path'] = os.path.abspath(self.args.config) - config['field_text_format_ids'] = self.get_field_level_text_output_formats() + config["host"] = config["host"].rstrip("/") + config["current_config_file_path"] = os.path.abspath(self.args.config) + config["field_text_format_ids"] = self.get_field_level_text_output_formats() - if 'csv_id_to_node_id_map_path' in user_mods: - if user_mods['csv_id_to_node_id_map_path'] is not False: - if os.path.isabs(config['csv_id_to_node_id_map_path']) is False: - config['csv_id_to_node_id_map_path'] = os.path.join(config['temp_dir'], 'csv_id_to_node_id_map.db') + if "csv_id_to_node_id_map_path" in user_mods: + if user_mods["csv_id_to_node_id_map_path"] is not False: + if os.path.isabs(config["csv_id_to_node_id_map_path"]) is False: + config["csv_id_to_node_id_map_path"] = os.path.join( + config["temp_dir"], "csv_id_to_node_id_map.db" + ) else: - config['csv_id_to_node_id_map_path'] = False + config["csv_id_to_node_id_map_path"] = False else: - config['csv_id_to_node_id_map_path'] = os.path.join(config['temp_dir'], 'csv_id_to_node_id_map.db') + config["csv_id_to_node_id_map_path"] = os.path.join( + config["temp_dir"], "csv_id_to_node_id_map.db" + ) - config['config_file'] = self.args.config + config["config_file"] = self.args.config return config # Get user input as dictionary. def get_user_config(self): yaml = YAML() - with open(self.args.config, 'r') as stream: + with open(self.args.config, "r") as stream: try: loaded = yaml.load(stream) except YAMLError as exc: print(exc) # 'media_file_fields' has been replaced with 'media_fields' and 'media_type_file_fields'. # This is aliasing code that can be removed at some point in the future. - if 'media_file_fields' in loaded: + if "media_file_fields" in loaded: media_fields = self.get_media_fields() - for media_field in loaded['media_file_fields']: + for media_field in loaded["media_file_fields"]: for media_type, media_field in media_field.items(): media_fields[media_type] = media_field - loaded['media_fields'] = media_fields - loaded['media_type_file_fields'] = media_fields + loaded["media_fields"] = media_fields + loaded["media_type_file_fields"] = media_fields if os.path.isabs(self.args.config): - loaded['config_file_path'] = self.args.config + loaded["config_file_path"] = self.args.config else: - loaded['config_file_path'] = os.path.join(os.getcwd(), self.args.config) + loaded["config_file_path"] = os.path.join(os.getcwd(), self.args.config) return loaded # Returns standard media fields. 
def get_media_fields(self): - return dict({ - 'file': 'field_media_file', - 'document': 'field_media_document', - 'image': 'field_media_image', - 'audio': 'field_media_audio_file', - 'video': 'field_media_video_file', - 'extracted_text': 'field_media_file', - 'fits_technical_metadata': 'field_media_file', - 'remote_video': 'field_media_oembed_video' - }) + return dict( + { + "file": "field_media_file", + "document": "field_media_document", + "image": "field_media_image", + "audio": "field_media_audio_file", + "video": "field_media_video_file", + "extracted_text": "field_media_file", + "fits_technical_metadata": "field_media_file", + "remote_video": "field_media_oembed_video", + } + ) # Returns standard media extensions for given media type. def get_media_types(self): return [ - {'image': ['png', 'gif', 'jpg', 'jpeg']}, - {'document': ['pdf', 'doc', 'docx', 'ppt', 'pptx']}, - {'file': ['tif', 'tiff', 'jp2', 'zip', 'tar']}, - {'audio': ['mp3', 'wav', 'aac']}, - {'video': ['mp4', 'mov', 'wmv', 'avi', 'mts', 'flv', 'f4v', 'swf', 'mkv', 'webm', 'ogv', 'mpeg']}, - {'extracted_text': ['txt']} + {"image": ["png", "gif", "jpg", "jpeg"]}, + {"document": ["pdf", "doc", "docx", "ppt", "pptx"]}, + {"file": ["tif", "tiff", "jp2", "zip", "tar"]}, + {"audio": ["mp3", "wav", "aac"]}, + { + "video": [ + "mp4", + "mov", + "wmv", + "avi", + "mts", + "flv", + "f4v", + "swf", + "mkv", + "webm", + "ogv", + "mpeg", + ] + }, + {"extracted_text": ["txt"]}, ] # Returns standard field name for media track files for given media type. def get_media_track_file_fields(self): - return {'audio': 'field_track', 'video': 'field_track'} + return {"audio": "field_track", "video": "field_track"} # Gets the field->text output format mapping dict from the optional 'field_text_format_ids' # config setting. If the setting is absent, returns an empty dict. def get_field_level_text_output_formats(self): user_config = self.get_user_config() field_text_output_map = {} - if 'field_text_format_ids' in user_config: - for map_entry in user_config['field_text_format_ids']: + if "field_text_format_ids" in user_config: + for map_entry in user_config["field_text_format_ids"]: for fieldname, text_format_id in map_entry.items(): field_text_output_map[fieldname] = text_format_id return field_text_output_map @@ -144,102 +168,102 @@ def get_field_level_text_output_formats(self): # Returns the standard allowed oEmbed provider URLs for a given media type. These # are used to identify URLs in the 'file' CSV column as being remote media. def get_oembed_media_types(self): - return [ - {'remote_video': ['https://www.youtube.com/', 'https://youtu.be']} - ] + return [{"remote_video": ["https://www.youtube.com/", "https://youtu.be"]}] # Returns default configs, to be updated by user-supplied config. def get_default_config(self): return { - 'input_dir': 'input_data', - 'input_csv': 'metadata.csv', - 'media_use_tid': 'http://pcdm.org/use#OriginalFile', + "input_dir": "input_data", + "input_csv": "metadata.csv", + "media_use_tid": "http://pcdm.org/use#OriginalFile", # 'drupal_filesystem' is used only in Drupal 8.x - 9.1; after that, # the filesystem is automatically detected from the media's configuration. 
- 'drupal_filesystem': 'fedora://', - 'id_field': 'id', - 'content_type': 'islandora_object', - 'delimiter': ',', - 'subdelimiter': '|', - 'log_file_path': 'workbench.log', - 'log_file_mode': 'a', - 'allow_missing_files': False, + "drupal_filesystem": "fedora://", + "id_field": "id", + "content_type": "islandora_object", + "delimiter": ",", + "subdelimiter": "|", + "log_file_path": "workbench.log", + "log_file_mode": "a", + "allow_missing_files": False, # See Issue 620. Allows the "--check" function to keep running past errors that can be handled # well with batch operations, rather than having to fix it in real-time when one error is hit. - 'perform_soft_checks': False, - 'update_mode': 'replace', - 'max_node_title_length': 255, - 'paged_content_from_directories': False, - 'delete_media_with_nodes': True, - 'allow_adding_terms': False, - 'nodes_only': False, - 'log_response_time': False, - 'adaptive_pause_threshold': 2, - 'log_response_time_sample': False, - 'log_request_url': False, - 'log_json': False, - 'log_response_body': False, - 'log_response_status_code': False, - 'log_headers': False, - 'log_term_creation': True, - 'progress_bar': False, - 'user_agent': 'Islandora Workbench', - 'allow_redirects': True, - 'secure_ssl_only': True, - 'google_sheets_csv_filename': 'google_sheet.csv', - 'google_sheets_gid': '0', - 'excel_worksheet': 'Sheet1', - 'excel_csv_filename': 'excel.csv', - 'ignore_csv_columns': list(), - 'use_node_title_for_media': False, - 'use_nid_in_media_title': False, - 'use_node_title_for_media_title': False, - 'field_for_remote_filename': False, - 'field_for_media_title': False, - 'delete_tmp_upload': False, - 'list_missing_drupal_fields': False, - 'secondary_tasks': None, - 'sqlite_db_filename': 'workbench_temp_data.db', - 'fixity_algorithm': None, - 'validate_fixity_during_check': False, - 'output_csv_include_input_csv': False, - 'timestamp_rollback': False, - 'rollback_dir': None, - 'enable_http_cache': True, - 'http_cache_storage': 'memory', - 'http_cache_storage_expire_after': 1200, - 'validate_terms_exist': True, - 'validate_parent_node_exists': True, - 'media_types': self.get_media_types(), - 'preprocessors': {}, - 'check': self.args.check, - 'get_csv_template': self.args.get_csv_template, - 'paged_content_sequence_separator': '-', - 'media_type_file_fields': self.get_media_fields(), - 'media_track_file_fields': self.get_media_track_file_fields(), - 'media_fields': self.get_media_fields(), - 'delete_media_by_node_media_use_tids': [], - 'export_csv_term_mode': 'tid', - 'export_csv_file_path': None, - 'export_csv_field_list': [], - 'export_file_directory': None, - 'export_file_media_use_term_id': 'http://pcdm.org/use#OriginalFile', - 'standalone_media_url': False, - 'require_entity_reference_views': True, - 'csv_start_row': 0, - 'csv_stop_row': None, - 'path_to_python': 'python', - 'path_to_workbench_script': os.path.join(os.getcwd(), 'workbench'), - 'oembed_providers': self.get_oembed_media_types(), - 'contact_sheet_output_dir': 'contact_sheet_output', - 'contact_sheet_css_path': os.path.join('assets', 'contact_sheet', 'contact-sheet.css'), - 'page_title_template': '$parent_title, page $weight', - 'csv_headers': 'names', - 'clean_csv_values_skip': [], - 'text_format_id': 'basic_html', - 'ignore_existing_parent_ids': True, - 'query_csv_id_to_node_id_map_for_parents': False, - 'ignore_duplicate_parent_ids': True + "perform_soft_checks": False, + "update_mode": "replace", + "max_node_title_length": 255, + "paged_content_from_directories": False, + 
"delete_media_with_nodes": True, + "allow_adding_terms": False, + "nodes_only": False, + "log_response_time": False, + "adaptive_pause_threshold": 2, + "log_response_time_sample": False, + "log_request_url": False, + "log_json": False, + "log_response_body": False, + "log_response_status_code": False, + "log_headers": False, + "log_term_creation": True, + "progress_bar": False, + "user_agent": "Islandora Workbench", + "allow_redirects": True, + "secure_ssl_only": True, + "google_sheets_csv_filename": "google_sheet.csv", + "google_sheets_gid": "0", + "excel_worksheet": "Sheet1", + "excel_csv_filename": "excel.csv", + "ignore_csv_columns": list(), + "use_node_title_for_media": False, + "use_nid_in_media_title": False, + "use_node_title_for_media_title": False, + "field_for_remote_filename": False, + "field_for_media_title": False, + "delete_tmp_upload": False, + "list_missing_drupal_fields": False, + "secondary_tasks": None, + "sqlite_db_filename": "workbench_temp_data.db", + "fixity_algorithm": None, + "validate_fixity_during_check": False, + "output_csv_include_input_csv": False, + "timestamp_rollback": False, + "rollback_dir": None, + "enable_http_cache": True, + "http_cache_storage": "memory", + "http_cache_storage_expire_after": 1200, + "validate_terms_exist": True, + "validate_parent_node_exists": True, + "media_types": self.get_media_types(), + "preprocessors": {}, + "check": self.args.check, + "get_csv_template": self.args.get_csv_template, + "paged_content_sequence_separator": "-", + "media_type_file_fields": self.get_media_fields(), + "media_track_file_fields": self.get_media_track_file_fields(), + "media_fields": self.get_media_fields(), + "delete_media_by_node_media_use_tids": [], + "export_csv_term_mode": "tid", + "export_csv_file_path": None, + "export_csv_field_list": [], + "export_file_directory": None, + "export_file_media_use_term_id": "http://pcdm.org/use#OriginalFile", + "standalone_media_url": False, + "require_entity_reference_views": True, + "csv_start_row": 0, + "csv_stop_row": None, + "path_to_python": "python", + "path_to_workbench_script": os.path.join(os.getcwd(), "workbench"), + "oembed_providers": self.get_oembed_media_types(), + "contact_sheet_output_dir": "contact_sheet_output", + "contact_sheet_css_path": os.path.join( + "assets", "contact_sheet", "contact-sheet.css" + ), + "page_title_template": "$parent_title, page $weight", + "csv_headers": "names", + "clean_csv_values_skip": [], + "text_format_id": "basic_html", + "ignore_existing_parent_ids": True, + "query_csv_id_to_node_id_map_for_parents": False, + "ignore_duplicate_parent_ids": True, } # Tests validity and existence of configuration file path. @@ -250,9 +274,10 @@ def path_check(self): # need to define a local logger to write to the default log file location, # 'workbench.log'. logging.basicConfig( - filename='workbench.log', - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + filename="workbench.log", + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) message = 'Error: Configuration file "' + self.args.config + '" not found.' logging.error(message) sys.exit(message) @@ -260,22 +285,29 @@ def path_check(self): # Validates config. 
def validate(self): error_messages = [] - type_check = issue_request(self.config, 'GET', - f"{self.config['host']}/entity/entity_form_display/node.{self.config['content_type']}.default?_format=json") + type_check = issue_request( + self.config, + "GET", + f"{self.config['host']}/entity/entity_form_display/node.{self.config['content_type']}.default?_format=json", + ) if type_check.status_code == 404: message = f"Content type {self.config['content_type']} does not exist on {self.config['host']}." error_messages.append(message) - mutators = ['use_node_title_for_media', 'use_nid_in_media_title', 'field_for_media_title'] + mutators = [ + "use_node_title_for_media", + "use_nid_in_media_title", + "field_for_media_title", + ] selected = [mutator for mutator in mutators if self.config[mutator]] if len(selected) > 1: message = f"You may only select one of {mutators}.\n - This config has selected {selected}." error_messages.append(message) if error_messages: - output = '' + output = "" for error_message in error_messages: output += f"{error_message}\n" - sys.exit('Error: ' + output) + sys.exit("Error: " + output) # Convenience function for debugging - Prints config to console screen. def print_config(self): diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index 70102a76..aec95fa6 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -'''Script for exporting Islandora 7 content (metadata and OBJ datastreams). See +"""Script for exporting Islandora 7 content (metadata and OBJ datastreams). See https://mjordan.github.io/islandora_workbench_docs/exporting_islandora_7_content/ for more info. -''' +""" import os import sys @@ -20,8 +20,10 @@ ############################ parser = argparse.ArgumentParser() -parser.add_argument('--config', required=True, help='Configuration file to use.') -parser.add_argument('--metadata_solr_request', required=False, help='Option to solr metadata request.') +parser.add_argument("--config", required=True, help="Configuration file to use.") +parser.add_argument( + "--metadata_solr_request", required=False, help="Option to solr metadata request." +) args = parser.parse_args() utils = i7ImportUtilities(args.config) config = utils.config @@ -31,85 +33,90 @@ ####################### logging.basicConfig( - filename=config['log_file_path'], + filename=config["log_file_path"], level=logging.INFO, - filemode='a', - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + filemode="a", + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) if args.metadata_solr_request: metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: metadata_solr_request = utils.get_default_metadata_solr_request() -if config['debug']: - pretty_print = metadata_solr_request.replace('&', "\n&") +if config["debug"]: + pretty_print = metadata_solr_request.replace("&", "\n&") print(f"Solr request: {pretty_print}") utils.print_config() try: - metadata_solr_response = requests.get(url=metadata_solr_request, allow_redirects=True) + metadata_solr_response = requests.get( + url=metadata_solr_request, allow_redirects=True + ) except requests.exceptions.RequestException as e: logging.info("Solr Query failed.") raise SystemExit(e) if not metadata_solr_response.ok: - warning = '' + warning = "" if len(metadata_solr_request) > 2000: - warning = 'The default query may be too long for a url request. 
See docs' - print(f"Illegal request: Server returned status of {metadata_solr_response.status_code} \n{warning} ") + warning = "The default query may be too long for a url request. See docs" + print( + f"Illegal request: Server returned status of {metadata_solr_response.status_code} \n{warning} " + ) sys.exit() rows = metadata_solr_response.content.decode().splitlines() logging.info(f"Processing {len(rows)} items.") reader = csv.DictReader(rows) headers = reader.fieldnames # We add a 'sequence' column to store the Islandora 7.x property "isSequenceNumberOfxxx"/"isSequenceNumber". -headers.append('sequence') +headers.append("sequence") # Add a column to store the files -headers.append('file') -if config['id_field'] not in headers: - headers.append(config['id_field']) - index = config['id_start_number'] +headers.append("file") +if config["id_field"] not in headers: + headers.append(config["id_field"]) + index = config["id_start_number"] -if config['fetch_files'] is True: - if not os.path.exists(config['obj_directory']): - os.makedirs(config['obj_directory']) +if config["fetch_files"] is True: + if not os.path.exists(config["obj_directory"]): + os.makedirs(config["obj_directory"]) row_count = 0 pbar = InitBar() num_csv_rows = len(rows) print(f"Processing {num_csv_rows -1}.") -with open(config['csv_output_path'], 'w', newline='') as csvfile: +with open(config["csv_output_path"], "w", newline="") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=headers) writer.writeheader() failed_pids = [] for row in reader: - rels_ext = utils.parse_rels_ext(row['PID']) + rels_ext = utils.parse_rels_ext(row["PID"]) if rels_ext: for key, value in rels_ext.items(): - if 'isSequenceNumber' in key: - row['sequence'] = str(value) + if "isSequenceNumber" in key: + row["sequence"] = str(value) else: - failed_pids.append(row['PID']) + failed_pids.append(row["PID"]) logging.error(f"{row['PID']} was unsuccessful.") continue - if config['fetch_files'] or config['get_file_url']: + if config["fetch_files"] or config["get_file_url"]: row_count += 1 row_position = utils.get_percentage(row_count, num_csv_rows) pbar(row_position) - for datastream in config['datastreams']: - file = utils.get_i7_asset(row['PID'], datastream) + for datastream in config["datastreams"]: + file = utils.get_i7_asset(row["PID"], datastream) if file: - row['file'] = file + row["file"] = file break - if config['id_field'] in headers: - row[config['id_field']] = index + reader.line_num - 2 + if config["id_field"] in headers: + row[config["id_field"]] = index + reader.line_num - 2 writer.writerow(row) if failed_pids: - output = 'The following PIDS returned no data:\n' + output = "The following PIDS returned no data:\n" for pid in failed_pids: output += f"{pid}\n" print(output) - if config['debug']: + if config["debug"]: with open("failure_report.txt", "w") as f: f.write(output) pbar(100) diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 275f21a1..8aab6f20 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -17,58 +17,64 @@ def __init__(self, config_location): self.config = self.get_config() self.validate() logging.basicConfig( - filename=self.config['log_file_path'], + filename=self.config["log_file_path"], level=logging.INFO, - filemode='a', - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + filemode="a", + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", + ) default_config = { - 'solr_base_url': 
'http://localhost:8080/solr', - 'islandora_base_url': 'http://localhost:8000', - 'csv_output_path': 'islandora7_metadata.csv', - 'obj_directory': '/tmp/objs', - 'failure_report': 'failure_report.txt', - 'log_file_path': 'islandora_content.log', - 'fetch_files': False, - 'get_file_url': True, - 'namespace': '*', - 'standard_fields': ['PID', 'RELS_EXT_hasModel_uri_s', 'RELS_EXT_isMemberOfCollection_uri_ms', - 'RELS_EXT_isMemberOf_uri_ms', 'RELS_EXT_isConstituentOf_uri_ms', - 'RELS_EXT_isPageOf_uri_ms'], - 'field_pattern': 'mods_.*(_s|_ms)$', - 'field_pattern_do_not_want': '(marcrelator|isSequenceNumberOf)', - 'id_field': 'PID', - 'id_start_number': 1, - 'datastreams': ['OBJ', 'PDF'], - 'debug': False, - 'deep_debug': False, - 'collection': False, - 'collections': False, - 'content_model': False, - 'solr_filters': False, - 'start': 0, - 'rows': 100000 + "solr_base_url": "http://localhost:8080/solr", + "islandora_base_url": "http://localhost:8000", + "csv_output_path": "islandora7_metadata.csv", + "obj_directory": "/tmp/objs", + "failure_report": "failure_report.txt", + "log_file_path": "islandora_content.log", + "fetch_files": False, + "get_file_url": True, + "namespace": "*", + "standard_fields": [ + "PID", + "RELS_EXT_hasModel_uri_s", + "RELS_EXT_isMemberOfCollection_uri_ms", + "RELS_EXT_isMemberOf_uri_ms", + "RELS_EXT_isConstituentOf_uri_ms", + "RELS_EXT_isPageOf_uri_ms", + ], + "field_pattern": "mods_.*(_s|_ms)$", + "field_pattern_do_not_want": "(marcrelator|isSequenceNumberOf)", + "id_field": "PID", + "id_start_number": 1, + "datastreams": ["OBJ", "PDF"], + "debug": False, + "deep_debug": False, + "collection": False, + "collections": False, + "content_model": False, + "solr_filters": False, + "start": 0, + "rows": 100000, } def get_config(self): yaml = YAML() config = self.default_config - with open(self.config_location, 'r') as stream: + with open(self.config_location, "r") as stream: try: loaded = yaml.load(stream) except OSError: - print('Failed') + print("Failed") for key, value in loaded.items(): config[key] = value - if 'get_file_url' in loaded.keys() and 'fetch_files' not in loaded.keys(): - config['fetch_files'] = False - if config['deep_debug']: - config['debug'] = True + if "get_file_url" in loaded.keys() and "fetch_files" not in loaded.keys(): + config["fetch_files"] = False + if config["deep_debug"]: + config["debug"] = True return config def get_metadata_solr_request(self, location): - with open(location, 'r') as file: + with open(location, "r") as file: solr_metadata_request = file.read() return solr_metadata_request @@ -76,10 +82,7 @@ def get_extension_from_mimetype(self, mimetype): # mimetypes.add_type() is not working, e.g. mimetypes.add_type('image/jpeg', '.jpg') # Maybe related to https://bugs.python.org/issue4963? In the meantime, provide our own # MIMETYPE to extension mapping for common types, then let mimetypes guess at others. 
- map = {'image/jpeg': '.jpg', - 'image/jp2': '.jp2', - 'image/png': '.png' - } + map = {"image/jpeg": ".jpg", "image/jp2": ".jp2", "image/png": ".png"} if mimetype in map: return map[mimetype] else: @@ -90,75 +93,89 @@ def get_percentage(self, part, whole): def parse_rels_ext(self, pid): rels_ext_url = f"{self.config['islandora_base_url']}/islandora/object/{pid}/datastream/RELS-EXT/download" - if self.config['deep_debug']: + if self.config["deep_debug"]: print(f"\n{rels_ext_url}") try: - rels_ext_download_response = requests.get(url=rels_ext_url, allow_redirects=True) + rels_ext_download_response = requests.get( + url=rels_ext_url, allow_redirects=True + ) if rels_ext_download_response.ok: rel_ext = {} rels_ext_xml = rels_ext_download_response.content.decode() - if self.config['deep_debug']: + if self.config["deep_debug"]: print(rels_ext_xml) root = ET.fromstring(rels_ext_xml) - description = root.find('.//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description') + description = root.find( + ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description" + ) for x in description: - tag = x.tag[x.tag.find('}') + 1:] + tag = x.tag[x.tag.find("}") + 1 :] text = x.text if x.attrib.items(): text = next(iter(x.attrib.items()))[1] - text = text[text.find('/') + 1:] + text = text[text.find("/") + 1 :] rel_ext[tag] = text return rel_ext else: message = f"\nBad response from server for item {pid} : {rels_ext_download_response.status_code}" - logging.error(f"\nBad response from server for item {pid} : {rels_ext_download_response.status_code}") - if self.config['debug']: + logging.error( + f"\nBad response from server for item {pid} : {rels_ext_download_response.status_code}" + ) + if self.config["debug"]: print(message) except requests.exceptions.RequestException as e: raise SystemExit(e) def get_default_metadata_solr_request(self): # This query gets all fields in the index. Does not need to be user-configurable. - fields_solr_url = f"{self.config['solr_base_url']}/select?q=*:*&wt=csv&rows=0&fl=*" + fields_solr_url = ( + f"{self.config['solr_base_url']}/select?q=*:*&wt=csv&rows=0&fl=*" + ) # Get the complete field list from Solr and filter it. The filtered field list is # then used in another query to get the populated CSV data. try: - field_list_response = requests.get(url=fields_solr_url, allow_redirects=True) + field_list_response = requests.get( + url=fields_solr_url, allow_redirects=True + ) raw_field_list = field_list_response.content.decode() except requests.exceptions.RequestException as e: raise SystemExit(e) - field_list = raw_field_list.split(',') - filtered_field_list = [keep for keep in field_list if re.search(self.config['field_pattern'], keep)] - filtered_field_list = [discard for discard in filtered_field_list if - not re.search(self.config['field_pattern_do_not_want'], discard)] + field_list = raw_field_list.split(",") + filtered_field_list = [ + keep for keep in field_list if re.search(self.config["field_pattern"], keep) + ] + filtered_field_list = [ + discard + for discard in filtered_field_list + if not re.search(self.config["field_pattern_do_not_want"], discard) + ] # Add required fieldnames. 
- self.config['standard_fields'].reverse() - for standard_field in self.config['standard_fields']: + self.config["standard_fields"].reverse() + for standard_field in self.config["standard_fields"]: filtered_field_list.insert(0, standard_field) - fields_param = ','.join(filtered_field_list) + fields_param = ",".join(filtered_field_list) query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&start={self.config['start']}&rows={self.config['rows']}&fl={fields_param}" - if self.config['collection']: - collection = self.config['collection'] + if self.config["collection"]: + collection = self.config["collection"] query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s: "info:fedora/{collection}"' - if self.config['content_model']: - model = self.config['content_model'] + if self.config["content_model"]: + model = self.config["content_model"] query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"' - if self.config['solr_filters']: - for key, value in self.config['solr_filters'].items(): + if self.config["solr_filters"]: + for key, value in self.config["solr_filters"].items(): query = f'{query}&fq={key}:"{value}"' fedora_prefix = 'RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/' - if self.config['collections']: - collections = self.config['collections'] + if self.config["collections"]: + collections = self.config["collections"] fedora_collections = [] for collection in collections: fedora_collections.append(f'{fedora_prefix}"{collection}"') - fq_string = "&fq=" + ' or '.join(fedora_collections) - query = f'{query}{fq_string}' - + fq_string = "&fq=" + " or ".join(fedora_collections) + query = f"{query}{fq_string}" # Get the populated CSV from Solr, with the object namespace and field list filters applied. return query @@ -166,33 +183,35 @@ def get_default_metadata_solr_request(self): # Validates config. def validate(self): error_messages = [] - if self.config['get_file_url'] and self.config['fetch_files']: + if self.config["get_file_url"] and self.config["fetch_files"]: message = f"'get_file_url' and 'fetch_files' cannot both be selected." 
error_messages.append(message) if error_messages: - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Gets file from i7 installation def get_i7_asset(self, pid, datastream): try: obj_url = f"{self.config['islandora_base_url']}/islandora/object/{pid}/datastream/{datastream}/download" - if self.config['get_file_url']: + if self.config["get_file_url"]: obj_download_response = requests.head(url=obj_url, allow_redirects=True) else: obj_download_response = requests.get(url=obj_url, allow_redirects=True) if obj_download_response.status_code == 200: # Get MIMETYPE from 'Content-Type' header - obj_mimetype = obj_download_response.headers['content-type'] + obj_mimetype = obj_download_response.headers["content-type"] obj_extension = self.get_extension_from_mimetype(obj_mimetype) - if self.config['fetch_files'] and obj_extension: - obj_filename = pid.replace(':', '_') + if self.config["fetch_files"] and obj_extension: + obj_filename = pid.replace(":", "_") obj_basename = obj_filename + obj_extension # Save to file with name based on PID and extension based on MIMETYPE - obj_file_path = os.path.join(self.config['obj_directory'], obj_basename) - open(obj_file_path, 'wb+').write(obj_download_response.content) + obj_file_path = os.path.join( + self.config["obj_directory"], obj_basename + ) + open(obj_file_path, "wb+").write(obj_download_response.content) return obj_basename - if self.config['get_file_url'] and obj_extension: + if self.config["get_file_url"] and obj_extension: return obj_url if obj_download_response.status_code == 404: logging.warning(f"{obj_url} not found.") @@ -208,11 +227,11 @@ def print_config(self): table.add_column("Parameter", justify="left") table.add_column("Value", justify="left") for key, value in self.config.items(): - if str(type(value)) == '': - new_value = '' + if str(type(value)) == "": + new_value = "" for k, v in value.items(): new_value += f"{k}: {v}\n" value = new_value table.add_row(key, str(value)) console = Console() - console.print(table) \ No newline at end of file + console.print(table) diff --git a/scripts/bootstrap_example.py b/scripts/bootstrap_example.py index 382826a1..d5243398 100755 --- a/scripts/bootstrap_example.py +++ b/scripts/bootstrap_example.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 -'''Example bootstrap script that logs sys.args. +"""Example bootstrap script that logs sys.args. These scripts must be executable. -''' +""" import sys import logging logging.basicConfig( - filename='bootstrap_example.log', + filename="bootstrap_example.log", level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) logging.info(sys.argv) diff --git a/scripts/entity_post_action_example.py b/scripts/entity_post_action_example.py index 6c32c37a..32ecefa1 100755 --- a/scripts/entity_post_action_example.py +++ b/scripts/entity_post_action_example.py @@ -1,26 +1,28 @@ #!/usr/bin/env python3 -'''Example/template entity post-action script. +"""Example/template entity post-action script. Docs at https://mjordan.github.io/islandora_workbench_docs/hooks/. 
-''' +""" import sys import json import logging logging.basicConfig( - filename='entity_post_create.log', + filename="entity_post_create.log", level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) workbench_config_file_path = sys.argv[1] http_response_code = sys.argv[2] http_response_body = sys.argv[3] entity = json.loads(http_response_body) -if http_response_code == '201': +if http_response_code == "201": # Execute code if entity was successfully created. + print("Success") else: # Execute code if entity was not successfully created. - + print("Failure") diff --git a/scripts/generate_image_files.py b/scripts/generate_image_files.py index a5f2a459..14081bac 100755 --- a/scripts/generate_image_files.py +++ b/scripts/generate_image_files.py @@ -13,12 +13,12 @@ # Change the variables below. # Must exist and be absolute. -dest_dir = '/tmp/colors' +dest_dir = "/tmp/colors" # Must exist. Relative to this script -title_file_path = 'sample_filenames.txt' +title_file_path = "sample_filenames.txt" num_images_to_generate = 10 # @todo: Check for path to convert; if not found, bail. -path_to_convert = '/usr/bin/convert' +path_to_convert = "/usr/bin/convert" # Change the variables above. @@ -27,16 +27,16 @@ sys.exit("Error: output directory " + dest_dir + " does not exist.") colors = [ - 'crimson', - 'orchid', - 'DarkViolet', - 'SlateBlue', - 'navy', - 'SlateGrey', - 'black', - 'burlywood4', - 'SeaGreen', - 'DeepSkyBlue' + "crimson", + "orchid", + "DarkViolet", + "SlateBlue", + "navy", + "SlateGrey", + "black", + "burlywood4", + "SeaGreen", + "DeepSkyBlue", ] with open(title_file_path) as f: @@ -44,18 +44,28 @@ if len(lines) >= num_images_to_generate: lines = lines[:num_images_to_generate] for line in lines: - line = re.sub(r'[^\w\s]', ' ', line) - line = re.sub(r'\s', '_', line) - line = re.sub(r'_{2,5}', '_', line) - filename = line.rstrip('_') - words = line.split('_') + line = re.sub(r"[^\w\s]", " ", line) + line = re.sub(r"\s", "_", line) + line = re.sub(r"_{2,5}", "_", line) + filename = line.rstrip("_") + words = line.split("_") first_three_words = words[:3] - first_three_words_string = '\n'.join(first_three_words) + first_three_words_string = "\n".join(first_three_words) color = random.choice(colors) - cmd = path_to_convert + ' -size 1000x1000 xc:' + color + \ - ' ' + os.path.join(dest_dir, filename + '.png') + '; ' - cmd += path_to_convert + ' -size 1000x1000 xc:' + color - cmd += ' -pointsize 100 -fill white -gravity center -annotate +0+0 ' + \ - '"' + first_three_words_string + '"' - cmd += ' ' + os.path.join(dest_dir, filename + '.png') + cmd = ( + path_to_convert + + " -size 1000x1000 xc:" + + color + + " " + + os.path.join(dest_dir, filename + ".png") + + "; " + ) + cmd += path_to_convert + " -size 1000x1000 xc:" + color + cmd += ( + " -pointsize 100 -fill white -gravity center -annotate +0+0 " + + '"' + + first_three_words_string + + '"' + ) + cmd += " " + os.path.join(dest_dir, filename + ".png") subprocess.call(cmd, shell=True) diff --git a/scripts/manage_csv_to_node_id_map.py b/scripts/manage_csv_to_node_id_map.py index 4ca4ad0b..ecc3c129 100755 --- a/scripts/manage_csv_to_node_id_map.py +++ b/scripts/manage_csv_to_node_id_map.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -'''Utility script to dump the CSV ID to Node ID map from its SQLite database, or to remove +"""Utility script to dump the CSV ID to Node ID map from its SQLite database, or to remove 
entries from the database with specificed config files names or prior to/before a provided timestamp. Usage: python dump_id_map.py --db_path csv_id_to_node_id_map.db --csv_path /tmp/test.csv @@ -10,7 +10,7 @@ python dump_id_map.py --db_path csv_id_to_node_id_map.db --remove_entries_after "2023-05-22" python dump_id_map.py --db_path csv_id_to_node_id_map.db --remove_entries_with_config_files create.yml,test_dir/create_testing.yml python dump_id_map.py --db_path csv_id_to_node_id_map.db --remove_entries_for_deleted_nodes https://islandora.traefik.me -''' +""" import os import sys @@ -20,21 +20,45 @@ import requests parser = argparse.ArgumentParser() -parser.add_argument('--db_path', required=True, help='Relative or absolute path to the SQLite database file.') -parser.add_argument('--csv_path', help='Relative or absolute path to the output CSV file.') -parser.add_argument('--nonunique', help='Name of the column that contains nonunique/duplicate values.') -parser.add_argument('--remove_entries_before', help='Date string in yyyy:mm:dd hh:mm:ss (or truncated for of that pattern).') -parser.add_argument('--remove_entries_after', help='Date string in yyyy:mm:dd hh:mm:ss (or truncated for of that pattern).') -parser.add_argument('--remove_entries_with_config_files', help='comma-separated list of Workbench config files (exactly as passed to Workbench).') -parser.add_argument('--remove_entries_for_deleted_nodes', help='Hostname (and port if applicable) of your Islandora.') +parser.add_argument( + "--db_path", + required=True, + help="Relative or absolute path to the SQLite database file.", +) +parser.add_argument( + "--csv_path", help="Relative or absolute path to the output CSV file." +) +parser.add_argument( + "--nonunique", help="Name of the column that contains nonunique/duplicate values." +) +parser.add_argument( + "--remove_entries_before", + help="Date string in yyyy:mm:dd hh:mm:ss (or truncated for of that pattern).", +) +parser.add_argument( + "--remove_entries_after", + help="Date string in yyyy:mm:dd hh:mm:ss (or truncated for of that pattern).", +) +parser.add_argument( + "--remove_entries_with_config_files", + help="comma-separated list of Workbench config files (exactly as passed to Workbench).", +) +parser.add_argument( + "--remove_entries_for_deleted_nodes", + help="Hostname (and port if applicable) of your Islandora.", +) args = parser.parse_args() -if args.csv_path is None and \ - args.remove_entries_before is None and \ - args.remove_entries_after is None and \ - args.remove_entries_with_config_files is None and \ - args.remove_entries_for_deleted_nodes is None: - sys.exit("You need to provide either --csv_path, --remove_entries_before, --remove_entries_after, --remove_entries_with_config_files, or --remove_entries_for_deleted_nodes option.") +if ( + args.csv_path is None + and args.remove_entries_before is None + and args.remove_entries_after is None + and args.remove_entries_with_config_files is None + and args.remove_entries_for_deleted_nodes is None +): + sys.exit( + "You need to provide either --csv_path, --remove_entries_before, --remove_entries_after, --remove_entries_with_config_files, or --remove_entries_for_deleted_nodes option." + ) ####################################### # Check existence of specified paths. # @@ -46,7 +70,7 @@ db_path = os.path.join(os.getcwd(), args.db_path) if not os.path.exists(db_path): message = f"Can't find SQLite database path ({os.path.abspath(db_path)}). Please confirm that it exsits." 
- sys.exit('Error: ' + message) + sys.exit("Error: " + message) if args.csv_path is not None: if os.path.isabs(args.csv_path): @@ -57,7 +81,7 @@ csv_path_dir = os.path.dirname(csv_path) if not os.path.exists(csv_path_dir): message = f"Can't find directory specified for output CSV file path ({csv_path_dir}). Please confirm that it exsits." - sys.exit('Error: ' + message) + sys.exit("Error: " + message) ######################################################### # Get contents of the db, then write them out to a CSV. # @@ -71,8 +95,10 @@ else: # Using parameterized fieldnames wraps them in '', which interferes with the query. # So we revert to (very likely low risk) string interpolation. - query = f"SELECT * FROM csv_id_to_node_id_map WHERE {args.nonunique} IN " + \ - f"(SELECT {args.nonunique} FROM csv_id_to_node_id_map GROUP BY {args.nonunique} HAVING COUNT(*) > 1)" + query = ( + f"SELECT * FROM csv_id_to_node_id_map WHERE {args.nonunique} IN " + + f"(SELECT {args.nonunique} FROM csv_id_to_node_id_map GROUP BY {args.nonunique} HAVING COUNT(*) > 1)" + ) params = () con = sqlite3.connect(db_path) con.row_factory = sqlite3.Row @@ -82,19 +108,26 @@ except sqlite3.OperationalError as e: sys.exit(f"Error executing database query: {e}") - csv_writer_file_handle = open(csv_path, 'w+', newline='', encoding='utf-8') - csv_headers = ['Timestamp', 'Config file', 'Parent CSV ID', 'Parent node ID', 'CSV ID', "Node ID"] + csv_writer_file_handle = open(csv_path, "w+", newline="", encoding="utf-8") + csv_headers = [ + "Timestamp", + "Config file", + "Parent CSV ID", + "Parent node ID", + "CSV ID", + "Node ID", + ] csv_writer = csv.DictWriter(csv_writer_file_handle, fieldnames=csv_headers) csv_writer.writeheader() for row in res: csv_row = dict() - csv_row['Timestamp'] = row[0] - csv_row['Config file'] = row[1] - csv_row['Parent CSV ID'] = row[2] - csv_row['Parent node ID'] = row[3] - csv_row['CSV ID'] = row[4] - csv_row['Node ID'] = row[5] + csv_row["Timestamp"] = row[0] + csv_row["Config file"] = row[1] + csv_row["Parent CSV ID"] = row[2] + csv_row["Parent node ID"] = row[3] + csv_row["CSV ID"] = row[4] + csv_row["Node ID"] = row[5] csv_writer.writerow(csv_row) print(f"Dumped {len(res)} rows into CSV file {csv_path}.") @@ -113,7 +146,9 @@ except sqlite3.OperationalError as e: sys.exit(f"Error executing database query: {e}") - print(f"Removed {num_rows_deleted} entries added to the database before {args.remove_entries_before}.") + print( + f"Removed {num_rows_deleted} entries added to the database before {args.remove_entries_before}." + ) if args.remove_entries_after is not None: try: @@ -128,14 +163,18 @@ except sqlite3.OperationalError as e: sys.exit(f"Error executing database query: {e}") - print(f"Removed {num_rows_deleted} entries added to the database after {args.remove_entries_after}.") + print( + f"Removed {num_rows_deleted} entries added to the database after {args.remove_entries_after}." + ) if args.remove_entries_with_config_files is not None: - config_files = args.remove_entries_with_config_files.split(',') + config_files = args.remove_entries_with_config_files.split(",") config_files_tuple = tuple(config_files) - placeholders = ', '.join('?' for x in config_files) + placeholders = ", ".join("?" 
for x in config_files) try: - query = f"delete from csv_id_to_node_id_map where config_file in ({placeholders})" + query = ( + f"delete from csv_id_to_node_id_map where config_file in ({placeholders})" + ) con = sqlite3.connect(db_path) con.row_factory = sqlite3.Row cur = con.cursor() @@ -146,7 +185,9 @@ except sqlite3.OperationalError as e: sys.exit(f"Error executing database query: {e}") - print(f"Removed {num_rows_deleted} entries added to the database using config file(s) {args.remove_entries_with_config_files}.") + print( + f"Removed {num_rows_deleted} entries added to the database using config file(s) {args.remove_entries_with_config_files}." + ) if args.remove_entries_for_deleted_nodes is not None: try: @@ -162,7 +203,7 @@ deleted_nodes = [] for row in res: - url = args.remove_entries_for_deleted_nodes.rstrip('/') + '/node/' + str(row[5]) + url = args.remove_entries_for_deleted_nodes.rstrip("/") + "/node/" + str(row[5]) ping_response = requests.head(url, allow_redirects=True) if ping_response.status_code == 404: deleted_nodes.append(row[5]) @@ -170,8 +211,10 @@ if len(deleted_nodes) > 0: deleted_nodes_tuple = tuple(deleted_nodes) try: - placeholders = ', '.join('?' for x in deleted_nodes) - query = f"delete from csv_id_to_node_id_map where node_id in ({placeholders})" + placeholders = ", ".join("?" for x in deleted_nodes) + query = ( + f"delete from csv_id_to_node_id_map where node_id in ({placeholders})" + ) con = sqlite3.connect(db_path) con.row_factory = sqlite3.Row cur = con.cursor() diff --git a/scripts/post_get_data_from_view_export_child.py b/scripts/post_get_data_from_view_export_child.py index 5af3a32c..daf2e15f 100755 --- a/scripts/post_get_data_from_view_export_child.py +++ b/scripts/post_get_data_from_view_export_child.py @@ -1,25 +1,25 @@ #!/usr/bin/env python3 -'''WIP on #603. -''' +"""WIP on #603. +""" import sys import json import logging logging.basicConfig( - filename='issue_603.log', + filename="issue_603.log", level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) workbench_config_file_path = sys.argv[1] http_response_code = sys.argv[2] http_response_body = sys.argv[3] entity = json.loads(http_response_body) -if http_response_code == '200': +if http_response_code == "200": logging.info(entity) else: - logging.error('Response code was %s', http_response_code) - + logging.error("Response code was %s", http_response_code) diff --git a/scripts/shutdown_example.py b/scripts/shutdown_example.py index bab7f9a7..6e6fa594 100755 --- a/scripts/shutdown_example.py +++ b/scripts/shutdown_example.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 -'''Example shutdown script that logs sys.args. +"""Example shutdown script that logs sys.args. These scripts must be executable. 
-''' +""" import sys import logging logging.basicConfig( - filename='shutdown_example.log', + filename="shutdown_example.log", level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) logging.info(sys.argv) diff --git a/tests/assets/execute_bootstrap_script_test/script.py b/tests/assets/execute_bootstrap_script_test/script.py index 9a1e35d5..3f86ed21 100755 --- a/tests/assets/execute_bootstrap_script_test/script.py +++ b/tests/assets/execute_bootstrap_script_test/script.py @@ -6,7 +6,7 @@ config_file_path = sys.argv[1] yaml = YAML() -with open(config_file_path, 'r') as f: +with open(config_file_path, "r") as f: config_file_contents = f.read() config_yaml = yaml.load(config_file_contents) @@ -14,5 +14,5 @@ for k, v in config_yaml.items(): config[k] = v -if config['media_type'] == 'document': +if config["media_type"] == "document": print("Hello") diff --git a/tests/assets/execute_post_action_entity_script_test/script.py b/tests/assets/execute_post_action_entity_script_test/script.py index f9f4f887..bb9d6d3b 100755 --- a/tests/assets/execute_post_action_entity_script_test/script.py +++ b/tests/assets/execute_post_action_entity_script_test/script.py @@ -6,10 +6,10 @@ import tempfile temp_dir = tempfile.gettempdir() -output_file_path = os.path.join(temp_dir, 'execute_post_action_entity_script.dat') +output_file_path = os.path.join(temp_dir, "execute_post_action_entity_script.dat") http_response_body = sys.argv[3] entity = json.loads(http_response_body) with open(output_file_path, "a+") as file_object: - file_object.write(entity['title'][0]['value'] + "\n") + file_object.write(entity["title"][0]["value"] + "\n") diff --git a/tests/csv_id_to_node_id_map_tests.py b/tests/csv_id_to_node_id_map_tests.py index c9b20176..13635259 100644 --- a/tests/csv_id_to_node_id_map_tests.py +++ b/tests/csv_id_to_node_id_map_tests.py @@ -12,18 +12,30 @@ class TestDumpCsv(unittest.TestCase): def setUp(self): current_dir = os.path.dirname(os.path.abspath(__file__)) - self.script_path = os.path.join('scripts', 'manage_csv_to_node_id_map.py') - self.asset_db_path = os.path.join(current_dir, 'assets', 'csv_id_to_node_id_map', 'csv_id_to_node_id_map.db') - self.dump_db_path = os.path.join(tempfile.gettempdir(), 'csv_id_to_node_id_map_dump.db') + self.script_path = os.path.join("scripts", "manage_csv_to_node_id_map.py") + self.asset_db_path = os.path.join( + current_dir, "assets", "csv_id_to_node_id_map", "csv_id_to_node_id_map.db" + ) + self.dump_db_path = os.path.join( + tempfile.gettempdir(), "csv_id_to_node_id_map_dump.db" + ) shutil.copyfile(self.asset_db_path, self.dump_db_path) - def test_dump_csv(self): - self.output_csv_path = os.path.join(tempfile.gettempdir(), 'testing_csv_id_to_node_id_map_dump.csv') - self.cmd = [self.script_path, "--db_path", self.dump_db_path, '--csv_path', self.output_csv_path] + self.output_csv_path = os.path.join( + tempfile.gettempdir(), "testing_csv_id_to_node_id_map_dump.csv" + ) + self.cmd = [ + self.script_path, + "--db_path", + self.dump_db_path, + "--csv_path", + self.output_csv_path, + ] script_output = subprocess.check_output(self.cmd).decode().strip() - self.assertRegex(script_output, f'Dumped 60 rows into CSV file {self.output_csv_path}', '') - + self.assertRegex( + script_output, f"Dumped 60 rows into CSV file {self.output_csv_path}", "" + ) def tearDown(self): if os.path.exists(self.output_csv_path): @@ -33,5 +45,5 @@ def tearDown(self): 
os.remove(self.dump_db_path) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/field_tests.py b/tests/field_tests.py index 23fe55ba..473ff6b4 100644 --- a/tests/field_tests.py +++ b/tests/field_tests.py @@ -15,52 +15,40 @@ class TestSimpleField(unittest.TestCase): def setUp(self): self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id', - 'text_format_id': 'basic_html', - 'field_text_format_ids': {'field_foo': 'restricted_html', 'field_bar': 'basic_html'} + "task": "create", + "subdelimiter": "|", + "id_field": "id", + "text_format_id": "basic_html", + "field_text_format_ids": { + "field_foo": "restricted_html", + "field_bar": "basic_html", + }, } def test_create_with_simple_field(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } # Create a node with a simple field of cardinality 1, no subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': False - } + "field_foo": {"cardinality": 1, "formatted_text": False} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_001" - csv_record['field_foo'] = "Field foo value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_001" + csv_record["field_foo"] = "Field foo value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo value"}], } self.assertDictEqual(node, expected_node) @@ -68,104 +56,80 @@ def test_create_with_simple_field(self): with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_002" - csv_record['field_foo'] = "Field foo value|Extraneous value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_002" + csv_record["field_foo"] = "Field foo value|Extraneous value" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo value"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'simple_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"simple_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with a simple field of cardinality unlimited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': False - } + "field_foo": {"cardinality": -1, "formatted_text": False} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_003" - csv_record['field_foo'] = "First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_003" + csv_record["field_foo"] = "First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "First value"}], } self.assertDictEqual(node, expected_node) # Create a node with a simple field of cardinality unlimited, with subdelimiters. field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_004" - csv_record['field_foo'] = "First value|Second value|First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_004" + csv_record["field_foo"] = "First value|Second value|First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value"}, - {'value': "Second value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "First value"}, {"value": "Second value"}], } self.assertDictEqual(node, expected_node) # Create a node with a simple field of cardinality limited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': False - } + "field_foo": {"cardinality": 2, "formatted_text": False} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_005" - csv_record['field_foo'] = "First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_005" + csv_record["field_foo"] = "First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "First value"}], } self.assertDictEqual(node, expected_node) @@ -173,803 +137,682 @@ def test_create_with_simple_field(self): with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_006" - csv_record['field_foo'] = "First 006 value|First 006 value|Second 006 value|Third 006 value" + csv_record["id"] = "simple_006" + csv_record["field_foo"] = ( + "First 006 value|First 006 value|Second 006 value|Third 006 value" + ) # csv_record['field_foo'] = "First 006 value|Second 006 value|Third 006 value" - self.node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + self.node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "First 006 value"}, + {"value": "Second 006 value"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First 006 value"}, - {'value': "Second 006 value"} - ] } self.assertDictEqual(self.node, expected_node) - self.assertRegex(str(message.output), r'simple_006 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"simple_006 would exceed maximum number of allowed values \(2\)", + ) def test_simple_field_title_update_replace(self): # Update the node title, first with an 'update_mode' of replace. existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - replace."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - replace."}], + "status": [{"value": 1}], } - self.field_definitions = { - 'title': { - 'cardinality': 1, - 'formatted_text': False - } - } + self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['title'] = "New title - replace." 
- csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "title", existing_node["title"]) + csv_record["title"] = "New title - replace." + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "title", + existing_node["title"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "New title - replace."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "New title - replace."}], + "status": [{"value": 1}], } self.assertDictEqual(node, expected_node) def test_simple_field_title_update_append(self): # Update the node title, first with an update_mode of 'append'. existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - append."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - append."}], + "status": [{"value": 1}], } - self.field_definitions = { - 'title': { - 'cardinality': 1, - 'formatted_text': False - } - } + self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['title'] = "New title - append." - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "title", existing_node["title"]) + csv_record["title"] = "New title - append." 
+ csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "title", + existing_node["title"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - append."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - append."}], + "status": [{"value": 1}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 1 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"record 1 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': False - } + "field_foo": {"cardinality": 1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value" - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["field_foo"] = "Field foo new value" + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo new value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo new value"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': False - } + "field_foo": {"cardinality": 1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value" - csv_record['node_id'] = 1 - node = 
field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["field_foo"] = "Field foo new value" + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 1 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"record 1 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_1_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': False - } + "field_foo": {"cardinality": 1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value|Second foo new value" - csv_record['node_id'] = 2 - node_field_values = [{'value': "Field foo original value"}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["field_foo"] = "Field foo new value|Second foo new value" + csv_record["node_id"] = 2 + node_field_values = [{"value": "Field foo original value"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo new value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo new value"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 2 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"record 2 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": 
[{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': False - } + "field_foo": {"cardinality": -1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 3 - csv_record['field_foo'] = "New value" - self.config['update_mode'] = 'replace' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 3 + csv_record["field_foo"] = "New value" + self.config["update_mode"] = "replace" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "New value"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_replace_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': False - } + "field_foo": {"cardinality": -1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 4 - csv_record['field_foo'] = "New value 1|New value 2|New value 2" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 4 + csv_record["field_foo"] = "New value 1|New value 2|New value 2" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value 1"}, - {'value': "New value 2"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "New value 1"}, {"value": "New value 2"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_unlimited_no_subdelims(self): self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': False - } + 
"field_foo": {"cardinality": -1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 5 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 5 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value"}, + {"value": "New value"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"}, - {'value': "New value"} - - ] } self.assertDictEqual(node, expected_node) existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value"}, + {"value": "New value"}, ], - 'field_foo': [ - {'value': "Field foo original value"}, - {'value': "New value"} - ] } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 55 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 55 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value"}, + {"value": "New value"}, ], - 'field_foo': [ - {'value': "Field foo original value"}, - {'value': "New value"} - - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], 
+ "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': False - } + "field_foo": {"cardinality": -1, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 6 - csv_record['field_foo'] = "New value 1|New value 2|New value 1" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 6 + csv_record["field_foo"] = "New value 1|New value 2|New value 1" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value"}, + {"value": "New value 1"}, + {"value": "New value 2"}, ], - 'field_foo': [ - {'value': "Field foo original value"}, - {'value': "New value 1"}, - {'value': "New value 2"} - - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_replace_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': False - } + "field_foo": {"cardinality": 2, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 7 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 7 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "New value"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + 
"type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "Field foo original value 2"} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': False - } + "field_foo": {"cardinality": 2, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 8 - csv_record['field_foo'] = "New value 3" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 8 + csv_record["field_foo"] = "New value 3" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "Field foo original value 2"} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 8 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"record 8 would exceed maximum number of allowed values \(2\)", + ) def test_simple_field_update_replace_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "Field foo original value 2"} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': False - } + "field_foo": {"cardinality": 2, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 9 - csv_record['field_foo'] = "First node 9 value|Second node 9 value|Third node 9 value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 9 + csv_record["field_foo"] = ( + "First node 9 value|Second node 9 value|Third node 9 value" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 
'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "First node 9 value"}, + {"value": "Second node 9 value"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First node 9 value"}, - {'value': "Second node 9 value"} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 9 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"record 9 would exceed maximum number of allowed values \(2\)", + ) def test_simple_field_update_append_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value 1"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'formatted_text': False - } + "field_foo": {"cardinality": 3, "formatted_text": False} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 10 - csv_record['field_foo'] = "First node 10 value|First node 10 value|Second node 10 value|Third node 10 value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 10 + csv_record["field_foo"] = ( + "First node 10 value|First node 10 value|Second node 10 value|Third node 10 value" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "First node 10 value"}, + {"value": "Second node 10 value"}, ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "First node 10 value"}, - {'value': "Second node 10 value"} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 10 would exceed maximum number of allowed values \(3\)') + self.assertRegex( + str(message.output), + r"record 10 would exceed maximum number of allowed values \(3\)", + ) def test_simple_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo 
original value 1"}, - {'value': "Field foo original value 2"} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'formatted_text': False - } + "field_foo": {"cardinality": 3, "formatted_text": False} } - self.config['update_mode'] = 'delete' + self.config["update_mode"] = "delete" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 11 - csv_record['field_foo'] = "First node 11 value|Second node 11 value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 11 + csv_record["field_foo"] = "First node 11 value|Second node 11 value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_simple_field_dudupe_values(self): # First, split values from CSV. - input = ['first value', 'first value', 'second value', 'second value', 'third value'] + input = [ + "first value", + "first value", + "second value", + "second value", + "third value", + ] field = workbench_fields.SimpleField() output = field.dedupe_values(input) - self.assertEqual(output, ['first value', 'second value', 'third value']) + self.assertEqual(output, ["first value", "second value", "third value"]) # Then fully formed dictionaries. - input = [{'value': 'First string'}, {'value': 'Second string'}, {'value': 'First string'}, {'value': 'Second string'}, {'value': 'Third string'}] + input = [ + {"value": "First string"}, + {"value": "Second string"}, + {"value": "First string"}, + {"value": "Second string"}, + {"value": "Third string"}, + ] field = workbench_fields.SimpleField() output = field.dedupe_values(input) - self.assertEqual(output, [{'value': 'First string'}, {'value': 'Second string'}, {'value': 'Third string'}]) + self.assertEqual( + output, + [ + {"value": "First string"}, + {"value": "Second string"}, + {"value": "Third string"}, + ], + ) class TestSimpleFieldFormatted(unittest.TestCase): def setUp(self): self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id', - 'text_format_id': 'basic_html', - 'field_text_format_ids': {'field_foo': 'restricted_html', 'field_bar': 'basic_html'} + "task": "create", + "subdelimiter": "|", + "id_field": "id", + "text_format_id": "basic_html", + "field_text_format_ids": { + "field_foo": "restricted_html", + "field_bar": "basic_html", + }, } def test_create_with_simple_field(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } # Create a node with a simple field of cardinality 1, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': True - } + "field_foo": {"cardinality": 1, "formatted_text": True} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_001" - csv_record['field_foo'] = "Field foo value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_001" + csv_record["field_foo"] = "Field foo value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo value", 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo value", "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) @@ -977,104 +820,85 @@ def test_create_with_simple_field(self): with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_002" - csv_record['field_foo'] = "Field foo value|Extraneous value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_002" + csv_record["field_foo"] = "Field foo value|Extraneous value" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'simple_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"simple_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with a simple field of cardinality unlimited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': True - } + "field_foo": {"cardinality": -1, "formatted_text": True} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_003" - csv_record['field_foo'] = "First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_003" + csv_record["field_foo"] = "First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value", 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "First value", "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) # Create a node with a simple field of cardinality unlimited, with subdelimiters. field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_004" - csv_record['field_foo'] = "First value|Second value|First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_004" + csv_record["field_foo"] = "First value|Second value|First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "First value", "format": "restricted_html"}, + {"value": "Second value", "format": "restricted_html"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value", 'format': 'restricted_html'}, - {'value': "Second value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a simple field of cardinality limited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': True - } + "field_foo": {"cardinality": 2, "formatted_text": True} } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_005" - csv_record['field_foo'] = "First value" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "simple_005" + csv_record["field_foo"] = "First value" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First value", 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "First value", "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) @@ -1082,800 +906,709 @@ def test_create_with_simple_field(self): with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['id'] = "simple_006" - csv_record['field_foo'] = "First 006 value|First 006 value|Second 006 value|Third 006 value" + csv_record["id"] = "simple_006" + csv_record["field_foo"] = ( + "First 006 value|First 006 value|Second 006 value|Third 006 value" + ) # csv_record['field_foo'] = "First 006 value|Second 006 value|Third 006 value" - self.node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + self.node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "First 006 value", "format": "restricted_html"}, + {"value": "Second 006 value", "format": "restricted_html"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First 006 value", 'format': 'restricted_html'}, - {'value': "Second 006 value", 'format': 'restricted_html'} - ] } self.assertDictEqual(self.node, expected_node) - self.assertRegex(str(message.output), r'simple_006 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"simple_006 would exceed maximum number of allowed values \(2\)", + ) def test_simple_field_title_update_replace(self): # Update the node title, first with an 'update_mode' of replace. 
existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - replace."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - replace."}], + "status": [{"value": 1}], } - self.field_definitions = { - 'title': { - 'cardinality': 1, - 'formatted_text': False - } - } + self.field_definitions = {"title": {"cardinality": 1, "formatted_text": False}} - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['title'] = "New title - replace." - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "title", existing_node["title"]) + csv_record["title"] = "New title - replace." + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "title", + existing_node["title"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "New title - replace."} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "New title - replace."}], + "status": [{"value": 1}], } self.assertDictEqual(node, expected_node) def test_simple_field_title_update_append(self): # Update the node title, first with an update_mode of 'append'. existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - append."} - ], - 'status': [ - {'value': 1, 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - append."}], + "status": [{"value": 1, "format": "restricted_html"}], } - self.field_definitions = { - 'title': { - 'cardinality': 1, - 'formatted_text': True - } - } + self.field_definitions = {"title": {"cardinality": 1, "formatted_text": True}} - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['title'] = "New title - append." - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "title", existing_node["title"]) + csv_record["title"] = "New title - append." 
+ csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "title", + existing_node["title"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Old title - append."} - ], - 'status': [ - {'value': 1, 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Old title - append."}], + "status": [{"value": 1, "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 1 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"record 1 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': True - } + "field_foo": {"cardinality": 1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value" - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["field_foo"] = "Field foo new value" + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo new value", "format": "restricted_html"} ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo new value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': True - } + "field_foo": {"cardinality": 1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + 
self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value" - csv_record['node_id'] = 1 - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["field_foo"] = "Field foo new value" + csv_record["node_id"] = 1 + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 1 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"record 1 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_1_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'formatted_text': True - } + "field_foo": {"cardinality": 1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['field_foo'] = "Field foo new value|Second foo new value" - csv_record['node_id'] = 2 - node_field_values = [{'value': "Field foo original value"}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["field_foo"] = "Field foo new value|Second foo new value" + csv_record["node_id"] = 2 + node_field_values = [{"value": "Field foo original value"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo new value", "format": "restricted_html"} ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo new value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 2 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + 
str(message.output), + r"record 2 would exceed maximum number of allowed values \(1\)", + ) def test_simple_field_update_replace_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': True - } + "field_foo": {"cardinality": -1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 3 - csv_record['field_foo'] = "New value" - self.config['update_mode'] = 'replace' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 3 + csv_record["field_foo"] = "New value" + self.config["update_mode"] = "replace" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value", 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "New value", "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_replace_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': True - } + "field_foo": {"cardinality": -1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 4 - csv_record['field_foo'] = "New value 1|New value 2|New value 2" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 4 + csv_record["field_foo"] = "New value 1|New value 2|New value 2" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 
'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "New value 1", "format": "restricted_html"}, + {"value": "New value 2", "format": "restricted_html"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value 1", 'format': 'restricted_html'}, - {'value': "New value 2", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_unlimited_no_subdelims(self): self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': True - } + "field_foo": {"cardinality": -1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 5 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 5 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"}, + {"value": "New value", "format": "restricted_html"}, ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'}, - {'value': "New value", 'format': 'restricted_html'} - - ] } self.assertDictEqual(node, expected_node) existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 55 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 55 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 
'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"}, + {"value": "New value", "format": "restricted_html"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'}, - {'value': "New value", 'format': 'restricted_html'} - - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"} ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'formatted_text': True - } + "field_foo": {"cardinality": -1, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 6 - csv_record['field_foo'] = "New value 1|New value 2|New value 1" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 6 + csv_record["field_foo"] = "New value 1|New value 2|New value 1" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value", "format": "restricted_html"}, + {"value": "New value 1", "format": "restricted_html"}, + {"value": "New value 2", "format": "restricted_html"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value", 'format': 'restricted_html'}, - {'value': "New value 1", 'format': 'restricted_html'}, - {'value': "New value 2", 'format': 'restricted_html'} - - ] } self.assertDictEqual(node, expected_node) def test_simple_field_update_replace_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "Field foo original value"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': True - } + "field_foo": {"cardinality": 2, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + 
self.config["update_mode"] = "replace" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 7 - csv_record['field_foo'] = "New value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 7 + csv_record["field_foo"] = "New value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "New value", 'format': 'restricted_html'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"value": "New value", "format": "restricted_html"}], } self.assertDictEqual(node, expected_node) def test_simple_field_update_append_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1", "format": "restricted_html"}, + {"value": "Field foo original value 2", "format": "restricted_html"}, ], - 'field_foo': [ - {'value': "Field foo original value 1", 'format': 'restricted_html'}, - {'value': "Field foo original value 2", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': True - } + "field_foo": {"cardinality": 2, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 8 - csv_record['field_foo'] = "New value 3" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 8 + csv_record["field_foo"] = "New value 3" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "value": "Field foo original value 1", + "format": "restricted_html", + }, + { + "value": "Field foo original value 2", + "format": "restricted_html", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1", 'format': 'restricted_html'}, - {'value': "Field foo original value 2", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 8 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"record 8 would exceed maximum number of allowed values \(2\)", + ) def 
test_simple_field_update_replace_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "Field foo original value 2"} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'formatted_text': True - } + "field_foo": {"cardinality": 2, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 9 - csv_record['field_foo'] = "First node 9 value|Second node 9 value|Third node 9 value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 9 + csv_record["field_foo"] = ( + "First node 9 value|Second node 9 value|Third node 9 value" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "First node 9 value", "format": "restricted_html"}, + {"value": "Second node 9 value", "format": "restricted_html"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "First node 9 value", 'format': 'restricted_html'}, - {'value': "Second node 9 value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 9 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"record 9 would exceed maximum number of allowed values \(2\)", + ) def test_simple_field_update_append_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1", "format": "restricted_html"} ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1", 'format': 'restricted_html'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'formatted_text': True - } + "field_foo": {"cardinality": 3, "formatted_text": True} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 10 - csv_record['field_foo'] = "First node 10 value|First node 10 value|Second node 10 value|Third node 10 value" - node = 
field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 10 + csv_record["field_foo"] = ( + "First node 10 value|First node 10 value|Second node 10 value|Third node 10 value" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "value": "Field foo original value 1", + "format": "restricted_html", + }, + {"value": "First node 10 value", "format": "restricted_html"}, + {"value": "Second node 10 value", "format": "restricted_html"}, ], - 'field_foo': [ - {'value': "Field foo original value 1", 'format': 'restricted_html'}, - {'value': "First node 10 value", 'format': 'restricted_html'}, - {'value': "Second node 10 value", 'format': 'restricted_html'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'record 10 would exceed maximum number of allowed values \(3\)') + self.assertRegex( + str(message.output), + r"record 10 would exceed maximum number of allowed values \(3\)", + ) def test_simple_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"value": "Field foo original value 1"}, + {"value": "Field foo original value 2"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'value': "Field foo original value 1"}, - {'value': "Field foo original value 2"} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'formatted_text': False - } + "field_foo": {"cardinality": 3, "formatted_text": False} } - self.config['update_mode'] = 'delete' + self.config["update_mode"] = "delete" field = workbench_fields.SimpleField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 11 - csv_record['field_foo'] = "First node 11 value|Second node 11 value" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 11 + csv_record["field_foo"] = "First node 11 value|Second node 11 value" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_simple_field_dudupe_values(self): # First, split values from CSV. 
- input = ['first value', 'first value', 'second value', 'second value', 'third value'] + input = [ + "first value", + "first value", + "second value", + "second value", + "third value", + ] field = workbench_fields.SimpleField() output = field.dedupe_values(input) - self.assertEqual(output, ['first value', 'second value', 'third value']) + self.assertEqual(output, ["first value", "second value", "third value"]) # Then fully formed dictionaries. - input = [{'value': 'First string'}, {'value': 'Second string'}, {'value': 'First string'}, {'value': 'Second string'}, {'value': 'Third string'}] + input = [ + {"value": "First string"}, + {"value": "Second string"}, + {"value": "First string"}, + {"value": "Second string"}, + {"value": "Third string"}, + ] field = workbench_fields.SimpleField() output = field.dedupe_values(input) - self.assertEqual(output, [{'value': 'First string'}, {'value': 'Second string'}, {'value': 'Third string'}]) + self.assertEqual( + output, + [ + {"value": "First string"}, + {"value": "Second string"}, + {"value": "Third string"}, + ], + ) class TestGeolocationField(unittest.TestCase): def setUp(self): self.maxDiff = None - self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id' - } + self.config = {"task": "create", "subdelimiter": "|", "id_field": "id"} def test_create_with_geolocation_field(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } # Create a node with a geolocation field of cardinality 1, no subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_001" - csv_record['field_foo'] = "48.16667,-123.93333" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_001" + csv_record["field_foo"] = "48.16667,-123.93333" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '48.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "48.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) @@ -1883,706 +1616,645 @@ def test_create_with_geolocation_field(self): with self.assertLogs() as message: field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_002" - csv_record['field_foo'] = "47.16667,-123.93333|49.1222,-123.99999" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_002" + csv_record["field_foo"] = "47.16667,-123.93333|49.1222,-123.99999" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': 
[ - {'value': 1} - ], - 'field_foo': [ - {'lat': '47.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "47.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record geo_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record geo_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with a geolocation field of cardinality unlimited, no subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_003" - csv_record['field_foo'] = "59.16667,-123.93333" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_003" + csv_record["field_foo"] = "59.16667,-123.93333" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '59.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "59.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) # Create a node with a geolocation field of cardinality unlimited, with subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_004" - csv_record['field_foo'] = "59.16667,-123.93333|69.16667,-123.93333|69.16667,-123.93333" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_004" + csv_record["field_foo"] = ( + "59.16667,-123.93333|69.16667,-123.93333|69.16667,-123.93333" + ) + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "59.16667", "lng": "-123.93333"}, + {"lat": "69.16667", "lng": "-123.93333"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '59.16667', 'lng': '-123.93333'}, - {'lat': '69.16667', 'lng': '-123.93333'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a geolocation field of cardinality limited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_005" - csv_record['field_foo'] = "58.16667,-123.93333" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_005" + csv_record["field_foo"] = "58.16667,-123.93333" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '58.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "58.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) # Create a node with a geolocation field of cardinality limited, with subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } with self.assertLogs() as message: field = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['id'] = "geo_006" - csv_record['field_foo'] = "51.16667,-123.93333|61.16667,-123.93333|61.16667,-123.93333|63.16667,-123.93333|61.16667,-123.93334" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "geo_006" + csv_record["field_foo"] = ( + "51.16667,-123.93333|61.16667,-123.93333|61.16667,-123.93333|63.16667,-123.93333|61.16667,-123.93334" + ) + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "51.16667", "lng": "-123.93333"}, + {"lat": "61.16667", "lng": "-123.93333"}, + {"lat": "63.16667", "lng": "-123.93333"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '51.16667', 'lng': '-123.93333'}, - {'lat': '61.16667', 'lng': '-123.93333'}, - {'lat': '63.16667', 'lng': '-123.93333'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record geo_006 would exceed maximum number of allowed values \(3\)') + self.assertRegex( + str(message.output), + r"for record geo_006 would exceed maximum number of allowed values \(3\)", + ) def test_geolocation_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 100 - 
csv_record['field_foo'] = "50.16667,-123.93333" + csv_record["node_id"] = 100 + csv_record["field_foo"] = "50.16667,-123.93333" node_field_values = [{"lat": "49.16667", "lng": "-122.93333"}] - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '50.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "50.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) def test_geolocation_field_update_replace_cardinality_1_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 101 - csv_record['field_foo'] = "50.16667,-123.93333|46.16667,-113.93333" + csv_record["node_id"] = 101 + csv_record["field_foo"] = "50.16667,-123.93333|46.16667,-113.93333" node_field_values = [{"lat": "49.16667", "lng": "-122.93333"}] - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '50.16667', 'lng': '-123.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "50.16667", "lng": "-123.93333"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 101 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record 101 would exceed maximum number of allowed values \(1\)", + ) def test_geolocation_field_update_replace_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" geolocation 
= workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 102 - csv_record['field_foo'] = "55.26667,-113.93333" + csv_record["node_id"] = 102 + csv_record["field_foo"] = "55.26667,-113.93333" node_field_values = [{"lat": "49.16667", "lng": "-122.93333"}] - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '55.26667', 'lng': '-113.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "55.26667", "lng": "-113.93333"}], } self.assertDictEqual(node, expected_node) - def test_geolocation_field_update_replace_cardinality_unlimited_with_subdelims(self): + def test_geolocation_field_update_replace_cardinality_unlimited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 103 - csv_record['field_foo'] = "55.26661,-113.93331|51.26667,-111.93333|55.26661,-113.93331" + csv_record["node_id"] = 103 + csv_record["field_foo"] = ( + "55.26661,-113.93331|51.26667,-111.93333|55.26661,-113.93331" + ) node_field_values = [{"lat": "49.16667", "lng": "-122.93333"}] - node103 = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + node103 = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "55.26661", "lng": "-113.93331"}, + {"lat": "51.26667", "lng": "-111.93333"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '55.26661', 'lng': '-113.93331'}, - {'lat': '51.26667', 'lng': '-111.93333'} - ] } self.assertDictEqual(node103, expected_node) def test_geolocation_field_update_append_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "49.1", "lng": "-122.9"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "49.1", "lng": "-122.9"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, 
+ "field_foo": { + "cardinality": -1, } } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 104 - csv_record['field_foo'] = "35.2,-99.9" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 104 + csv_record["field_foo"] = "35.2,-99.9" + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "49.1", "lng": "-122.9"}, + {"lat": "35.2", "lng": "-99.9"}, ], - 'field_foo': [ - {'lat': '49.1', 'lng': '-122.9'}, - {'lat': '35.2', 'lng': '-99.9'} - ] } self.assertDictEqual(node, expected_node) def test_geolocation_field_update_append_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': "49.1", 'lng': "-122.9"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "49.1", "lng": "-122.9"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - self.config['update_mode'] = 'append' - csv_record['node_id'] = 105 - csv_record['field_foo'] = "56.2,-113.9|51.2,-100.9|51.2,-100.9" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) - expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + self.config["update_mode"] = "append" + csv_record["node_id"] = 105 + csv_record["field_foo"] = "56.2,-113.9|51.2,-100.9|51.2,-100.9" + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) + expected_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "49.1", "lng": "-122.9"}, + {"lat": "56.2", "lng": "-113.9"}, + {"lat": "51.2", "lng": "-100.9"}, ], - 'field_foo': [ - {'lat': '49.1', 'lng': '-122.9'}, - {'lat': '56.2', 'lng': '-113.9'}, - {'lat': '51.2', 'lng': '-100.9'} - ] } self.assertDictEqual(node, expected_node) def test_geolocation_field_update_replace_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + 
"cardinality": 2, } } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 106 - csv_record['field_foo'] = "53.26667,-133.93333" + csv_record["node_id"] = 106 + csv_record["field_foo"] = "53.26667,-133.93333" node_field_values = [{"lat": "43.16667", "lng": "-123.63"}] - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '53.26667', 'lng': '-133.93333'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "53.26667", "lng": "-133.93333"}], } self.assertDictEqual(node, expected_node) def test_geolocation_field_update_replace_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "43.16667", "lng": "-123.63"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "43.16667", "lng": "-123.63"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" with self.assertLogs() as message: geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 106 - csv_record['field_foo'] = "53.26667,-133.93333|51.34,-111.1|51.51,-111.999|53.26667,-133.93333" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 106 + csv_record["field_foo"] = ( + "53.26667,-133.93333|51.34,-111.1|51.51,-111.999|53.26667,-133.93333" + ) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "53.26667", "lng": "-133.93333"}, + {"lat": "51.34", "lng": "-111.1"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '53.26667', 'lng': '-133.93333'}, - {'lat': '51.34', 'lng': '-111.1'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 106 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 106 would exceed maximum number of allowed values \(2\)", + ) def test_geolocation_field_update_append_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 
'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "47.1", "lng": "-127.6"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "47.1", "lng": "-127.6"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 107 - csv_record['field_foo'] = "57.2,-133.7" - node_107 = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 107 + csv_record["field_foo"] = "57.2,-133.7" + node_107 = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '47.1', 'lng': '-127.6'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "47.1", "lng": "-127.6"}], } self.assertDictEqual(node_107, expected_node) - self.assertRegex(str(message.output), r'for record 107 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record 107 would exceed maximum number of allowed values \(1\)", + ) - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } def test_geolocation_field_update_replace_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "49.16667", "lng": "-122.93333"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "49.16667", "lng": "-122.93333"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 108 - csv_record['field_foo'] = "55.80,-113.80|55.82,-113.82|55.82,-113.82|55.83,-113.83" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 108 + csv_record["field_foo"] = ( + "55.80,-113.80|55.82,-113.82|55.82,-113.82|55.83,-113.83" + ) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 
'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "55.80", "lng": "-113.80"}, + {"lat": "55.82", "lng": "-113.82"}, ], - 'field_foo': [ - {'lat': '55.80', 'lng': '-113.80'}, - {'lat': '55.82', 'lng': '-113.82'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 108 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 108 would exceed maximum number of allowed values \(2\)", + ) def test_geolocation_field_update_append_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "49.9", "lng": "-122.9"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "49.9", "lng": "-122.9"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 109 - csv_record['field_foo'] = "55.90,-113.90|55.92,-113.92|55.92,-113.92|55.93,-113.93" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node['field_foo']) + csv_record["node_id"] = 109 + csv_record["field_foo"] = ( + "55.90,-113.90|55.92,-113.92|55.92,-113.92|55.93,-113.93" + ) + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"lat": "49.9", "lng": "-122.9"}, + {"lat": "55.90", "lng": "-113.90"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'lat': '49.9', 'lng': '-122.9'}, - {'lat': '55.90', 'lng': '-113.90'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 109 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 109 would exceed maximum number of allowed values \(2\)", + ) def test_geolocation_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"lat": "49.9", "lng": "-122.9"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"lat": "49.9", "lng": "-122.9"}], } # Update a node with update_mode of 'delete'. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'delete' + self.config["task"] = "update" + self.config["update_mode"] = "delete" geolocation = workbench_fields.GeolocationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 110 - csv_record['field_foo'] = "55.90,-113.90|55.92,-113.92|55.93,-113.93" - node = geolocation.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 110 + csv_record["field_foo"] = "55.90,-113.90|55.92,-113.92|55.93,-113.93" + node = geolocation.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_geolocation_field_dudupe_values(self): # Split values from CSV. - input = ['49.16667,-123.93333', '49.25,-124.8', '49.16667,-123.93333', '49.25,-124.8', '49.16667,-123.93333'] + input = [ + "49.16667,-123.93333", + "49.25,-124.8", + "49.16667,-123.93333", + "49.25,-124.8", + "49.16667,-123.93333", + ] field = workbench_fields.GeolocationField() output = field.dedupe_values(input) - self.assertEqual(output, ['49.16667,-123.93333', '49.25,-124.8']) + self.assertEqual(output, ["49.16667,-123.93333", "49.25,-124.8"]) # Dictionaries. input = [ @@ -2590,63 +2262,52 @@ def test_geolocation_field_dudupe_values(self): {"lat": "58.8", "lng": "-125.3"}, {"lat": "12.5", "lng": "-122.9"}, {"lat": "58.8", "lng": "-125.3"}, - {"lat": "58.8", "lng": "-125.3"}] + {"lat": "58.8", "lng": "-125.3"}, + ] field = workbench_fields.GeolocationField() output = field.dedupe_values(input) - self.assertEqual(output, [ - {"lat": "51.9", "lng": "-22.9"}, - {"lat": "58.8", "lng": "-125.3"}, - {"lat": "12.5", "lng": "-122.9"}]) + self.assertEqual( + output, + [ + {"lat": "51.9", "lng": "-22.9"}, + {"lat": "58.8", "lng": "-125.3"}, + {"lat": "12.5", "lng": "-122.9"}, + ], + ) class TestLinkField(unittest.TestCase): def setUp(self): self.maxDiff = None - self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id' - } + self.config = {"task": "create", "subdelimiter": "|", "id_field": "id"} def test_create_with_link_field(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } # Create a node with a link field of cardinality 1, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_001" - csv_record['field_foo'] = "http://www.foo.com%%Foo's website" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_001" + csv_record["field_foo"] = "http://www.foo.com%%Foo's website" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://www.foo.com', 'title': "Foo's website"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"uri": "http://www.foo.com", "title": "Foo's website"}], } self.assertDictEqual(node, expected_node) @@ -2654,730 +2315,725 @@ def test_create_with_link_field(self): with self.assertLogs() as message: field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_002" - csv_record['field_foo'] = "http://bar.com%%Bar website|http://biz.com%%Biz website" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_002" + csv_record["field_foo"] = ( + "http://bar.com%%Bar website|http://biz.com%%Biz website" + ) + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://bar.com', 'title': 'Bar website'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"uri": "http://bar.com", "title": "Bar website"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record link_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record link_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with a link field of cardinality unlimited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_003" - csv_record['field_foo'] = "http://geo003.net%%Geo 3 Blog" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_003" + csv_record["field_foo"] = "http://geo003.net%%Geo 3 Blog" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://geo003.net', 'title': 'Geo 3 Blog'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"uri": "http://geo003.net", "title": "Geo 3 Blog"}], } self.assertDictEqual(node, expected_node) # Create a node with a link field of cardinality unlimited, with subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_004" - csv_record['field_foo'] = "http://link4-1.net%%Link 004-1 website|http://link4-1.net%%Link 004-1 website|http://link4-2.net%%Link 004-2 website" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_004" + csv_record["field_foo"] = ( + "http://link4-1.net%%Link 004-1 website|http://link4-1.net%%Link 004-1 website|http://link4-2.net%%Link 004-2 website" + ) + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"uri": "http://link4-1.net", "title": "Link 004-1 website"}, + {"uri": "http://link4-2.net", "title": "Link 004-2 website"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://link4-1.net', 'title': 'Link 004-1 website'}, - {'uri': 'http://link4-2.net', 'title': 'Link 004-2 website'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a link field of cardinality limited, no subdelimiters. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_005" - csv_record['field_foo'] = "http://link5.net%%Link 005 website" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_005" + csv_record["field_foo"] = "http://link5.net%%Link 005 website" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://link5.net', 'title': 'Link 005 website'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"uri": "http://link5.net", "title": "Link 005 website"}], } self.assertDictEqual(node, expected_node) # Create a node with a link field of cardinality limited, with subdelimiters. self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } with self.assertLogs() as message: field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['id'] = "link_006" - csv_record['field_foo'] = "http://link6-1.net%%Link 006-1 website|http://link6-2.net%%Link 006-2 website|http://link6-3.net%%Link 006-3 website" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "link_006" + csv_record["field_foo"] = ( + "http://link6-1.net%%Link 006-1 website|http://link6-2.net%%Link 006-2 website|http://link6-3.net%%Link 006-3 website" + ) + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"uri": "http://link6-1.net", "title": "Link 006-1 website"}, + {"uri": "http://link6-2.net", "title": "Link 006-2 website"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://link6-1.net', 'title': 'Link 006-1 website'}, - {'uri': 'http://link6-2.net', 'title': 'Link 006-2 website'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record link_006 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record link_006 would exceed maximum number of allowed values \(2\)", + ) def test_link_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': { - "uri": "http://update1original.net", "title": "Update 1 original's website" - } + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": { + "uri": "http://update1original.net", + "title": "Update 1 original's website", + }, } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } - 
self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 100 - csv_record['field_foo'] = "http://update1replacement.net%%Update 1 replacement's website" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 100 + csv_record["field_foo"] = ( + "http://update1replacement.net%%Update 1 replacement's website" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "uri": "http://update1replacement.net", + "title": "Update 1 replacement's website", + } ], - 'field_foo': [ - {'uri': 'http://update1replacement.net', 'title': "Update 1 replacement's website"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_replace_cardinality_1_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': { - "uri": "http://update2original.net", "title": "Update 2 original's website" - } + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": { + "uri": "http://update2original.net", + "title": "Update 2 original's website", + }, } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, + "field_foo": { + "cardinality": 1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 101 - csv_record['field_foo'] = "http://update2replacement.net%%Update 2 replacement's website|http://update2-1replacement.net%%Update 2-1 replacement's website" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 101 + csv_record["field_foo"] = ( + "http://update2replacement.net%%Update 2 replacement's website|http://update2-1replacement.net%%Update 2-1 replacement's website" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "uri": "http://update2replacement.net", + "title": "Update 2 replacement's website", + } ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://update2replacement.net', 'title': "Update 2 replacement's website"} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for 
record 101 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record 101 would exceed maximum number of allowed values \(1\)", + ) def test_link_field_update_replace_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': { - "uri": "http://updatenode102original.net", "title": "Update node 102 original's website" - } + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": { + "uri": "http://updatenode102original.net", + "title": "Update node 102 original's website", + }, } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 102 - csv_record['field_foo'] = "http://updatenode102replace.net%%Update to node 102 replacement's website" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 102 + csv_record["field_foo"] = ( + "http://updatenode102replace.net%%Update to node 102 replacement's website" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "uri": "http://updatenode102replace.net", + "title": "Update to node 102 replacement's website", + } ], - 'field_foo': [ - {'uri': 'http://updatenode102replace.net', 'title': "Update to node 102 replacement's website"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_replace_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': { - "uri": "http://updatenode103original.net", "title": "Update node 103 original's website" - } + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": { + "uri": "http://updatenode103original.net", + "title": "Update node 103 original's website", + }, } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 103 - csv_record['field_foo'] = "http://updatenode103replace1.net%%103 replacement 1|http://updatenode103replacement2.net%%103 replacement 2|http://updatenode103replacement2.net%%103 replacement 2" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, 
"field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 103 + csv_record["field_foo"] = ( + "http://updatenode103replace1.net%%103 replacement 1|http://updatenode103replacement2.net%%103 replacement 2|http://updatenode103replacement2.net%%103 replacement 2" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "uri": "http://updatenode103replace1.net", + "title": "103 replacement 1", + }, + { + "uri": "http://updatenode103replacement2.net", + "title": "103 replacement 2", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'uri': 'http://updatenode103replace1.net', 'title': "103 replacement 1"}, - {'uri': 'http://updatenode103replacement2.net', 'title': "103 replacement 2"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_append_cardinality_unlimited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {"uri": "http://node104o.net", "title": "Node 104 o"} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"uri": "http://node104o.net", "title": "Node 104 o"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 104 - csv_record['field_foo'] = "http://node104a.net%%Node 104 a" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 104 + csv_record["field_foo"] = "http://node104a.net%%Node 104 a" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"uri": "http://node104o.net", "title": "Node 104 o"}, + {"uri": "http://node104a.net", "title": "Node 104 a"}, ], - 'field_foo': [ - {'uri': 'http://node104o.net', 'title': "Node 104 o"}, - {'uri': 'http://node104a.net', 'title': "Node 104 a"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_append_cardinality_unlimited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": 
"http://node105original.net", "title": "Node 105 original"} - ] + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, + "field_foo": { + "cardinality": -1, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 105 - csv_record['field_foo'] = "http://node105-1.net%%Node 105-1|http://node105-2.net%%Node 105-2|http://node105-2.net%%Node 105-2" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 105 + csv_record["field_foo"] = ( + "http://node105-1.net%%Node 105-1|http://node105-2.net%%Node 105-2|http://node105-2.net%%Node 105-2" + ) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"uri": "http://node105original.net", "title": "Node 105 original"}, + {"uri": "http://node105-1.net", "title": "Node 105-1"}, + {"uri": "http://node105-2.net", "title": "Node 105-2"}, ], - 'field_foo': [ - {'uri': 'http://node105original.net', 'title': "Node 105 original"}, - {'uri': 'http://node105-1.net', 'title': "Node 105-1"}, - {'uri': 'http://node105-2.net', 'title': "Node 105-2"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_replace_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://106o-1.net", "title": "Node 106 1 original"}, - {"uri": "http://106o-2.net", "title": "Node 106 2 original"} - ] + {"uri": "http://106o-2.net", "title": "Node 106 2 original"}, + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 106 - csv_record['field_foo'] = "http://node06r.net%%Node 106 replacement" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 106 + csv_record["field_foo"] = "http://node06r.net%%Node 106 replacement" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"uri": "http://node06r.net", 
"title": "Node 106 replacement"} ], - 'field_foo': [ - {'uri': 'http://node06r.net', 'title': "Node 106 replacement"} - ] } self.assertDictEqual(node, expected_node) def test_link_field_update_append_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://107o-1.net", "title": "Node 107 1 original"}, - {"uri": "http://107o-2.net", "title": "Node 107 2 original"} - ] + {"uri": "http://107o-2.net", "title": "Node 107 2 original"}, + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 107 - csv_record['field_foo'] = "http://node07a.net%%Node 107 appended" - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 107 + csv_record["field_foo"] = "http://node07a.net%%Node 107 appended" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://107o-1.net", "title": "Node 107 1 original"}, - {"uri": "http://107o-2.net", "title": "Node 107 2 original"} - ] + {"uri": "http://107o-2.net", "title": "Node 107 2 original"}, + ], } self.assertDictEqual(node, expected_node) def test_link_field_update_append_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://108o-1.net", "title": "Node 108 1 original"}, - {"uri": "http://108o-2.net", "title": "Node 108 2 original"} - ] + {"uri": "http://108o-2.net", "title": "Node 108 2 original"}, + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 108 - csv_record['field_foo'] = "http://08a-1.net%%Node 108 1 appended|http://108a-2.net%%Node 108 2 appended|http://108a-2.net%%Node 108 2 appended" + csv_record["node_id"] = 108 + csv_record["field_foo"] = ( + "http://08a-1.net%%Node 108 1 appended|http://108a-2.net%%Node 108 2 appended|http://108a-2.net%%Node 108 2 appended" + ) node_field_values = [] - node = field.update(self.config, 
self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://108o-1.net", "title": "Node 108 1 original"}, - {"uri": "http://108o-2.net", "title": "Node 108 2 original"} - ] + {"uri": "http://108o-2.net", "title": "Node 108 2 original"}, + ], } self.assertDictEqual(node, expected_node) # Violate cardinality. self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 109 - csv_record['field_foo'] = "http://09a-1.net%%Node 109 1 appended|http://109a-2.net%%Node 109 2 appended" - node_field_values = [{"uri": "http://109o-1.net", "title": "Node 109 1 original"}, {"uri": "http://109o-2.net", "title": "Node 109 2 original"}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 109 + csv_record["field_foo"] = ( + "http://09a-1.net%%Node 109 1 appended|http://109a-2.net%%Node 109 2 appended" + ) + node_field_values = [ + {"uri": "http://109o-1.net", "title": "Node 109 1 original"}, + {"uri": "http://109o-2.net", "title": "Node 109 2 original"}, + ] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://109o-1.net", "title": "Node 109 1 original"}, {"uri": "http://109o-2.net", "title": "Node 109 2 original"}, {"uri": "http://09a-1.net", "title": "Node 109 1 appended"}, - ] + ], } self.assertDictEqual(node, expected_node) def test_link_field_update_replace_cardinality_limited_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://110o-1.net", "title": "Node 110 1 original"}, - {"uri": "http://110o-2.net", "title": "Node 110 2 original"} - ] + {"uri": "http://110o-2.net", "title": "Node 110 2 original"}, + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, + "field_foo": { + "cardinality": 2, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.LinkField() csv_record = 
collections.OrderedDict() - csv_record['node_id'] = 110 - csv_record['field_foo'] = "http://110r-1.net%%Node 110 1 replaced|http://110r-2.net%%Node 110 2 replaced|http://110r-2.net%%Node 110 2 replaced" + csv_record["node_id"] = 110 + csv_record["field_foo"] = ( + "http://110r-1.net%%Node 110 1 replaced|http://110r-2.net%%Node 110 2 replaced|http://110r-2.net%%Node 110 2 replaced" + ) node_field_values = [] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://110r-1.net", "title": "Node 110 1 replaced"}, - {"uri": "http://110r-2.net", "title": "Node 110 2 replaced"} - ] + {"uri": "http://110r-2.net", "title": "Node 110 2 replaced"}, + ], } self.assertDictEqual(node, expected_node) # Violate cardinality. self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 111 - csv_record['field_foo'] = "http://111r-1.net%%Node 111 1 replaced|http://111r-2.net%%Node 111 2 replaced|http://111r-2.net%%Node 111 2 replaced" - node_field_values = [{"uri": "http://111o-1.net", "title": "Node 111 1 original"}, {"uri": "http://111o-2.net", "title": "Node 111 2 original"}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 111 + csv_record["field_foo"] = ( + "http://111r-1.net%%Node 111 1 replaced|http://111r-2.net%%Node 111 2 replaced|http://111r-2.net%%Node 111 2 replaced" + ) + node_field_values = [ + {"uri": "http://111o-1.net", "title": "Node 111 1 original"}, + {"uri": "http://111o-2.net", "title": "Node 111 2 original"}, + ] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://111r-1.net", "title": "Node 111 1 replaced"}, - {"uri": "http://111r-2.net", "title": "Node 111 2 replaced"} - ] + {"uri": "http://111r-2.net", "title": "Node 111 2 replaced"}, + ], } self.assertDictEqual(node, expected_node) def test_link_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ {"uri": "http://110o-1.net", "title": "Node 110 1 original"}, - {"uri": "http://110o-2.net", "title": "Node 110 2 original"} - ] + {"uri": "http://110o-2.net", "title": "Node 
110 2 original"}, + ], } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } - self.config['task'] = 'update' - self.config['update_mode'] = 'delete' + self.config["task"] = "update" + self.config["update_mode"] = "delete" field = workbench_fields.LinkField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 112 - csv_record['field_foo'] = "http://112r-1.net%%Node 112 1 replaced|http://112r-2.net%%Node 112 2 replaced" - node_field_values = [{"uri": "http://112o-1.net", "title": "Node 112 1 original"}, {"uri": "http://112o-2.net", "title": "Node 112 2 original"}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 112 + csv_record["field_foo"] = ( + "http://112r-1.net%%Node 112 1 replaced|http://112r-2.net%%Node 112 2 replaced" + ) + node_field_values = [ + {"uri": "http://112o-1.net", "title": "Node 112 1 original"}, + {"uri": "http://112o-2.net", "title": "Node 112 2 original"}, + ] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_link_field_dudupe_values(self): # Split values from CSV. - input = ["http://example.net%%Example", "http://foo.net%%Foo", "http://example.net%%Example", "http://example.net%%Example"] + input = [ + "http://example.net%%Example", + "http://foo.net%%Foo", + "http://example.net%%Example", + "http://example.net%%Example", + ] field = workbench_fields.LinkField() output = field.dedupe_values(input) expected = ["http://example.net%%Example", "http://foo.net%%Foo"] @@ -3387,13 +3043,13 @@ def test_link_field_dudupe_values(self): input = [ {"uri": "http://example.net", "title": "Example"}, {"uri": "http://foo.net", "title": "Foo"}, - {"uri": "http://example.net", "title": "Example"} + {"uri": "http://example.net", "title": "Example"}, ] field = workbench_fields.LinkField() output = field.dedupe_values(input) expected = [ {"uri": "http://example.net", "title": "Example"}, - {"uri": "http://foo.net", "title": "Foo"} + {"uri": "http://foo.net", "title": "Foo"}, ] self.assertEqual(output, expected) @@ -3402,1402 +3058,1201 @@ class TestEntityRefererenceField(unittest.TestCase): def setUp(self): self.maxDiff = None - self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id' - } + self.config = {"task": "create", "subdelimiter": "|", "id_field": "id"} def test_create_with_entity_reference_field(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } # Create a node with an entity_reference field of cardinality 1, no subdelimiters, # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_001" - csv_record['field_foo'] = "10" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_001" + csv_record["field_foo"] = "10" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '10', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "10", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'node' - } + "field_foo": {"cardinality": 1, "target_type": "node"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_001" - csv_record['field_foo'] = "10" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_001" + csv_record["field_foo"] = "10" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '10', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "10", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) # Create a node with an entity_reference field of cardinality 1, with subdelimiters # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_002" - csv_record['field_foo'] = "101|102" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_002" + csv_record["field_foo"] = "101|102" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '101', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "101", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record term_entity_reference_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record term_entity_reference_002 would exceed maximum number of allowed values \(1\)", + ) self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'node' - } + "field_foo": {"cardinality": 1, "target_type": "node"} } with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_002" - csv_record['field_foo'] = "100|101" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_002" + csv_record["field_foo"] = "100|101" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '100', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "100", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record node_entity_reference_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record node_entity_reference_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with an entity_reference field of cardinality unlimited, no subdelimiters, # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_003" - csv_record['field_foo'] = "1010" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_003" + csv_record["field_foo"] = "1010" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1010', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "1010", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_003" - csv_record['field_foo'] = "10001" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_003" + csv_record["field_foo"] = "10001" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '10001', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "10001", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) # Create a node with an entity_reference field of cardinality unlimited, with subdelimiters, # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_004" - csv_record['field_foo'] = "1010|1011|1011" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_004" + csv_record["field_foo"] = "1010|1011|1011" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1010", "target_type": "taxonomy_term"}, + {"target_id": "1011", "target_type": "taxonomy_term"}, ], - 'field_foo': [ - {'target_id': '1010', 'target_type': 'taxonomy_term'}, - {'target_id': '1011', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_004" - csv_record['field_foo'] = "10001|10002" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_004" + csv_record["field_foo"] = "10001|10002" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "10001", "target_type": "node_type"}, + {"target_id": "10002", "target_type": "node_type"}, ], - 'field_foo': [ - {'target_id': '10001', 'target_type': 'node_type'}, - {'target_id': '10002', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) # Create a node with an entity_reference field of cardinality limited, no subdelimiters, # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_005" - csv_record['field_foo'] = "101010" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_005" + csv_record["field_foo"] = "101010" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '101010', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "101010", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'node' - } + "field_foo": {"cardinality": 1, "target_type": "node"} } field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_005" - csv_record['field_foo'] = "1010101" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_005" + csv_record["field_foo"] = "1010101" + node = field.create( + self.config, self.field_definitions, existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1010101', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "1010101", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) # Create a node with an entity_reference field of cardinality limited, with subdelimiters, # for both taxonomy term and node references. 
self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "term_entity_reference_006" - csv_record['field_foo'] = "101|102|103|102" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "term_entity_reference_006" + csv_record["field_foo"] = "101|102|103|102" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "101", "target_type": "taxonomy_term"}, + {"target_id": "102", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '101', 'target_type': 'taxonomy_term'}, - {'target_id': '102', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record term_entity_reference_006 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record term_entity_reference_006 would exceed maximum number of allowed values \(2\)", + ) self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'node' - } + "field_foo": {"cardinality": 2, "target_type": "node"} } with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['id'] = "node_entity_reference_006" - csv_record['field_foo'] = "200|201|202" - node = field.create(self.config, self.field_definitions, existing_node, csv_record, "field_foo") + csv_record["id"] = "node_entity_reference_006" + csv_record["field_foo"] = "200|201|202" + node = field.create( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "200", "target_type": "node_type"}, + {"target_id": "201", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '200', 'target_type': 'node_type'}, - {'target_id': '201', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record node_entity_reference_006 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record node_entity_reference_006 would exceed maximum number of allowed values \(2\)", + ) def test_entity_reference_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": 
[{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "1", "target_type": "taxonomy_term"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 100 - csv_record['field_foo'] = '5' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 100 + csv_record["field_foo"] = "5" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '5', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "5", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'node' - } + "field_foo": {"cardinality": 1, "target_type": "node"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 101 - csv_record['field_foo'] = '20' - node_field_values = [{'target_id': '10', 'target_type': 'node'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 101 + csv_record["field_foo"] = "20" + node_field_values = [{"target_id": "10", "target_type": "node"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '20', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "20", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) def test_entity_reference_field_update_replace_cardinality_1_with_subdelims(self): - existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1', 'target_type': 'taxonomy_term'} - ] + existing_node = { + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "1", "target_type": "taxonomy_term"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": 
"taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 102 - csv_record['field_foo'] = '10|15' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 102 + csv_record["field_foo"] = "10|15" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '10', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "10", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 102 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record 102 would exceed maximum number of allowed values \(1\)", + ) # Node reference. self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'node' - } + "field_foo": {"cardinality": 1, "target_type": "node"} } with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 103 - csv_record['field_foo'] = '20|25' - node_field_values = [{'target_id': '1', 'target_type': 'node'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 103 + csv_record["field_foo"] = "20|25" + node_field_values = [{"target_id": "1", "target_type": "node"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '20', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "20", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 103 would exceed maximum number of allowed values \(1\)') - - def test_entity_reference_field_update_replace_cardinality_unlimited_no_subdelims(self): + self.assertRegex( + str(message.output), + r"for record 103 would exceed maximum number of allowed values \(1\)", + ) + + def test_entity_reference_field_update_replace_cardinality_unlimited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '40', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + 
"field_foo": [{"target_id": "40", "target_type": "taxonomy_term"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 104 - csv_record['field_foo'] = '30' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 104 + csv_record["field_foo"] = "30" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '30', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "30", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) # Node reference. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 105 - csv_record['field_foo'] = '40' - node_field_values = [{'target_id': '50', 'target_type': 'node'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 105 + csv_record["field_foo"] = "40" + node_field_values = [{"target_id": "50", "target_type": "node"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '40', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "40", "target_type": "node_type"}], } self.assertDictEqual(node, expected_node) - def test_entity_reference_field_update_replace_cardinality_unlimited_with_subdelims(self): + def test_entity_reference_field_update_replace_cardinality_unlimited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '50', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "50", "target_type": "taxonomy_term"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + 
"field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 106 - csv_record['field_foo'] = '51|52|51' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 106 + csv_record["field_foo"] = "51|52|51" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "51", "target_type": "taxonomy_term"}, + {"target_id": "52", "target_type": "taxonomy_term"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '51', 'target_type': 'taxonomy_term'}, - {'target_id': '52', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Node reference. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 107 - csv_record['field_foo'] = '61|62' - node_field_values = [{'target_id': '60', 'target_type': 'node'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 107 + csv_record["field_foo"] = "61|62" + node_field_values = [{"target_id": "60", "target_type": "node"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "61", "target_type": "node_type"}, + {"target_id": "62", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '61', 'target_type': 'node_type'}, - {'target_id': '62', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - def test_entity_reference_field_update_append_cardinality_unlimited_no_subdelims(self): + def test_entity_reference_field_update_append_cardinality_unlimited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '70', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "70", "target_type": "taxonomy_term"}], } self.field_definitions = { - 'field_foo': { - 
'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 108 - csv_record['field_foo'] = '71' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 108 + csv_record["field_foo"] = "71" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "70", "target_type": "taxonomy_term"}, + {"target_id": "71", "target_type": "taxonomy_term"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '70', 'target_type': 'taxonomy_term'}, - {'target_id': '71', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Node reference. self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 109 - csv_record['field_foo'] = '81' - node_field_values = [{'target_id': '80', 'target_type': 'node_type'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 109 + csv_record["field_foo"] = "81" + node_field_values = [{"target_id": "80", "target_type": "node_type"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "80", "target_type": "node_type"}, + {"target_id": "81", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '80', 'target_type': 'node_type'}, - {'target_id': '81', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - def test_entity_reference_field_update_append_cardinality_unlimited_with_subdelims(self): + def test_entity_reference_field_update_append_cardinality_unlimited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '70', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "70", "target_type": "taxonomy_term"}], } 
self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 110 - csv_record['field_foo'] = '72|73|73' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 110 + csv_record["field_foo"] = "72|73|73" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "70", "target_type": "taxonomy_term"}, + {"target_id": "72", "target_type": "taxonomy_term"}, + {"target_id": "73", "target_type": "taxonomy_term"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '70', 'target_type': 'taxonomy_term'}, - {'target_id': '72', 'target_type': 'taxonomy_term'}, - {'target_id': '73', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Node reference. existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '71', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "71", "target_type": "node_type"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'node' - } + "field_foo": {"cardinality": -1, "target_type": "node"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 111 - csv_record['field_foo'] = '74|75|71' - node_field_values = [{'target_id': '71', 'target_type': 'node_type'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 111 + csv_record["field_foo"] = "74|75|71" + node_field_values = [{"target_id": "71", "target_type": "node_type"}] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "71", "target_type": "node_type"}, + {"target_id": "74", "target_type": "node_type"}, + {"target_id": "75", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '71', 'target_type': 'node_type'}, - {'target_id': '74', 'target_type': 'node_type'}, - {'target_id': '75', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - def 
test_entity_reference_field_update_replace_cardinality_limited_no_subdelims(self): + def test_entity_reference_field_update_replace_cardinality_limited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "70", "target_type": "taxonomy_term"}, + {"target_id": "71", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '70', 'target_type': 'taxonomy_term'}, - {'target_id': '71', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 112 - csv_record['field_foo'] = '112' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 112 + csv_record["field_foo"] = "112" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '112', 'target_type': 'taxonomy_term'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "112", "target_type": "taxonomy_term"}], } self.assertDictEqual(node, expected_node) - def test_entity_reference_field_update_append_cardinality_limited_no_subdelims(self): + def test_entity_reference_field_update_append_cardinality_limited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1131', 'target_type': 'taxonomy_term'}, - {'target_id': '1132', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 113 - csv_record['field_foo'] = '1133' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 113 + csv_record["field_foo"] = "1133" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + 
"field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1131', 'target_type': 'taxonomy_term'}, - {'target_id': '1132', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 113 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 113 would exceed maximum number of allowed values \(2\)", + ) # Do not violate cardinality. self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 3, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 114 - csv_record['field_foo'] = '1133' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 114 + csv_record["field_foo"] = "1133" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, + {"target_id": "1133", "target_type": "taxonomy_term"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1131', 'target_type': 'taxonomy_term'}, - {'target_id': '1132', 'target_type': 'taxonomy_term'}, - {'target_id': '1133', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Node reference. 
existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '60', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "60", "target_type": "node_type"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'node' - } + "field_foo": {"cardinality": 2, "target_type": "node"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 1141 - csv_record['field_foo'] = '101|102' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 1141 + csv_record["field_foo"] = "101|102" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "60", "target_type": "node_type"}, + {"target_id": "101", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '60', 'target_type': 'node_type'}, - {'target_id': '101', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 1141 would exceed maximum number of allowed values \(2\)') - - def test_entity_reference_field_update_replace_cardinality_limited_with_subdelims(self): + self.assertRegex( + str(message.output), + r"for record 1141 would exceed maximum number of allowed values \(2\)", + ) + + def test_entity_reference_field_update_replace_cardinality_limited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, ], - 'field_foo': [ - {'target_id': '1131', 'target_type': 'taxonomy_term'}, - {'target_id': '1132', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 115 - csv_record['field_foo'] = '115|116|116' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 115 + csv_record["field_foo"] = "115|116|116" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + 
existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "115", "target_type": "taxonomy_term"}, + {"target_id": "116", "target_type": "taxonomy_term"}, ], - 'field_foo': [ - {'target_id': '115', 'target_type': 'taxonomy_term'}, - {'target_id': '116', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Violate cardinality with self.assertLogs() as message: self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 116 - csv_record['field_foo'] = '115|116|117|116' - node_field_values = [{'target_id': '70', 'target_type': 'taxonomy_term'}, {'target_id': '71', 'target_type': 'taxonomy_term'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 116 + csv_record["field_foo"] = "115|116|117|116" + node_field_values = [ + {"target_id": "70", "target_type": "taxonomy_term"}, + {"target_id": "71", "target_type": "taxonomy_term"}, + ] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "115", "target_type": "taxonomy_term"}, + {"target_id": "116", "target_type": "taxonomy_term"}, ], - 'field_foo': [ - {'target_id': '115', 'target_type': 'taxonomy_term'}, - {'target_id': '116', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 116 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 116 would exceed maximum number of allowed values \(2\)", + ) # Node reference. 
existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '60', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "60", "target_type": "node_type"}], } with self.assertLogs() as message: self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'target_type': 'node' - } + "field_foo": {"cardinality": 3, "target_type": "node"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 1161 - csv_record['field_foo'] = '101|102|103|104' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 1161 + csv_record["field_foo"] = "101|102|103|104" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "101", "target_type": "node_type"}, + {"target_id": "102", "target_type": "node_type"}, + {"target_id": "103", "target_type": "node_type"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '101', 'target_type': 'node_type'}, - {'target_id': '102', 'target_type': 'node_type'}, - {'target_id': '103', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 1161 would exceed maximum number of allowed values \(3\)') - - def test_entity_reference_field_update_append_cardinality_limited_with_subdelims(self): + self.assertRegex( + str(message.output), + r"for record 1161 would exceed maximum number of allowed values \(3\)", + ) + + def test_entity_reference_field_update_append_cardinality_limited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1161", "target_type": "taxonomy_term"}, + {"target_id": "1162", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1161', 'target_type': 'taxonomy_term'}, - {'target_id': '1162', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 116 - csv_record['field_foo'] = '1163|1164|1163' - node = 
field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 116 + csv_record["field_foo"] = "1163|1164|1163" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1161", "target_type": "taxonomy_term"}, + {"target_id": "1162", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1161', 'target_type': 'taxonomy_term'}, - {'target_id': '1162', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record 116 would exceed maximum number of allowed values \(2\)') + self.assertRegex( + str(message.output), + r"for record 116 would exceed maximum number of allowed values \(2\)", + ) # Do not violate cardinality. self.field_definitions = { - 'field_foo': { - 'cardinality': 4, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 4, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 117 - csv_record['field_foo'] = '117|118|118' - node_field_values = [{'target_id': '1131', 'target_type': 'taxonomy_term'}, {'target_id': '1132', 'target_type': 'taxonomy_term'}] - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", node_field_values) + csv_record["node_id"] = 117 + csv_record["field_foo"] = "117|118|118" + node_field_values = [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, + ] + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + node_field_values, + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1131", "target_type": "taxonomy_term"}, + {"target_id": "1132", "target_type": "taxonomy_term"}, + {"target_id": "117", "target_type": "taxonomy_term"}, + {"target_id": "118", "target_type": "taxonomy_term"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1131', 'target_type': 'taxonomy_term'}, - {'target_id': '1132', 'target_type': 'taxonomy_term'}, - {'target_id': '117', 'target_type': 'taxonomy_term'}, - {'target_id': '118', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Node reference. 
existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '60', 'target_type': 'node_type'} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [{"target_id": "60", "target_type": "node_type"}], } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'target_type': 'node' - } + "field_foo": {"cardinality": 3, "target_type": "node"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 1162 - csv_record['field_foo'] = '102|103|103' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 1162 + csv_record["field_foo"] = "102|103|103" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "60", "target_type": "node_type"}, + {"target_id": "102", "target_type": "node_type"}, + {"target_id": "103", "target_type": "node_type"}, ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '60', 'target_type': 'node_type'}, - {'target_id': '102', 'target_type': 'node_type'}, - {'target_id': '103', 'target_type': 'node_type'} - ] } self.assertDictEqual(node, expected_node) def test_entity_reference_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + {"target_id": "1001", "target_type": "taxonomy_term"}, + {"target_id": "1002", "target_type": "taxonomy_term"}, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'target_id': '1001', 'target_type': 'taxonomy_term'}, - {'target_id': '1002', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, + "field_foo": { + "cardinality": 3, } } - self.config['update_mode'] = 'delete' + self.config["update_mode"] = "delete" field = workbench_fields.EntityReferenceField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 130 - csv_record['field_foo'] = '' - self.config['update_mode'] = 'delete' - node = field.update(self.config, self.field_definitions, existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 130 + csv_record["field_foo"] = "" + self.config["update_mode"] = "delete" + node = field.update( + self.config, + self.field_definitions, + existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": 
[{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_entity_reference_field_dudupe_values(self): # Split values from CSV. - input = ['cats:Tuxedo', 'cats:Misbehaving', 'dogs:German Shepherd', 'cats:Tuxedo'] + input = [ + "cats:Tuxedo", + "cats:Misbehaving", + "dogs:German Shepherd", + "cats:Tuxedo", + ] field = workbench_fields.LinkField() output = field.dedupe_values(input) - expected = ['cats:Tuxedo', 'cats:Misbehaving', 'dogs:German Shepherd'] + expected = ["cats:Tuxedo", "cats:Misbehaving", "dogs:German Shepherd"] self.assertEqual(output, expected) # Dictionaries. input = [ - {'target_id': '600', 'target_type': 'node_type'}, - {'target_id': '1020', 'target_type': 'node_type'}, - {'target_id': '1030', 'target_type': 'node_type'}, - {'target_id': '1020', 'target_type': 'node_type'}, - {'target_id': '1030', 'target_type': 'node_type'} + {"target_id": "600", "target_type": "node_type"}, + {"target_id": "1020", "target_type": "node_type"}, + {"target_id": "1030", "target_type": "node_type"}, + {"target_id": "1020", "target_type": "node_type"}, + {"target_id": "1030", "target_type": "node_type"}, ] field = workbench_fields.LinkField() output = field.dedupe_values(input) expected = [ - {'target_id': '600', 'target_type': 'node_type'}, - {'target_id': '1020', 'target_type': 'node_type'}, - {'target_id': '1030', 'target_type': 'node_type'} + {"target_id": "600", "target_type": "node_type"}, + {"target_id": "1020", "target_type": "node_type"}, + {"target_id": "1030", "target_type": "node_type"}, ] self.assertEqual(output, expected) @@ -4806,52 +4261,39 @@ class TestTypedRelationField(unittest.TestCase): def setUp(self): self.maxDiff = None - self.config = { - 'task': 'create', - 'subdelimiter': '|', - 'id_field': 'id' - } + self.config = {"task": "create", "subdelimiter": "|", "id_field": "id"} self.existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], } def test_create_with_typed_relation_field(self): # Create a node with a typed_relation field of cardinality 1, no subdelimiters. 
field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_001" - csv_record['field_foo'] = "relators:pht:1" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_001" + csv_record["field_foo"] = "relators:pht:1" + node = field.create( + self.config, field_definitions, self.existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:pht", + "target_id": "1", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:pht', 'target_id': '1', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) @@ -4859,701 +4301,827 @@ def test_create_with_typed_relation_field(self): with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_002" - csv_record['field_foo'] = "relators:art:2|relators:art:22" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_002" + csv_record["field_foo"] = "relators:art:2|relators:art:22" + node = field.create( + self.config, + field_definitions, + self.existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "2", + "target_type": "taxonomy_term", + } ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '2', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_002 would exceed maximum number of allowed values \(1\)') + self.assertRegex( + str(message.output), + r"for record typed_relation_002 would exceed maximum number of allowed values \(1\)", + ) # Create a node with a typed_relation field of cardinality unlimited, no subdelimiters. 
field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_003" - csv_record['field_foo'] = "relators:pht:3" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_003" + csv_record["field_foo"] = "relators:pht:3" + node = field.create( + self.config, field_definitions, self.existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:pht", + "target_id": "3", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:pht', 'target_id': '3', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a typed_relation field of cardinality unlimited, with subdelimiters. field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_004" - csv_record['field_foo'] = "relators:pht:1|relators:pht:2|relators:pht:3" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_004" + csv_record["field_foo"] = "relators:pht:1|relators:pht:2|relators:pht:3" + node = field.create( + self.config, field_definitions, self.existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:pht", + "target_id": "1", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:pht", + "target_id": "2", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:pht", + "target_id": "3", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:pht', 'target_id': '1', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:pht', 'target_id': '2', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:pht', 'target_id': '3', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a typed_relation field of cardinality limited, no subdelimiters. 
field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_005" - csv_record['field_foo'] = "relators:art:51" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_005" + csv_record["field_foo"] = "relators:art:51" + node = field.create( + self.config, field_definitions, self.existing_node, csv_record, "field_foo" + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "51", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '51', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) # Create a node with a typed_relation field of cardinality limited, with subdelimiters. field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 3, "target_type": "taxonomy_term"} } with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['id'] = "typed_relation_006" - csv_record['field_foo'] = "relators:art:26|relators:art:36|relators:art:46|relators:art:56" - node = field.create(self.config, field_definitions, self.existing_node, csv_record, "field_foo") + csv_record["id"] = "typed_relation_006" + csv_record["field_foo"] = ( + "relators:art:26|relators:art:36|relators:art:46|relators:art:56" + ) + node = field.create( + self.config, + field_definitions, + self.existing_node, + csv_record, + "field_foo", + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "26", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:art", + "target_id": "36", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:art", + "target_id": "46", + "target_type": "taxonomy_term", + }, ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '26', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:art', 'target_id': '36', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:art', 'target_id': '46', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_006 would exceed maximum number of allowed values \(3\)') + self.assertRegex( + str(message.output), + r"for record typed_relation_006 would exceed maximum number of allowed values \(3\)", + ) def test_typed_relation_field_update_replace_cardinality_1_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", 
"target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "777", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '777', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_007' - csv_record['field_foo'] = 'relators:art:701' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_007" + csv_record["field_foo"] = "relators:art:701" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "701", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '701', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) def test_typed_relation_field_update_replace_cardinality_1_with_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "778", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '778', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 1, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_008' - csv_record['field_foo'] = 'relators:xxx:801|relators:cpy:802' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_008" + csv_record["field_foo"] = "relators:xxx:801|relators:cpy:802" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": 
"node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:xxx", + "target_id": "801", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:xxx', 'target_id': '801', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_008 would exceed maximum number of allowed values \(1\)') - - def test_typed_relation_field_update_replace_cardinality_unlimited_no_subdelims(self): + self.assertRegex( + str(message.output), + r"for record typed_relation_008 would exceed maximum number of allowed values \(1\)", + ) + + def test_typed_relation_field_update_replace_cardinality_unlimited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "779", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '779', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_009' - csv_record['field_foo'] = 'relators:aaa:901' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_009" + csv_record["field_foo"] = "relators:aaa:901" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:aaa", + "target_id": "901", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:aaa', 'target_id': '901', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - def test_typed_relation_field_update_replace_cardinality_unlimited_with_subdelims(self): + def test_typed_relation_field_update_replace_cardinality_unlimited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "902", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '902', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": 
{"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_010' - csv_record['field_foo'] = 'relators:aaa:902|relators:bbb:903|relators:ccc:904' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_010" + csv_record["field_foo"] = "relators:aaa:902|relators:bbb:903|relators:ccc:904" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:aaa", + "target_id": "902", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:bbb", + "target_id": "903", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "904", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:aaa', 'target_id': '902', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:bbb', 'target_id': '903', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '904', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - def test_typed_relation_field_update_append_cardinality_unlimited_no_subdelims(self): + def test_typed_relation_field_update_append_cardinality_unlimited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "10", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '10', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_011' - csv_record['field_foo'] = 'relators:aaa:11' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_011" + csv_record["field_foo"] = "relators:aaa:11" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "10", 
+ "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:aaa", + "target_id": "11", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '10', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:aaa', 'target_id': '11', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - def test_typed_relation_field_update_append_cardinality_unlimited_no_subdelims(self): + def test_typed_relation_field_update_append_cardinality_unlimited_no_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "10", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '10', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': -1, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": -1, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'append' + self.config["update_mode"] = "append" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_012' - csv_record['field_foo'] = 'relators:bbb:12|relators:ccc:13|relators:ddd:14' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_012" + csv_record["field_foo"] = "relators:bbb:12|relators:ccc:13|relators:ddd:14" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "10", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:bbb", + "target_id": "12", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "13", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ddd", + "target_id": "14", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '10', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:bbb', 'target_id': '12', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '13', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ddd', 'target_id': '14', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) def test_typed_relation_field_update_replace_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "130", + 
"target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '130', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'replace' + self.config["update_mode"] = "replace" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_013' - csv_record['field_foo'] = 'relators:bbb:13' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_013" + csv_record["field_foo"] = "relators:bbb:13" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:bbb", + "target_id": "13", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:bbb', 'target_id': '13', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) def test_typed_relation_field_update_append_cardinality_limited_no_subdelims(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:yyy", + "target_id": "140", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:zzz", + "target_id": "141", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:yyy', 'target_id': '140', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:zzz', 'target_id': '141', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_014' - csv_record['field_foo'] = 'relators:sss:14' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_014" + csv_record["field_foo"] = "relators:sss:14" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:yyy", + 
"target_id": "140", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:zzz", + "target_id": "141", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:yyy', 'target_id': '140', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:zzz', 'target_id': '141', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_014 would exceed maximum number of allowed values \(2\)') - - def test_typed_relation_field_update_replace_cardinality_limited_with_subdelims(self): + self.assertRegex( + str(message.output), + r"for record typed_relation_014 would exceed maximum number of allowed values \(2\)", + ) + + def test_typed_relation_field_update_replace_cardinality_limited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:yyy", + "target_id": "555", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:yyy', 'target_id': '555', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 2, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 2, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'replace' + self.config["task"] = "update" + self.config["update_mode"] = "replace" with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_015' - csv_record['field_foo'] = 'relators:bbb:150|relators:ccc:152|relators:ccc:152|relators:ddd:153' - node_field_values = [{'rel_type': 'relators:art', 'target_id': '555', 'target_type': 'taxonomy_term'}] - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_015" + csv_record["field_foo"] = ( + "relators:bbb:150|relators:ccc:152|relators:ccc:152|relators:ddd:153" + ) + node_field_values = [ + { + "rel_type": "relators:art", + "target_id": "555", + "target_type": "taxonomy_term", + } + ] + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:bbb", + "target_id": "150", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "152", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:bbb', 'target_id': '150', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '152', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_015 would exceed maximum number of allowed values \(2\)') - - def 
test_typed_relation_field_update_append_cardinality_limited_with_subdelims(self): + self.assertRegex( + str(message.output), + r"for record typed_relation_015 would exceed maximum number of allowed values \(2\)", + ) + + def test_typed_relation_field_update_append_cardinality_limited_with_subdelims( + self, + ): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:jjj", + "target_id": "164", + "target_type": "taxonomy_term", + } ], - 'field_foo': [ - {'rel_type': 'relators:jjj', 'target_id': '164', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 3, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 3, "target_type": "taxonomy_term"} } - self.config['task'] = 'update' - self.config['update_mode'] = 'append' + self.config["task"] = "update" + self.config["update_mode"] = "append" with self.assertLogs() as message: field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 'typed_relation_016' - csv_record['field_foo'] = 'relators:rrr:160|relators:sss:161|relators:sss:161|relators:ttt:162' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = "typed_relation_016" + csv_record["field_foo"] = ( + "relators:rrr:160|relators:sss:161|relators:sss:161|relators:ttt:162" + ) + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:jjj", + "target_id": "164", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:rrr", + "target_id": "160", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:sss", + "target_id": "161", + "target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:jjj', 'target_id': '164', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:rrr', 'target_id': '160', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:sss', 'target_id': '161', 'target_type': 'taxonomy_term'} - ] } self.assertDictEqual(node, expected_node) - self.assertRegex(str(message.output), r'for record typed_relation_016 would exceed maximum number of allowed values \(3\)') + self.assertRegex( + str(message.output), + r"for record typed_relation_016 would exceed maximum number of allowed values \(3\)", + ) def test_typed_relation_field_update_delete(self): existing_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [ + { + "rel_type": "relators:art", + "target_id": "301", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:art", + "target_id": "302", + 
"target_type": "taxonomy_term", + }, ], - 'status': [ - {'value': 1} - ], - 'field_foo': [ - {'rel_type': 'relators:art', 'target_id': '301', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:art', 'target_id': '302', 'target_type': 'taxonomy_term'} - ] } self.field_definitions = { - 'field_foo': { - 'cardinality': 4, - 'target_type': 'taxonomy_term' - } + "field_foo": {"cardinality": 4, "target_type": "taxonomy_term"} } - self.config['update_mode'] = 'delete' + self.config["update_mode"] = "delete" field = workbench_fields.TypedRelationField() csv_record = collections.OrderedDict() - csv_record['node_id'] = 300 - csv_record['field_foo'] = '' - self.config['update_mode'] = 'delete' - node = field.update(self.config, self.field_definitions, self.existing_node, csv_record, "field_foo", existing_node["field_foo"]) + csv_record["node_id"] = 300 + csv_record["field_foo"] = "" + self.config["update_mode"] = "delete" + node = field.update( + self.config, + self.field_definitions, + self.existing_node, + csv_record, + "field_foo", + existing_node["field_foo"], + ) expected_node = { - 'type': [ - {'target_id': 'islandora_object', 'target_type': 'node_type'} - ], - 'title': [ - {'value': "Test node"} - ], - 'status': [ - {'value': 1} - ], - 'field_foo': [] + "type": [{"target_id": "islandora_object", "target_type": "node_type"}], + "title": [{"value": "Test node"}], + "status": [{"value": 1}], + "field_foo": [], } self.assertDictEqual(node, expected_node) def test_entity_reference_field_dudupe_values(self): # Split values from CSV. - input = ['relators:art:person:Bar, Foo', 'relators:art:person:Bang, Biz', 'relators:art:person:Bang, Biz'] + input = [ + "relators:art:person:Bar, Foo", + "relators:art:person:Bang, Biz", + "relators:art:person:Bang, Biz", + ] field = workbench_fields.LinkField() output = field.dedupe_values(input) - expected = ['relators:art:person:Bar, Foo', 'relators:art:person:Bang, Biz'] + expected = ["relators:art:person:Bar, Foo", "relators:art:person:Bang, Biz"] self.assertEqual(output, expected) # Dictionaries. 
input = [ - {'rel_type': 'relators:bbb', 'target_id': '1501', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '1521', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:bbb', 'target_id': '1501', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '1521', 'target_type': 'taxonomy_term'} + { + "rel_type": "relators:bbb", + "target_id": "1501", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "1521", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:bbb", + "target_id": "1501", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "1521", + "target_type": "taxonomy_term", + }, ] field = workbench_fields.LinkField() output = field.dedupe_values(input) expected = [ - {'rel_type': 'relators:bbb', 'target_id': '1501', 'target_type': 'taxonomy_term'}, - {'rel_type': 'relators:ccc', 'target_id': '1521', 'target_type': 'taxonomy_term'} + { + "rel_type": "relators:bbb", + "target_id": "1501", + "target_type": "taxonomy_term", + }, + { + "rel_type": "relators:ccc", + "target_id": "1521", + "target_type": "taxonomy_term", + }, ] self.assertEqual(output, expected) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/field_tests_values.py b/tests/field_tests_values.py index 4254fb46..0a0724c9 100644 --- a/tests/field_tests_values.py +++ b/tests/field_tests_values.py @@ -16,21 +16,19 @@ class TestSimpleField(unittest.TestCase): def test_simple_field_edtf_validate(self): config = dict() - field_definitions = { - 'field_foo': { - 'field_type': 'edtf' - } - } + field_definitions = {"field_foo": {"field_type": "edtf"}} with self.assertLogs() as message: - input = ['1900', '1xxx', '1901', '1902'] + input = ["1900", "1xxx", "1901", "1902"] field = workbench_fields.SimpleField() - output = field.remove_invalid_values(config, field_definitions, 'field_foo', input) - self.assertEqual(output, ['1900', '1901', '1902']) - self.assertRegex(str(message.output), r'is not a valid EDTF field value.') + output = field.remove_invalid_values( + config, field_definitions, "field_foo", input + ) + self.assertEqual(output, ["1900", "1901", "1902"]) + self.assertRegex(str(message.output), r"is not a valid EDTF field value.") -''' +""" class TestGeolocationField(unittest.TestCase): def test_simple_field_geolocation_validate(self): @@ -65,25 +63,34 @@ def test_simple_field_link_validate(self): output = field.remove_invalid_values(config, field_definitions, 'field_foo', input) self.assertEqual(output, ['https://example.com/foo', 'http://example.com%%bar']) self.assertRegex(str(message.output), r'is not a valid Link field value') -''' +""" + class TestAuthorityLinkField(unittest.TestCase): def test_authority_link_field_validate(self): config = dict() field_definitions = { - 'field_foo': { - 'field_type': 'authority_link', - 'authority_sources': ['foo', 'bar'] + "field_foo": { + "field_type": "authority_link", + "authority_sources": ["foo", "bar"], } } with self.assertLogs() as message: - input = ['foo%%https://foo.com%%Foo authority record', 'xxx%%https://xxx.com'] + input = [ + "foo%%https://foo.com%%Foo authority record", + "xxx%%https://xxx.com", + ] field = workbench_fields.AuthorityLinkField() - output = field.remove_invalid_values(config, field_definitions, 'field_foo', input) - self.assertEqual(output, ['foo%%https://foo.com%%Foo authority record']) - self.assertRegex(str(message.output), r'xxx.*not a valid Authority Link field value') + 
output = field.remove_invalid_values( + config, field_definitions, "field_foo", input + ) + self.assertEqual(output, ["foo%%https://foo.com%%Foo authority record"]) + self.assertRegex( + str(message.output), r"xxx.*not a valid Authority Link field value" + ) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/islandora_tests.py b/tests/islandora_tests.py index 030f7402..91b6b22d 100644 --- a/tests/islandora_tests.py +++ b/tests/islandora_tests.py @@ -28,7 +28,9 @@ class TestCreate(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'create_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "create_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] def test_create(self): @@ -37,23 +39,33 @@ def test_create(self): create_output = create_output.decode().strip() create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 2) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + "https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'create_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "create_test", "rollback.csv" + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - self.preprocessed_file_path = os.path.join(self.current_dir, 'assets', 'create_test', 'metadata.csv.preprocessed') + self.preprocessed_file_path = os.path.join( + self.current_dir, "assets", "create_test", "metadata.csv.preprocessed" + ) if os.path.exists(self.preprocessed_file_path): os.remove(self.preprocessed_file_path) @@ -62,7 +74,9 @@ class TestCreateFromFiles(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'create_from_files_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "create_from_files_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] def test_create_from_files(self): @@ -71,19 +85,31 @@ def test_create_from_files(self): create_output = create_output.decode().strip() create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 3) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + 
"https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'create_from_files_test', 'files', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "create_from_files_test", + "files", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) @@ -95,10 +121,12 @@ class TestCreateWithMaxNodeTitleLength(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.nids = list() - self.output_lines = '' + self.output_lines = "" self.temp_dir = tempfile.gettempdir() @@ -107,30 +135,48 @@ def test_create(self): self.create_output = create_output.decode().strip() self.output_lines = copy.copy(self.create_output) - self.assertRegex(self.output_lines, '"This here title is 32 chars lo" .record 03', '') - self.assertRegex(self.output_lines, '"This here title is 34 chars lo" .record 04', '') - self.assertRegex(self.output_lines, '"This here title is 36 chars lo" .record 05', '') - self.assertRegex(self.output_lines, '"This title is 28 chars long." .record 06', '') + self.assertRegex( + self.output_lines, '"This here title is 32 chars lo" .record 03', "" + ) + self.assertRegex( + self.output_lines, '"This here title is 34 chars lo" .record 04', "" + ) + self.assertRegex( + self.output_lines, '"This here title is 36 chars lo" .record 05', "" + ) + self.assertRegex( + self.output_lines, '"This title is 28 chars long." 
.record 06', "" + ) create_lines = self.create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 6) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + "https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "rollback.csv" + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - self.preprocessed_file_path = os.path.join(self.temp_dir, 'create_max_node_title_length.csv.preprocessed') + self.preprocessed_file_path = os.path.join( + self.temp_dir, "create_max_node_title_length.csv.preprocessed" + ) if os.path.exists(self.preprocessed_file_path): os.remove(self.preprocessed_file_path) @@ -139,12 +185,21 @@ class TestUpdateWithMaxNodeTitleLength(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.nids = list() - self.update_csv_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'update_max_node_title_length.csv') - self.update_config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'update.yml') + self.update_csv_file_path = os.path.join( + self.current_dir, + "assets", + "max_node_title_length_test", + "update_max_node_title_length.csv", + ) + self.update_config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "update.yml" + ) self.update_cmd = ["./workbench", "--config", self.update_config_file_path] self.temp_dir = tempfile.gettempdir() @@ -155,50 +210,66 @@ def test_create(self): create_lines = self.create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 6) # Write out an update CSV file using the node IDs in self.nids. 
update_csv_file_rows = list() - test_titles = ['This title is 37 chars___________long', - 'This title is 39 chars_____________long', - 'This title is 29 _ chars long', - 'This title is 42 chars________________long', - 'This title is 44 chars__________________long', - 'This title is 28 chars long.'] - update_csv_file_rows.append('node_id,title') + test_titles = [ + "This title is 37 chars___________long", + "This title is 39 chars_____________long", + "This title is 29 _ chars long", + "This title is 42 chars________________long", + "This title is 44 chars__________________long", + "This title is 28 chars long.", + ] + update_csv_file_rows.append("node_id,title") i = 0 while i <= 5: - update_csv_file_rows.append(f'{self.nids[i]},{test_titles[i]}') + update_csv_file_rows.append(f"{self.nids[i]},{test_titles[i]}") i = i + 1 - with open(self.update_csv_file_path, mode='wt') as update_csv_file: - update_csv_file.write('\n'.join(update_csv_file_rows)) + with open(self.update_csv_file_path, mode="wt") as update_csv_file: + update_csv_file.write("\n".join(update_csv_file_rows)) # Run the update command. check_output = subprocess.check_output(self.update_cmd) # Fetch each node in self.nids and check to see if its title is <= 30 chars long. All should be. for nid_to_update in self.nids: - node_url = 'https://islandora.traefik.me/node/' + str(self.nids[0]) + '?_format=json' + node_url = ( + "https://islandora.traefik.me/node/" + + str(self.nids[0]) + + "?_format=json" + ) node_response = requests.get(node_url) node = json.loads(node_response.text) - updated_title = str(node['title'][0]['value']) - self.assertLessEqual(len(updated_title), 30, '') + updated_title = str(node["title"][0]["value"]) + self.assertLessEqual(len(updated_title), 30, "") def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + "https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "rollback.csv" + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - self.preprocessed_file_path = os.path.join(self.temp_dir, 'create_max_node_title_length.csv.preprocessed') + self.preprocessed_file_path = os.path.join( + self.temp_dir, "create_max_node_title_length.csv.preprocessed" + ) if os.path.exists(self.preprocessed_file_path): os.remove(self.preprocessed_file_path) @@ -206,7 +277,9 @@ def tearDown(self): if os.path.exists(self.update_csv_file_path): os.remove(self.update_csv_file_path) - self.preprocessed_update_file_path = os.path.join(self.temp_dir, 'update_max_node_title_length.csv.preprocessed') + self.preprocessed_update_file_path = os.path.join( + self.temp_dir, "update_max_node_title_length.csv.preprocessed" + ) if os.path.exists(self.preprocessed_update_file_path): os.remove(self.preprocessed_update_file_path) @@ -217,15 +290,20 @@ class TestCreateWithNewTypedRelation(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.config_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'create_with_new_typed_relation.yml') + 
self.config_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "create_with_new_typed_relation.yml", + ) self.create_cmd = ["./workbench", "--config", self.config_file_path] self.temp_dir = tempfile.gettempdir() parser = argparse.ArgumentParser() - parser.add_argument('--config') - parser.add_argument('--check') - parser.add_argument('--get_csv_template') + parser.add_argument("--config") + parser.add_argument("--check") + parser.add_argument("--get_csv_template") parser.set_defaults(config=self.config_file_path, check=False) args = parser.parse_args() workbench_config = WorkbenchConfig(args) @@ -238,38 +316,57 @@ def test_create_with_new_typed_relation(self): create_output = create_output.decode().strip() create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 1) - self.term_id = workbench_utils.find_term_in_vocab(self.config, 'person', 'Kirk, James T.') + self.term_id = workbench_utils.find_term_in_vocab( + self.config, "person", "Kirk, James T." + ) self.assertTrue(self.term_id) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.config_file_path, '--quick_delete_node', self.config['host'] + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.config_file_path, + "--quick_delete_node", + self.config["host"] + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - preprocessed_csv_path = os.path.join(self.temp_dir, 'create_with_new_typed_relation.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "create_with_new_typed_relation.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - term_endpoint = self.config['host'] + '/taxonomy/term/' + str(self.term_id) + '?_format=json' - delete_term_response = workbench_utils.issue_request(self.config, 'DELETE', term_endpoint) + term_endpoint = ( + self.config["host"] + + "/taxonomy/term/" + + str(self.term_id) + + "?_format=json" + ) + delete_term_response = workbench_utils.issue_request( + self.config, "DELETE", term_endpoint + ) class TestDelete(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - create_config_file_path = os.path.join(self.current_dir, 'assets', 'delete_test', 'create.yml') + create_config_file_path = os.path.join( + self.current_dir, "assets", "delete_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", create_config_file_path] self.temp_dir = tempfile.gettempdir() - self.nid_file = os.path.join(self.temp_dir, 'workbenchdeletetesttnids.txt') + self.nid_file = os.path.join(self.temp_dir, "workbenchdeletetesttnids.txt") nids = list() create_output = subprocess.check_output(self.create_cmd) @@ -278,14 +375,16 @@ def setUp(self): with open(self.nid_file, "a") as fh: fh.write("node_id\n") for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") nids.append(nid) fh.write(nid + "\n") def test_delete(self): - delete_config_file_path = os.path.join(self.current_dir, 'assets', 'delete_test', 'delete.yml') + delete_config_file_path = os.path.join( + self.current_dir, "assets", "delete_test", "delete.yml" + ) delete_cmd 
= ["./workbench", "--config", delete_config_file_path] delete_output = subprocess.check_output(delete_cmd) delete_output = delete_output.decode().strip() @@ -305,21 +404,25 @@ class TestUpdate(unittest.TestCase): def setUp(self): self.maxDiff = None self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'update_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "update_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.temp_dir = tempfile.gettempdir() - self.nid_file = os.path.join(self.temp_dir, 'workbenchupdatetestnids.txt') - self.update_metadata_file = os.path.join(self.current_dir, 'assets', 'update_test', 'workbenchupdatetest.csv') + self.nid_file = os.path.join(self.temp_dir, "workbenchupdatetestnids.txt") + self.update_metadata_file = os.path.join( + self.current_dir, "assets", "update_test", "workbenchupdatetest.csv" + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.nids = list() create_output = subprocess.check_output(self.create_cmd) @@ -329,9 +432,9 @@ def setUp(self): with open(self.nid_file, "a") as nids_fh: nids_fh.write("node_id\n") for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") nids_fh.write(nid + "\n") self.nids.append(nid) @@ -343,33 +446,43 @@ def setUp(self): def test_update(self): # Run update task. time.sleep(5) - update_config_file_path = os.path.join(self.current_dir, 'assets', 'update_test', 'update.yml') + update_config_file_path = os.path.join( + self.current_dir, "assets", "update_test", "update.yml" + ) self.update_cmd = ["./workbench", "--config", update_config_file_path] subprocess.check_output(self.update_cmd) # Confirm that fields have been updated. 
- url = self.islandora_host + '/node/' + str(self.nids[0]) + '?_format=json' + url = self.islandora_host + "/node/" + str(self.nids[0]) + "?_format=json" response = requests.get(url) node = json.loads(response.text) - identifier = str(node['field_identifier'][0]['value']) - self.assertEqual(identifier, 'identifier-0001') - coodinates = str(node['field_coordinates'][0]['lat']) - self.assertEqual(coodinates, '99.1') + identifier = str(node["field_identifier"][0]["value"]) + self.assertEqual(identifier, "identifier-0001") + coodinates = str(node["field_coordinates"][0]["lat"]) + self.assertEqual(coodinates, "99.1") def tearDown(self): - delete_config_file_path = os.path.join(self.current_dir, 'assets', 'update_test', 'delete.yml') + delete_config_file_path = os.path.join( + self.current_dir, "assets", "update_test", "delete.yml" + ) delete_cmd = ["./workbench", "--config", delete_config_file_path] subprocess.check_output(delete_cmd) os.remove(self.nid_file) os.remove(self.update_metadata_file) - nid_file_preprocessed_file = os.path.join(self.temp_dir, 'workbenchupdatetestnids.txt.preprocessed') + nid_file_preprocessed_file = os.path.join( + self.temp_dir, "workbenchupdatetestnids.txt.preprocessed" + ) if os.path.exists(nid_file_preprocessed_file): os.remove(nid_file_preprocessed_file) - update_test_csv_preprocessed_file = os.path.join(self.temp_dir, 'workbenchupdatetest.csv.preprocessed') + update_test_csv_preprocessed_file = os.path.join( + self.temp_dir, "workbenchupdatetest.csv.preprocessed" + ) if os.path.exists(update_test_csv_preprocessed_file): os.remove(update_test_csv_preprocessed_file) - create_csv_preprocessed_file = os.path.join(self.temp_dir, 'create.csv.preprocessed') + create_csv_preprocessed_file = os.path.join( + self.temp_dir, "create.csv.preprocessed" + ) if os.path.exists(create_csv_preprocessed_file): os.remove(create_csv_preprocessed_file) @@ -378,21 +491,27 @@ class TestCreateWithNonLatinText(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - create_config_file_path = os.path.join(self.current_dir, 'assets', 'non_latin_text_test', 'create.yml') + create_config_file_path = os.path.join( + self.current_dir, "assets", "non_latin_text_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", create_config_file_path] yaml = YAML() - with open(create_config_file_path, 'r') as f: + with open(create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.temp_dir = tempfile.gettempdir() - self.nid_file = os.path.join(self.temp_dir, 'workbenchcreatenonlatintestnids.txt') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'non_latin_text_test', 'rollback.csv') + self.nid_file = os.path.join( + self.temp_dir, "workbenchcreatenonlatintestnids.txt" + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "non_latin_text_test", "rollback.csv" + ) def test_create_with_non_latin_text(self): nids = list() @@ -402,34 +521,36 @@ def test_create_with_non_latin_text(self): with open(self.nid_file, "a") as fh: fh.write("node_id\n") for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") nids.append(nid) fh.write(nid + "\n") self.assertEqual(len(nids), 3) - url = 
self.islandora_host + '/node/' + str(nids[0]) + '?_format=json' + url = self.islandora_host + "/node/" + str(nids[0]) + "?_format=json" response = requests.get(url) node = json.loads(response.text) - title = str(node['title'][0]['value']) - self.assertEqual(title, '一九二四年六月十二日') + title = str(node["title"][0]["value"]) + self.assertEqual(title, "一九二四年六月十二日") - url = self.islandora_host + '/node/' + str(nids[1]) + '?_format=json' + url = self.islandora_host + "/node/" + str(nids[1]) + "?_format=json" response = requests.get(url) node = json.loads(response.text) - title = str(node['title'][0]['value']) - self.assertEqual(title, 'सरकारी दस्तावेज़') + title = str(node["title"][0]["value"]) + self.assertEqual(title, "सरकारी दस्तावेज़") - url = self.islandora_host + '/node/' + str(nids[2]) + '?_format=json' + url = self.islandora_host + "/node/" + str(nids[2]) + "?_format=json" response = requests.get(url) node = json.loads(response.text) - title = str(node['title'][0]['value']) - self.assertEqual(title, 'ᐊᑕᐅᓯᖅ ᓄᓇ, ᐅᓄᖅᑐᑦ ᓂᐲᑦ') + title = str(node["title"][0]["value"]) + self.assertEqual(title, "ᐊᑕᐅᓯᖅ ᓄᓇ, ᐅᓄᖅᑐᑦ ᓂᐲᑦ") def tearDown(self): - delete_config_file_path = os.path.join(self.current_dir, 'assets', 'non_latin_text_test', 'delete.yml') + delete_config_file_path = os.path.join( + self.current_dir, "assets", "non_latin_text_test", "delete.yml" + ) delete_cmd = ["./workbench", "--config", delete_config_file_path] delete_output = subprocess.check_output(delete_cmd) delete_output = delete_output.decode().strip() @@ -439,11 +560,16 @@ def tearDown(self): if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.current_dir, 'assets', 'non_latin_text_test', 'metadata.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.current_dir, + "assets", + "non_latin_text_test", + "metadata.csv.preprocessed", + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - nid_file_preprocessed_path = self.nid_file + '.preprocessed' + nid_file_preprocessed_path = self.nid_file + ".preprocessed" if os.path.exists(nid_file_preprocessed_path): os.remove(nid_file_preprocessed_path) @@ -452,16 +578,18 @@ class TestSecondaryTask(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "secondary_task_test", "create.yml" + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.temp_dir = tempfile.gettempdir() @@ -473,71 +601,98 @@ def test_secondary_task(self): create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 5) for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json 
= json.loads(response.text) # Get the node ID of the parent node. - if node_json['title'][0]['value'].startswith('Tester'): - parent_nid = node_json['nid'][0]['value'] + if node_json["title"][0]["value"].startswith("Tester"): + parent_nid = node_json["nid"][0]["value"] break for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json = json.loads(response.text) - if node_json['title'][0]['value'].startswith('Secondary task test child 1'): - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) - elif node_json['title'][0]['value'].startswith('Secondary task test child 2'): - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) + if node_json["title"][0]["value"].startswith("Secondary task test child 1"): + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) + elif node_json["title"][0]["value"].startswith( + "Secondary task test child 2" + ): + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) else: - self.assertEqual(node_json['field_member_of'], []) + self.assertEqual(node_json["field_member_of"], []) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - preprocessed_csv_path = os.path.join(self.current_dir, 'assets', 'secondary_task_test', 'metadata.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.current_dir, + "assets", + "secondary_task_test", + "metadata.csv.preprocessed", + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - secondary_preprocessed_csv_path = os.path.join(self.temp_dir, 'secondary.csv.preprocessed') + secondary_preprocessed_csv_path = os.path.join( + self.temp_dir, "secondary.csv.preprocessed" + ) if os.path.exists(secondary_preprocessed_csv_path): os.remove(secondary_preprocessed_csv_path) - map_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_test', 'id_to_node_map.tsv') + map_file_path = os.path.join( + self.current_dir, "assets", "secondary_task_test", "id_to_node_map.tsv" + ) if os.path.exists(map_file_path): os.remove(map_file_path) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_test', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, "assets", "secondary_task_test", "rollback.csv" + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) class TestSecondaryTaskWithGoogleSheets(unittest.TestCase): - """Note: This test fetches data from https://docs.google.com/spreadsheets/d/19AxFWEFuwEoNqH8ciUo0PRAroIpNE9BuBhE5tIE6INQ/edit#gid=0 - """ + """Note: This test fetches data from https://docs.google.com/spreadsheets/d/19AxFWEFuwEoNqH8ciUo0PRAroIpNE9BuBhE5tIE6INQ/edit#gid=0""" def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_with_google_sheets_and_excel_test', 'google_sheets_primary.yml') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + 
"secondary_task_with_google_sheets_and_excel_test", + "google_sheets_primary.yml", + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.temp_dir = tempfile.gettempdir() @@ -549,49 +704,69 @@ def test_secondary_task_with_google_sheet(self): create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 8) for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json = json.loads(response.text) # Get the node ID of the parent node. - if node_json['field_local_identifier'][0]['value'] == 'GSP-04': - parent_nid = node_json['nid'][0]['value'] + if node_json["field_local_identifier"][0]["value"] == "GSP-04": + parent_nid = node_json["nid"][0]["value"] break for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json = json.loads(response.text) - if node_json['field_local_identifier'][0]['value'] == 'GSC-03': - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) - if node_json['field_local_identifier'][0]['value'] == 'GSC-04': - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) + if node_json["field_local_identifier"][0]["value"] == "GSC-03": + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) + if node_json["field_local_identifier"][0]["value"] == "GSC-04": + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_with_google_sheets_and_excel_test', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, + "assets", + "secondary_task_with_google_sheets_and_excel_test", + "rollback.csv", + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) - google_sheet_csv_path = os.path.join(self.temp_dir, 'google_sheet.csv') + google_sheet_csv_path = os.path.join(self.temp_dir, "google_sheet.csv") if os.path.exists(google_sheet_csv_path): os.remove(google_sheet_csv_path) - secondary_task_google_sheets_csv_paths = glob.glob('*secondary_task_with_google_sheets_and_excel_test_google_sheets_secondary*', root_dir=self.temp_dir) + secondary_task_google_sheets_csv_paths = glob.glob( + "*secondary_task_with_google_sheets_and_excel_test_google_sheets_secondary*", + root_dir=self.temp_dir, + ) for 
secondary_csv_file_path in secondary_task_google_sheets_csv_paths: if os.path.exists(os.path.join(self.temp_dir, secondary_csv_file_path)): os.remove(os.path.join(self.temp_dir, secondary_csv_file_path)) - google_sheet_csv_preprocessed_path = os.path.join(self.temp_dir, 'google_sheet.csv.preprocessed') + google_sheet_csv_preprocessed_path = os.path.join( + self.temp_dir, "google_sheet.csv.preprocessed" + ) if os.path.exists(google_sheet_csv_preprocessed_path): os.remove(google_sheet_csv_preprocessed_path) @@ -600,16 +775,21 @@ class TestSecondaryTaskWithExcel(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_with_google_sheets_and_excel_test', 'excel_primary.yml') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "secondary_task_with_google_sheets_and_excel_test", + "excel_primary.yml", + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.temp_dir = tempfile.gettempdir() @@ -623,49 +803,69 @@ def test_secondary_task_with_excel(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 8) for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json = json.loads(response.text) # Get the node ID of the parent node. 
- if node_json['field_local_identifier'][0]['value'] == 'STTP-02': - parent_nid = node_json['nid'][0]['value'] + if node_json["field_local_identifier"][0]["value"] == "STTP-02": + parent_nid = node_json["nid"][0]["value"] break for nid in self.nids: - node_url = self.islandora_host + '/node/' + nid + '?_format=json' + node_url = self.islandora_host + "/node/" + nid + "?_format=json" response = requests.get(node_url) node_json = json.loads(response.text) - if node_json['field_local_identifier'][0]['value'] == 'STTC-01': - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) - if node_json['field_local_identifier'][0]['value'] == 'STTC-02': - self.assertEqual(int(node_json['field_member_of'][0]['target_id']), int(parent_nid)) + if node_json["field_local_identifier"][0]["value"] == "STTC-01": + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) + if node_json["field_local_identifier"][0]["value"] == "STTC-02": + self.assertEqual( + int(node_json["field_member_of"][0]["target_id"]), int(parent_nid) + ) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'secondary_task_with_google_sheets_and_excel_test', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, + "assets", + "secondary_task_with_google_sheets_and_excel_test", + "rollback.csv", + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) - excel_csv_path = os.path.join(self.temp_dir, 'excel.csv') + excel_csv_path = os.path.join(self.temp_dir, "excel.csv") if os.path.exists(excel_csv_path): os.remove(excel_csv_path) - secondary_task_excel_csv_paths = glob.glob('*secondary_task_with_google_sheets_and_excel_test_excel_secondary*', root_dir=self.temp_dir) + secondary_task_excel_csv_paths = glob.glob( + "*secondary_task_with_google_sheets_and_excel_test_excel_secondary*", + root_dir=self.temp_dir, + ) for secondary_csv_file_path in secondary_task_excel_csv_paths: if os.path.exists(os.path.join(self.temp_dir, secondary_csv_file_path)): os.remove(os.path.join(self.temp_dir, secondary_csv_file_path)) - excel_csv_preprocessed_path = os.path.join(self.temp_dir, 'excel.csv.preprocessed') + excel_csv_preprocessed_path = os.path.join( + self.temp_dir, "excel.csv.preprocessed" + ) if os.path.exists(excel_csv_preprocessed_path): os.remove(excel_csv_preprocessed_path) @@ -674,18 +874,20 @@ class TestAdditionalFilesCreate(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - create_config_file_path = os.path.join(self.current_dir, 'assets', 'additional_files_test', 'create.yml') + create_config_file_path = os.path.join( + self.current_dir, "assets", "additional_files_test", "create.yml" + ) yaml = YAML() - with open(create_config_file_path, 'r') as f: + with open(create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + 
self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] self.create_cmd = ["./workbench", "--config", create_config_file_path] create_output = subprocess.check_output(self.create_cmd) @@ -693,28 +895,38 @@ def setUp(self): self.temp_dir = tempfile.gettempdir() - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'additional_files_test', 'rollback.csv') - with open(self.rollback_file_path, 'r') as rbf: + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "additional_files_test", "rollback.csv" + ) + with open(self.rollback_file_path, "r") as rbf: rollback_file_contents = rbf.read() # There will only be one nid in the rollback.csv file. - nid = rollback_file_contents.replace('node_id', '') + nid = rollback_file_contents.replace("node_id", "") self.nid = nid.strip() - media_list_url = self.islandora_host + '/node/' + self.nid + '/media?_format=json' - media_list_response = requests.get(media_list_url, auth=(self.islandora_username, self.islandora_password)) + media_list_url = ( + self.islandora_host + "/node/" + self.nid + "/media?_format=json" + ) + media_list_response = requests.get( + media_list_url, auth=(self.islandora_username, self.islandora_password) + ) media_list_json = json.loads(media_list_response.text) self.media_sizes = dict() self.media_use_tids = dict() for media in media_list_json: - self.media_use_tids[media['mid'][0]['value']] = media['field_media_use'][0]['target_id'] - if 'field_file_size' in media: - self.media_sizes[media['mid'][0]['value']] = media['field_file_size'][0]['value'] + self.media_use_tids[media["mid"][0]["value"]] = media["field_media_use"][0][ + "target_id" + ] + if "field_file_size" in media: + self.media_sizes[media["mid"][0]["value"]] = media["field_file_size"][ + 0 + ]["value"] # We don't use the transcript file's size here since it's not available via REST. Instead, since this # file will be the only media with 'field_edited_text' (the transcript), we tack its value onto media_sizes # for testing below. - if 'field_edited_text' in media: - self.media_sizes['transcript'] = media['field_edited_text'][0]['value'] + if "field_edited_text" in media: + self.media_sizes["transcript"] = media["field_edited_text"][0]["value"] def test_media_creation(self): # This is the original file's size. @@ -722,19 +934,25 @@ def test_media_creation(self): # This is the preservation file's size. self.assertTrue(286445 in self.media_sizes.values()) # This is the transcript. - self.assertIn('This is a transcript.', self.media_sizes['transcript']) + self.assertIn("This is a transcript.", self.media_sizes["transcript"]) def test_media_use_tids(self): - '''Doesn't associate media use terms to nodes, but at least it confirms that the intended - media use tids are present in the media created by this test. - ''' - preservation_media_use_tid = self.get_term_id_from_uri("http://pcdm.org/use#PreservationMasterFile") + """Doesn't associate media use terms to nodes, but at least it confirms that the intended + media use tids are present in the media created by this test. 
+ """ + preservation_media_use_tid = self.get_term_id_from_uri( + "http://pcdm.org/use#PreservationMasterFile" + ) self.assertTrue(preservation_media_use_tid in self.media_use_tids.values()) - transcript_media_use_tid = self.get_term_id_from_uri("http://pcdm.org/use#Transcript") + transcript_media_use_tid = self.get_term_id_from_uri( + "http://pcdm.org/use#Transcript" + ) self.assertTrue(transcript_media_use_tid in self.media_use_tids.values()) def tearDown(self): - delete_config_file_path = os.path.join(self.current_dir, 'assets', 'additional_files_test', 'rollback.yml') + delete_config_file_path = os.path.join( + self.current_dir, "assets", "additional_files_test", "rollback.yml" + ) delete_cmd = ["./workbench", "--config", delete_config_file_path] delete_output = subprocess.check_output(delete_cmd) delete_output = delete_output.decode().strip() @@ -743,25 +961,35 @@ def tearDown(self): if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'create.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "create.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - rollback_csv_path = os.path.join(self.current_dir, 'assets', 'additional_files_test', 'rollback.csv') + rollback_csv_path = os.path.join( + self.current_dir, "assets", "additional_files_test", "rollback.csv" + ) if os.path.exists(rollback_csv_path): os.remove(rollback_csv_path) - preprocessed_rollback_csv_path = os.path.join(self.temp_dir, 'rollback.csv.preprocessed') + preprocessed_rollback_csv_path = os.path.join( + self.temp_dir, "rollback.csv.preprocessed" + ) if os.path.exists(preprocessed_rollback_csv_path): os.remove(preprocessed_rollback_csv_path) def get_term_id_from_uri(self, uri): - '''We don't use get_term_from_uri() from workbench_utils because it requires a full config object. 
- ''' - term_from_authority_link_url = self.islandora_host + '/term_from_uri?_format=json&uri=' + uri.replace('#', '%23') - response = requests.get(term_from_authority_link_url, auth=(self.islandora_username, self.islandora_password)) + """We don't use get_term_from_uri() from workbench_utils because it requires a full config object.""" + term_from_authority_link_url = ( + self.islandora_host + + "/term_from_uri?_format=json&uri=" + + uri.replace("#", "%23") + ) + response = requests.get( + term_from_authority_link_url, + auth=(self.islandora_username, self.islandora_password), + ) response_body = json.loads(response.text) - tid = response_body[0]['tid'][0]['value'] + tid = response_body[0]["tid"][0]["value"] return tid @@ -769,22 +997,34 @@ class TestAdditionalFilesCreateAllowMissingFilesFalse(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_additional_files_allow_missing_files_false.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'additional_files_allow_missing_files_false.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_additional_files_allow_missing_files_false.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "additional_files_allow_missing_files_false.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] def test_create(self): self.create_cmd = ["./workbench", "--config", self.create_config_file_path] @@ -795,9 +1035,9 @@ def test_create(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) # Only three nodes will be created before workbench exits. 
@@ -805,19 +1045,37 @@ def test_create(self): with open(self.create_log_file_path) as log_file: log_data = log_file.read() - self.assertRegex(log_data, 'Media for "additional_files" CSV column "tn" in row with ID "003" .* not created because CSV field is empty', '') - self.assertRegex(log_data, 'Media for file "https://www.lib.sfu.ca/xxxtttuuu.jpg" named in field "tn" of CSV row with ID "005" not created because file does not exist', '') - self.assertNotRegex(log_data, 'Islandora Workbench successfully completed', '') + self.assertRegex( + log_data, + 'Media for "additional_files" CSV column "tn" in row with ID "003" .* not created because CSV field is empty', + "", + ) + self.assertRegex( + log_data, + 'Media for file "https://www.lib.sfu.ca/xxxtttuuu.jpg" named in field "tn" of CSV row with ID "005" not created because file does not exist', + "", + ) + self.assertNotRegex( + log_data, "Islandora Workbench successfully completed", "" + ) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata_additional_files_check.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_additional_files_check.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -829,22 +1087,34 @@ class TestAdditionalFilesCreateAllowMissingFilesTrue(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_additional_files_allow_missing_files_true.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'additional_files_allow_missing_files_true.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_additional_files_allow_missing_files_true.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "additional_files_allow_missing_files_true.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] def test_create(self): self.create_cmd = ["./workbench", "--config", self.create_config_file_path] @@ -855,29 +1125,49 @@ def test_create(self): # so they can be 
deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 5) with open(self.create_log_file_path) as log_file: log_data = log_file.read() - self.assertRegex(log_data, 'Media for "additional_files" CSV column "tn" in row with ID "003" .* not created because CSV field is empty', '') - self.assertRegex(log_data, 'Media for file "https://www.lib.sfu.ca/xxxtttuuu.jpg" named in field "tn" of CSV row with ID "005" not created because file does not exist', '') - self.assertRegex(log_data, 'Media for file "additional_files_2_tn.jpg" named in field "tn" of CSV row with ID "002" not created because file does not exist', '') - self.assertRegex(log_data, 'Islandora Workbench successfully completed', '') + self.assertRegex( + log_data, + 'Media for "additional_files" CSV column "tn" in row with ID "003" .* not created because CSV field is empty', + "", + ) + self.assertRegex( + log_data, + 'Media for file "https://www.lib.sfu.ca/xxxtttuuu.jpg" named in field "tn" of CSV row with ID "005" not created because file does not exist', + "", + ) + self.assertRegex( + log_data, + 'Media for file "additional_files_2_tn.jpg" named in field "tn" of CSV row with ID "002" not created because file does not exist', + "", + ) + self.assertRegex(log_data, "Islandora Workbench successfully completed", "") def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata_additional_files_check.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_additional_files_check.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -889,26 +1179,58 @@ class TestAdditionalFilesAddMediaAllowMissingFilesFalse(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') - self.add_media_csv_template_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv.template') - self.add_media_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false.yml') - self.add_media_csv_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv') - self.false_with_additional_files_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false.log') + self.create_config_file_path = os.path.join( 
+ self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) + self.add_media_csv_template_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv.template", + ) + self.add_media_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false.yml", + ) + self.add_media_csv_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv", + ) + self.false_with_additional_files_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false.log", + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] create_output = subprocess.check_output(self.create_cmd) @@ -918,9 +1240,9 @@ def setUp(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) # Insert their node IDs in the input CSV file. First, open the CSV template. @@ -930,32 +1252,70 @@ def setUp(self): # Then add a node ID to the start of each line from the template # and write out an add_media input CSV file. template_line_index = 0 - with open(self.add_media_csv_file_path, 'a+') as add_media_csv: + with open(self.add_media_csv_file_path, "a+") as add_media_csv: # The first line in the output CSV is the headers from the template. add_media_csv.write(csv_template_lines[template_line_index]) # The subsequent lines should each start with a node ID from. 
for node_id in self.nids: template_line_index = template_line_index + 1 - add_media_csv.write(f"{node_id}{csv_template_lines[template_line_index]}") + add_media_csv.write( + f"{node_id}{csv_template_lines[template_line_index]}" + ) def test_false(self): - self.add_media_cmd = ["./workbench", "--config", self.add_media_config_file_path] - proc = subprocess.Popen(self.add_media_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + self.add_media_cmd = [ + "./workbench", + "--config", + self.add_media_config_file_path, + ] + proc = subprocess.Popen( + self.add_media_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) stdout, stderr = proc.communicate() add_media_output = str(stdout.decode().strip()) - self.assertRegex(add_media_output, 'Media for node .* not created since CSV column "preservation" is empty', '') - self.assertRegex(add_media_output, 'Media for node .* not created since CSV column "file" is empty', '') - self.assertRegex(add_media_output, 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', '') + self.assertRegex( + add_media_output, + 'Media for node .* not created since CSV column "preservation" is empty', + "", + ) + self.assertRegex( + add_media_output, + 'Media for node .* not created since CSV column "file" is empty', + "", + ) + self.assertRegex( + add_media_output, + 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', + "", + ) with open(self.false_with_additional_files_log_file_path) as log_file_false: log_data_false = log_file_false.read() - self.assertRegex(log_data_false, 'Media for node .* not created since CSV column "preservation" is empty', '') - self.assertRegex(log_data_false, 'Media for node .* not created since CSV column "file" is empty', '') - self.assertRegex(log_data_false, 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', '') + self.assertRegex( + log_data_false, + 'Media for node .* not created since CSV column "preservation" is empty', + "", + ) + self.assertRegex( + log_data_false, + 'Media for node .* not created since CSV column "file" is empty', + "", + ) + self.assertRegex( + log_data_false, + 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', + "", + ) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.rollback_file_path): @@ -964,11 +1324,15 @@ def tearDown(self): if os.path.exists(self.add_media_csv_file_path): os.remove(self.add_media_csv_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_create_nodes.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_create_nodes.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_additional_files.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_additional_files.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -983,26 +1347,58 @@ class 
TestAdditionalFilesAddMediaAllowMissingFilesTrue(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') - self.add_media_csv_template_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv.template') - self.add_media_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_true.yml') - self.add_media_csv_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv') - self.true_with_additional_files_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_true.log') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) + self.add_media_csv_template_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv.template", + ) + self.add_media_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_true.yml", + ) + self.add_media_csv_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv", + ) + self.true_with_additional_files_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_true.log", + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] create_output = subprocess.check_output(self.create_cmd) @@ -1012,9 +1408,9 @@ def setUp(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) # Insert their node IDs in the input CSV file. First, open the CSV template. 
@@ -1024,32 +1420,66 @@ def setUp(self): # Then add a node ID to the start of each line from the template # and write out an add_media input CSV file. template_line_index = 0 - with open(self.add_media_csv_file_path, 'a+') as add_media_csv: + with open(self.add_media_csv_file_path, "a+") as add_media_csv: # The first line in the output CSV is the headers from the template. add_media_csv.write(csv_template_lines[template_line_index]) # The subsequent lines should each start with a node ID from. for node_id in self.nids: template_line_index = template_line_index + 1 - add_media_csv.write(f"{node_id}{csv_template_lines[template_line_index]}") + add_media_csv.write( + f"{node_id}{csv_template_lines[template_line_index]}" + ) def test_true(self): - self.add_media_cmd = ["./workbench", "--config", self.add_media_config_file_path] + self.add_media_cmd = [ + "./workbench", + "--config", + self.add_media_config_file_path, + ] add_media_output = subprocess.check_output(self.add_media_cmd) add_media_output = add_media_output.decode().strip() - self.assertRegex(add_media_output, 'Media for node .* not created since CSV column "preservation" is empty', '') - self.assertRegex(add_media_output, 'Media for node .* not created since CSV column "file" is empty', '') + self.assertRegex( + add_media_output, + 'Media for node .* not created since CSV column "preservation" is empty', + "", + ) + self.assertRegex( + add_media_output, + 'Media for node .* not created since CSV column "file" is empty', + "", + ) with open(self.true_with_additional_files_log_file_path) as log_file_true: log_data_true = log_file_true.read() - self.assertRegex(log_data_true, 'Media for node .* not created since CSV column "preservation" is empty', '') - self.assertRegex(log_data_true, 'Media for node .* not created since CSV column "file" is empty', '') - self.assertRegex(log_data_true, 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', '') - self.assertRegex(log_data_true, 'Islandora Workbench successfully completed', '') + self.assertRegex( + log_data_true, + 'Media for node .* not created since CSV column "preservation" is empty', + "", + ) + self.assertRegex( + log_data_true, + 'Media for node .* not created since CSV column "file" is empty', + "", + ) + self.assertRegex( + log_data_true, + 'Additional file "add_media_transcript_x.txt" identified in CSV "transcript" column for node ID .* not found', + "", + ) + self.assertRegex( + log_data_true, "Islandora Workbench successfully completed", "" + ) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.rollback_file_path): @@ -1058,11 +1488,15 @@ def tearDown(self): if os.path.exists(self.add_media_csv_file_path): os.remove(self.add_media_csv_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_create_nodes.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_create_nodes.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_additional_files.csv.preprocessed') + preprocessed_csv_path = 
os.path.join( + self.temp_dir, "add_media_additional_files.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -1075,7 +1509,7 @@ def tearDown(self): class TestUpdateMediaFields(unittest.TestCase): """Create a couple nodes plus image media, update the media's field_original_name - and field_width fields, then confirm they were updated by GETting the media's JSON. + and field_width fields, then confirm they were updated by GETting the media's JSON. """ def SetUp(): @@ -1088,5 +1522,5 @@ def tearDown(): pass -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/islandora_tests_check.py b/tests/islandora_tests_check.py index 6dbedef3..6db6202d 100644 --- a/tests/islandora_tests_check.py +++ b/tests/islandora_tests_check.py @@ -25,39 +25,50 @@ class TestCreateCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'check_test', 'create.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "check_test", "create.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) self.output = output.decode().strip() def test_create_check(self): - self.assertRegex(self.output, 'Configuration and input data appear to be valid', '') + self.assertRegex( + self.output, "Configuration and input data appear to be valid", "" + ) class TestCheckFromGoogleSpreadsheetCheck(unittest.TestCase): - """Note: This test fetches data from https://docs.google.com/spreadsheets/d/13Mw7gtBy1A3ZhYEAlBzmkswIdaZvX18xoRBxfbgxqWc/edit#gid=0. - """ + """Note: This test fetches data from https://docs.google.com/spreadsheets/d/13Mw7gtBy1A3ZhYEAlBzmkswIdaZvX18xoRBxfbgxqWc/edit#gid=0.""" def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'check_test', 'google_sheet.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "check_test", "google_sheet.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) self.output = output.decode().strip() def test_create_from_google_spreadsheet_check(self): - self.assertRegex(self.output, 'Extracting CSV data from https://docs.google.com', '') - self.assertRegex(self.output, 'Configuration and input data appear to be valid', '') + self.assertRegex( + self.output, "Extracting CSV data from https://docs.google.com", "" + ) + self.assertRegex( + self.output, "Configuration and input data appear to be valid", "" + ) class TestUpdateCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'check_test', 'update.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "check_test", "update.yml" + ) self.temp_dir = tempfile.gettempdir() cmd = ["./workbench", "--config", config_file_path, "--check"] @@ -65,10 +76,14 @@ def setUp(self): self.output = output.decode().strip() def test_update_check(self): - self.assertRegex(self.output, 'Configuration and input data appear to be valid', '') + self.assertRegex( + self.output, "Configuration and input data appear to be valid", "" + ) def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, "update.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "update.csv.preprocessed" + ) if 
os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -77,7 +92,9 @@ class TestDeleteCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'check_test', 'delete.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "check_test", "delete.yml" + ) self.temp_dir = tempfile.gettempdir() cmd = ["./workbench", "--config", config_file_path, "--check"] @@ -85,10 +102,14 @@ def setUp(self): self.output = output.decode().strip() def test_delete_check(self): - self.assertRegex(self.output, 'Configuration and input data appear to be valid', '') + self.assertRegex( + self.output, "Configuration and input data appear to be valid", "" + ) def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, "delete.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "delete.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -97,7 +118,9 @@ class TestAddMediaCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'check_test', 'add_media.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "check_test", "add_media.yml" + ) self.temp_dir = tempfile.gettempdir() cmd = ["./workbench", "--config", config_file_path, "--check"] @@ -105,10 +128,14 @@ def setUp(self): self.output = output.decode().strip() def test_add_media_check(self): - self.assertRegex(self.output, 'Configuration and input data appear to be valid', '') + self.assertRegex( + self.output, "Configuration and input data appear to be valid", "" + ) def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, "add_media.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "add_media.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -117,7 +144,9 @@ class TestCreateMaxNodeTitleLengthCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'create.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "create.yml" + ) self.temp_dir = tempfile.gettempdir() cmd = ["./workbench", "--config", config_file_path, "--check"] @@ -125,12 +154,26 @@ def setUp(self): self.output = output.decode().strip() def test_for_too_long_titles(self): - self.assertRegex(self.output, 'CSV field "title" in record with ID 03 contains a value that is longer .32 characters', '') - self.assertRegex(self.output, 'CSV field "title" in record with ID 04 contains a value that is longer .34 characters', '') - self.assertRegex(self.output, 'CSV field "title" in record with ID 05 contains a value that is longer .36 characters', '') + self.assertRegex( + self.output, + 'CSV field "title" in record with ID 03 contains a value that is longer .32 characters', + "", + ) + self.assertRegex( + self.output, + 'CSV field "title" in record with ID 04 contains a value that is longer .34 characters', + "", + ) + self.assertRegex( + self.output, + 'CSV field "title" in record with ID 05 contains a value that is longer .36 characters', + "", + ) def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, 
"create_max_node_title_length.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "create_max_node_title_length.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -142,13 +185,27 @@ def setUp(self): # First, we create some nodes so we have the node IDs for the update CSV file. We are # reusing the CSV data used by the TestCreateWithMaxNodeTitleLength test class. - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "create.yml" + ) self.create_cmd = ["./workbench", "--config", self.create_config_file_path] self.nids = list() - self.update_csv_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'update_max_node_title_length.csv') - self.update_config_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'update.yml') - self.update_cmd = ["./workbench", "--config", self.update_config_file_path, "--check"] + self.update_csv_file_path = os.path.join( + self.current_dir, + "assets", + "max_node_title_length_test", + "update_max_node_title_length.csv", + ) + self.update_config_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "update.yml" + ) + self.update_cmd = [ + "./workbench", + "--config", + self.update_config_file_path, + "--check", + ] self.temp_dir = tempfile.gettempdir() @@ -158,52 +215,74 @@ def test_for_too_long_titles(self): create_lines = self.create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 6) # Now that we have our node IDs, we write out the CSV file used in --check. 
update_csv_file_rows = list() - test_titles = ['This title is 37 chars___________long', - 'This title is 39 chars_____________long', - 'This title is 29 _ chars long', - 'This title is 42 chars________________long', - 'This title is 44 chars__________________long', - 'This title is 28 chars long.'] - update_csv_file_rows.append('node_id,title') + test_titles = [ + "This title is 37 chars___________long", + "This title is 39 chars_____________long", + "This title is 29 _ chars long", + "This title is 42 chars________________long", + "This title is 44 chars__________________long", + "This title is 28 chars long.", + ] + update_csv_file_rows.append("node_id,title") i = 0 while i <= 5: - update_csv_file_rows.append(f'{self.nids[i]},{test_titles[i]}') + update_csv_file_rows.append(f"{self.nids[i]},{test_titles[i]}") i = i + 1 - with open(self.update_csv_file_path, mode='wt') as update_csv_file: - update_csv_file.write('\n'.join(update_csv_file_rows)) + with open(self.update_csv_file_path, mode="wt") as update_csv_file: + update_csv_file.write("\n".join(update_csv_file_rows)) check_output = subprocess.check_output(self.update_cmd) self.check_output = check_output.decode().strip() - self.assertRegex(self.check_output, 'contains a value that is longer .37 characters.', '') - self.assertRegex(self.check_output, 'contains a value that is longer .39 characters.', '') - self.assertRegex(self.check_output, 'contains a value that is longer .42 characters.', '') - self.assertRegex(self.check_output, 'contains a value that is longer .44 characters.', '') + self.assertRegex( + self.check_output, "contains a value that is longer .37 characters.", "" + ) + self.assertRegex( + self.check_output, "contains a value that is longer .39 characters.", "" + ) + self.assertRegex( + self.check_output, "contains a value that is longer .42 characters.", "" + ) + self.assertRegex( + self.check_output, "contains a value that is longer .44 characters.", "" + ) def tearDown(self): # Delete our test nodes we created. 
for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + "https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'max_node_title_length_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "max_node_title_length_test", "rollback.csv" + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - self.preprocessed_create_file_path = os.path.join(self.temp_dir, 'create_max_node_title_length.csv.preprocessed') + self.preprocessed_create_file_path = os.path.join( + self.temp_dir, "create_max_node_title_length.csv.preprocessed" + ) if os.path.exists(self.preprocessed_create_file_path): os.remove(self.preprocessed_create_file_path) - self.preprocessed_create_file_path = os.path.join(self.temp_dir, 'update_max_node_title_length.csv.preprocessed') + self.preprocessed_create_file_path = os.path.join( + self.temp_dir, "update_max_node_title_length.csv.preprocessed" + ) if os.path.exists(self.preprocessed_create_file_path): os.remove(self.preprocessed_create_file_path) @@ -218,21 +297,35 @@ def setUp(self): def test_bad_relator_check_fail(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'bad_relator.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "typed_relation_test", "bad_relator.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] try: output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'does not use the structure required for typed relation fields', '') + self.assertRegex( + output, + "does not use the structure required for typed relation fields", + "", + ) except subprocess.CalledProcessError as err: pass def tearDown(self): - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'input_data', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "input_data", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "bad_typed_relation_fail.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "bad_typed_relation_fail.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -244,21 +337,31 @@ def setUp(self): def test_bad_uri_check_fail(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'bad_uri.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "typed_relation_test", "bad_uri.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] try: output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'example.com', '') + self.assertRegex(output, "example.com", "") except subprocess.CalledProcessError as err: pass def tearDown(self): - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'input_data', 'rollback.csv') + 
self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "input_data", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "bad_uri_fail.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "bad_uri_fail.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -267,21 +370,34 @@ class TestTypedRelationNewTypedRelationCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'add_new_typed_relation.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "add_new_typed_relation.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) self.output = output.decode().strip() self.temp_dir = tempfile.gettempdir() def test_new_typed_relation_check(self): - self.assertRegex(self.output, 'new terms will be created as noted', '') + self.assertRegex(self.output, "new terms will be created as noted", "") def tearDown(self): - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'input_data', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "input_data", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "new_typed_relation.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "new_typed_relation.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -293,21 +409,31 @@ def setUp(self): def test_no_namespace_check_fail(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'no_namespace.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "typed_relation_test", "no_namespace.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] try: output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'require a vocabulary namespace', '') + self.assertRegex(output, "require a vocabulary namespace", "") except subprocess.CalledProcessError as err: pass def tearDown(self): - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'typed_relation_test', 'input_data', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "typed_relation_test", + "input_data", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "no_namespace.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "no_namespace.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -316,17 +442,21 @@ class TestDelimiterCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'delimiter_test', 'create_tab.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "delimiter_test", 
"create_tab.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) self.output = output.decode().strip() self.temp_dir = tempfile.gettempdir() def test_delimiter_check(self): - self.assertRegex(self.output, 'input data appear to be valid', '') + self.assertRegex(self.output, "input data appear to be valid", "") def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, "metadata.tsv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "metadata.tsv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -338,17 +468,21 @@ def setUp(self): def test_geolocation_check(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'geolocation_test', 'bad_geocoordinates.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "geolocation_test", "bad_geocoordinates.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] try: output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, r'+43.45-123.17', '') + self.assertRegex(output, r"+43.45-123.17", "") except subprocess.CalledProcessError as err: pass def tearDown(self): - preprocessed_csv_file_path = os.path.join(self.temp_dir, "bad_geocoorindates_fail.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "bad_geocoorindates_fail.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -360,21 +494,27 @@ def setUp(self): def test_header_column_mismatch_fail(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'header_column_mismatch_test', 'create.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "header_column_mismatch_test", "create.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] try: output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Row 2 of your CSV file does not', '') + self.assertRegex(output, "Row 2 of your CSV file does not", "") except subprocess.CalledProcessError as err: pass def tearDown(self): - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'header_column_mismatch_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "header_column_mismatch_test", "rollback.csv" + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "metadata.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -383,17 +523,23 @@ class TestCreateWithFieldTemplatesCheck(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(self.current_dir, 'assets', 'create_with_field_templates_test', 'create.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "create_with_field_templates_test", "create.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) self.output = output.decode().strip() self.temp_dir = tempfile.gettempdir() def test_create_with_field_templates_check(self): - 
self.assertRegex(self.output, 'all 3 rows in the CSV file have the same number of columns as there are headers .6.', '') + self.assertRegex( + self.output, + "all 3 rows in the CSV file have the same number of columns as there are headers .6.", + "", + ) def tearDown(self): - templated_csv_path = os.path.join(self.temp_dir, 'metadata.csv.preprocessed') + templated_csv_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") os.remove(templated_csv_path) @@ -403,36 +549,50 @@ def test_commented_csv(self): current_dir = os.path.dirname(os.path.abspath(__file__)) self.temp_dir = tempfile.gettempdir() - config_file_path = os.path.join(current_dir, "assets", "commented_csvs_test", "raw_csv.yml") + config_file_path = os.path.join( + current_dir, "assets", "commented_csvs_test", "raw_csv.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'all 3 rows in the CSV file', '') - preprocessed_csv_file_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") + self.assertRegex(output, "all 3 rows in the CSV file", "") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "metadata.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) - config_file_path = os.path.join(current_dir, "assets", "commented_csvs_test", "excel.yml") + config_file_path = os.path.join( + current_dir, "assets", "commented_csvs_test", "excel.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'all 4 rows in the CSV file', '') + self.assertRegex(output, "all 4 rows in the CSV file", "") csv_file_path = os.path.join(self.temp_dir, "excel.csv") if os.path.exists(csv_file_path): os.remove(csv_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "excel.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "excel.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) - config_file_path = os.path.join(current_dir, "assets", "commented_csvs_test", "google_sheets.yml") + config_file_path = os.path.join( + current_dir, "assets", "commented_csvs_test", "google_sheets.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'all 5 rows in the CSV file', '') - csv_file_path = os.path.join(current_dir, "assets", "commented_csvs_test", "google_sheet.csv") + self.assertRegex(output, "all 5 rows in the CSV file", "") + csv_file_path = os.path.join( + current_dir, "assets", "commented_csvs_test", "google_sheet.csv" + ) if os.path.exists(csv_file_path): os.remove(csv_file_path) - preprocessed_csv_file_path = os.path.join(self.temp_dir, "google_sheet.csv.preprocessed") + preprocessed_csv_file_path = os.path.join( + self.temp_dir, "google_sheet.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_file_path): os.remove(preprocessed_csv_file_path) @@ -441,18 +601,20 @@ class TestTaxonomies(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.taxonomies_config_file_path = os.path.join(self.current_dir, 'assets', 'taxonomies_test', 'create.yml') + self.taxonomies_config_file_path = os.path.join( + self.current_dir, "assets", "taxonomies_test", "create.yml" + ) yaml = YAML() - with 
open(self.taxonomies_config_file_path, 'r') as f: + with open(self.taxonomies_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] - self.islandora_username = config['username'] - self.islandora_password = config['password'] + self.islandora_host = config["host"] + self.islandora_username = config["username"] + self.islandora_password = config["password"] self.create_cmd = ["./workbench", "--config", self.taxonomies_config_file_path] @@ -467,61 +629,122 @@ def setUp(self): # nodes created during this test so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) def test_validate_term_names_exist(self): - taxonomies_terms_exist_config_file_path = os.path.join(self.current_dir, 'assets', 'taxonomies_test', 'create.yml') - cmd = ["./workbench", "--config", taxonomies_terms_exist_config_file_path, "--check"] + taxonomies_terms_exist_config_file_path = os.path.join( + self.current_dir, "assets", "taxonomies_test", "create.yml" + ) + cmd = [ + "./workbench", + "--config", + taxonomies_terms_exist_config_file_path, + "--check", + ] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'term IDs/names in CSV file exist in their respective taxonomies', '') + self.assertRegex( + output, + "term IDs/names in CSV file exist in their respective taxonomies", + "", + ) def test_validate_term_name_does_not_exist(self): - taxonomies_term_name_does_not_exist_config_file_path = os.path.join(self.current_dir, 'assets', 'taxonomies_test', 'term_name_not_in_taxonomy.yml') - cmd = ["./workbench", "--config", taxonomies_term_name_does_not_exist_config_file_path, "--check"] + taxonomies_term_name_does_not_exist_config_file_path = os.path.join( + self.current_dir, + "assets", + "taxonomies_test", + "term_name_not_in_taxonomy.yml", + ) + cmd = [ + "./workbench", + "--config", + taxonomies_term_name_does_not_exist_config_file_path, + "--check", + ] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), '"XPosters"', '') + self.assertRegex(str(stdout), '"XPosters"', "") def test_validate_term_id_does_not_exist(self): - taxonomies_term_id_does_not_exist_config_file_path = os.path.join(self.current_dir, 'assets', 'taxonomies_test', 'term_id_not_in_taxonomy.yml') - cmd = ["./workbench", "--config", taxonomies_term_id_does_not_exist_config_file_path, "--check"] + taxonomies_term_id_does_not_exist_config_file_path = os.path.join( + self.current_dir, "assets", "taxonomies_test", "term_id_not_in_taxonomy.yml" + ) + cmd = [ + "./workbench", + "--config", + taxonomies_term_id_does_not_exist_config_file_path, + "--check", + ] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), '1000000', '') + self.assertRegex(str(stdout), "1000000", "") def tearDown(self): # Delete all terms in the genre taxonomy created by these tests. 
- terms_to_delete = ['XNewspapers', 'XPostcards', 'XCartoons', 'XCertificates', 'XPosters'] + terms_to_delete = [ + "XNewspapers", + "XPostcards", + "XCartoons", + "XCertificates", + "XPosters", + ] for term_name in terms_to_delete: - get_tid_url = self.islandora_host + '/term_from_term_name?vocab=genre&name=' + urllib.parse.quote(term_name.strip()) + '&_format=json' - get_tid_response = requests.get(get_tid_url, auth=(self.islandora_username, self.islandora_password)) + get_tid_url = ( + self.islandora_host + + "/term_from_term_name?vocab=genre&name=" + + urllib.parse.quote(term_name.strip()) + + "&_format=json" + ) + get_tid_response = requests.get( + get_tid_url, auth=(self.islandora_username, self.islandora_password) + ) term_data = json.loads(get_tid_response.text) if len(term_data): - term_to_delete_tid = term_data[0]['tid'][0]['value'] - delete_term_url = self.islandora_host + '/taxonomy/term/' + str(term_to_delete_tid) + '?_format=json' - term_delete_response = requests.delete(delete_term_url, auth=(self.islandora_username, self.islandora_password)) + term_to_delete_tid = term_data[0]["tid"][0]["value"] + delete_term_url = ( + self.islandora_host + + "/taxonomy/term/" + + str(term_to_delete_tid) + + "?_format=json" + ) + term_delete_response = requests.delete( + delete_term_url, + auth=(self.islandora_username, self.islandora_password), + ) for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.taxonomies_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.taxonomies_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'taxonomies_test', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, "assets", "taxonomies_test", "rollback.csv" + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'term_id_not_in_taxonomy.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "term_id_not_in_taxonomy.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'term_name_not_in_taxonomy.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "term_name_not_in_taxonomy.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -533,36 +756,46 @@ def setUp(self): def test_google_gid(self): current_dir = os.path.dirname(os.path.abspath(__file__)) - config_file_path = os.path.join(current_dir, 'assets', 'google_gid_test', 'gid_0.yml') + config_file_path = os.path.join( + current_dir, "assets", "google_gid_test", "gid_0.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'OK, all 2 rows in the CSV file') + self.assertRegex(output, "OK, all 2 rows in the CSV file") - config_file_path = os.path.join(current_dir, 'assets', 'google_gid_test', 'gid_1867618389.yml') + config_file_path = os.path.join( + current_dir, "assets", 
"google_gid_test", "gid_1867618389.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'OK, all 3 rows in the CSV file') + self.assertRegex(output, "OK, all 3 rows in the CSV file") - config_file_path = os.path.join(current_dir, 'assets', 'google_gid_test', 'gid_390347846.yml') + config_file_path = os.path.join( + current_dir, "assets", "google_gid_test", "gid_390347846.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'OK, all 5 rows in the CSV file') + self.assertRegex(output, "OK, all 5 rows in the CSV file") - config_file_path = os.path.join(current_dir, 'assets', 'google_gid_test', 'gid_953977578.yml') + config_file_path = os.path.join( + current_dir, "assets", "google_gid_test", "gid_953977578.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'OK, all 1 rows in the CSV file') + self.assertRegex(output, "OK, all 1 rows in the CSV file") def tearDown(self): - csv_path = os.path.join(self.temp_dir, 'google_sheet.csv') + csv_path = os.path.join(self.temp_dir, "google_sheet.csv") if os.path.exists(csv_path): os.remove(csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'google_sheet.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "google_sheet.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -574,25 +807,29 @@ def setUp(self): self.temp_dir = tempfile.gettempdir() def test_good_csv(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'parents_precede_children_test', 'good.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "parents_precede_children_test", "good.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Configuration and input data appear to be valid') + self.assertRegex(output, "Configuration and input data appear to be valid") def test_bad_csv(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'parents_precede_children_test', 'bad.yml') + config_file_path = os.path.join( + self.current_dir, "assets", "parents_precede_children_test", "bad.yml" + ) cmd = ["./workbench", "--config", config_file_path, "--check"] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), '"c2p2" must come after', '') + self.assertRegex(str(stdout), '"c2p2" must come after', "") def tearDown(self): - preprocessed_csv_path = os.path.join(self.temp_dir, 'good.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "good.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'bad.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "bad.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -602,46 +839,101 @@ class TestCreateAllowMissingFiles(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) self.temp_dir = tempfile.gettempdir() - self.false_log_file_path = os.path.join(self.current_dir, 
'assets', 'allow_missing_files_test', 'allow_missing_files_false.log') - self.true_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'allow_missing_files_true.log') - self.false_with_soft_checks_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'allow_missing_files_false_with_soft_checks.log') + self.false_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "allow_missing_files_false.log", + ) + self.true_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "allow_missing_files_true.log", + ) + self.false_with_soft_checks_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "allow_missing_files_false_with_soft_checks.log", + ) def test_false(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_allow_missing_files_false.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_allow_missing_files_false.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), 'identified in CSV "file" column for row with ID "03" not found', '') + self.assertRegex( + str(stdout), + 'identified in CSV "file" column for row with ID "03" not found', + "", + ) with open(self.false_log_file_path) as log_file_false: log_data_false = log_file_false.read() self.assertRegex(log_data_false, 'ID "03" not found') def test_true(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_allow_missing_files_true.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_allow_missing_files_true.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Warning: "allow_missing_files" configuration setting is set to "true", and CSV "file" column values') + self.assertRegex( + output, + 'Warning: "allow_missing_files" configuration setting is set to "true", and CSV "file" column values', + ) with open(self.true_log_file_path) as log_file_true: log_data_true = log_file_true.read() - self.assertRegex(log_data_true, 'row with ID "06" not found or not accessible') + self.assertRegex( + log_data_true, 'row with ID "06" not found or not accessible' + ) def test_false_with_soft_checks(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_allow_missing_files_false_with_soft_checks.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_allow_missing_files_false_with_soft_checks.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Warning: "perform_soft_checks" configuration setting is set to "true" and some values in the "file" column were not found') - - with open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, 'row with ID "03" not found') - - with 
open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, 'row with ID "06" not found or not accessible') + self.assertRegex( + output, + 'Warning: "perform_soft_checks" configuration setting is set to "true" and some values in the "file" column were not found', + ) + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, 'row with ID "03" not found' + ) + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'row with ID "06" not found or not accessible', + ) def tearDown(self): if os.path.exists(self.false_log_file_path): @@ -653,7 +945,9 @@ def tearDown(self): if os.path.exists(self.false_with_soft_checks_log_file_path): os.remove(self.false_with_soft_checks_log_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata_check.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_check.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -663,48 +957,115 @@ class TestCreateAllowMissingFilesWithAdditionalFiles(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) self.temp_dir = tempfile.gettempdir() - self.false_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'additional_files_allow_missing_files_false.log') - self.true_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'additional_files_allow_missing_files_true.log') - self.false_with_soft_checks_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'additional_files_allow_missing_files_false_with_soft_checks.log') + self.false_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "additional_files_allow_missing_files_false.log", + ) + self.true_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "additional_files_allow_missing_files_true.log", + ) + self.false_with_soft_checks_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "additional_files_allow_missing_files_false_with_soft_checks.log", + ) def test_false(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_additional_files_allow_missing_files_false.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_additional_files_allow_missing_files_false.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found', '') + self.assertRegex( + str(stdout), + 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found', + "", + ) with open(self.false_log_file_path) as log_file_false: 
log_data_false = log_file_false.read() - self.assertRegex(log_data_false, 'CVS row with ID 003 contains an empty value in its "tn" column') - self.assertRegex(log_data_false, 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found or not accessible') + self.assertRegex( + log_data_false, + 'CVS row with ID 003 contains an empty value in its "tn" column', + ) + self.assertRegex( + log_data_false, + 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found or not accessible', + ) def test_true(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_additional_files_allow_missing_files_true.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_additional_files_allow_missing_files_true.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, '"allow_missing_files" configuration setting is set to "true", and "additional_files" CSV columns', '') + self.assertRegex( + output, + '"allow_missing_files" configuration setting is set to "true", and "additional_files" CSV columns', + "", + ) with open(self.true_log_file_path) as log_file_true: log_data_true = log_file_true.read() - self.assertRegex(log_data_true, 'Additional file "additional_files_2_tn.jpg" in CSV column "tn" in row with ID 002 not found', '') - self.assertRegex(log_data_true, 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found', '') + self.assertRegex( + log_data_true, + 'Additional file "additional_files_2_tn.jpg" in CSV column "tn" in row with ID 002 not found', + "", + ) + self.assertRegex( + log_data_true, + 'Additional file "https://www.lib.sfu.ca/xxxtttuuu.jpg" in CSV column "tn" in row with ID 005 not found', + "", + ) def test_false_with_soft_checks(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'create_additional_files_allow_missing_files_false_with_soft_checks.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "create_additional_files_allow_missing_files_false_with_soft_checks.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'The "perform_soft_checks" configuration setting is set to "true"') - - with open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, 'Additional file "additional_files_2_tn.jpg" in CSV column "tn" in row with ID 002 not found') - - with open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, ', no problems found') + self.assertRegex( + output, 'The "perform_soft_checks" configuration setting is set to "true"' + ) + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'Additional file 
"additional_files_2_tn.jpg" in CSV column "tn" in row with ID 002 not found', + ) + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, ", no problems found" + ) def tearDown(self): if os.path.exists(self.false_log_file_path): @@ -716,7 +1077,9 @@ def tearDown(self): if os.path.exists(self.false_with_soft_checks_log_file_path): os.remove(self.false_with_soft_checks_log_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata_additional_files_check.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_additional_files_check.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -726,27 +1089,61 @@ class TestAddMediaAllowMissingFiles(unittest.TestCase): def setUp(self): # Create nodes to use in add_media task. self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.log') - self.false_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_false.log') - self.false_with_soft_checks_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_false_with_soft_checks.log') - self.true_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_true.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') - self.add_media_csv_template_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media.csv.template') - self.add_media_csv_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media.csv') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.log", + ) + self.false_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_false.log", + ) + self.false_with_soft_checks_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_false_with_soft_checks.log", + ) + self.true_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_true.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) + self.add_media_csv_template_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media.csv.template", + ) + self.add_media_csv_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "add_media.csv" + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = 
yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] create_output = subprocess.check_output(self.create_cmd) @@ -756,9 +1153,9 @@ def setUp(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) # Insert their node IDs in the input CSV file. First, open the CSV template. @@ -768,57 +1165,111 @@ def setUp(self): # Then add a node ID to the start of each line from the template # and write out an add_media input CSV file. template_line_index = 0 - with open(self.add_media_csv_file_path, 'a+') as add_media_csv: + with open(self.add_media_csv_file_path, "a+") as add_media_csv: # The first line in the output CSV is the headers from the template. add_media_csv.write(csv_template_lines[template_line_index]) # The subsequent lines should each start with a node ID from. for node_id in self.nids: template_line_index = template_line_index + 1 - add_media_csv.write(f"{node_id}{csv_template_lines[template_line_index]}") + add_media_csv.write( + f"{node_id}{csv_template_lines[template_line_index]}" + ) def test_false(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_false.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_false.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', '') + self.assertRegex( + str(stdout), + 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', + "", + ) with open(self.false_log_file_path) as log_file_false: log_data_false = log_file_false.read() - self.assertRegex(log_data_false, 'CSV row with ID .* contains an empty "file" value') - self.assertRegex(log_data_false, 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', '') + self.assertRegex( + log_data_false, 'CSV row with ID .* contains an empty "file" value' + ) + self.assertRegex( + log_data_false, + 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', + "", + ) def test_true(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_true.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_true.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Warning: "allow_missing_files" configuration setting is set to "true", and CSV "file" column values containing 
missing') + self.assertRegex( + output, + 'Warning: "allow_missing_files" configuration setting is set to "true", and CSV "file" column values containing missing', + ) with open(self.true_log_file_path) as log_file_true: log_data_true = log_file_true.read() - self.assertRegex(log_data_true, 'CSV row with ID .* contains an empty "file" value', '') + self.assertRegex( + log_data_true, 'CSV row with ID .* contains an empty "file" value', "" + ) # self.assertRegex(log_data_true, 'CVS row with ID .* contains an empty value in its "preservation" column', '') - self.assertRegex(log_data_true, 'INFO - .*no problems found', '') + self.assertRegex(log_data_true, "INFO - .*no problems found", "") def test_false_with_soft_checks(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_allow_missing_files_false_with_soft_checks.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_allow_missing_files_false_with_soft_checks.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Warning: "perform_soft_checks" configuration setting is set to "true" and some values in the "file" column') - self.assertRegex(output, 'Configuration and input data appear to be valid') - - with open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, 'CSV row with ID .* contains an empty "file" value') - self.assertRegex(log_file_false_with_soft_checks_data, 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', '') - self.assertRegex(log_file_false_with_soft_checks_data, 'INFO - .*no problems found', '') + self.assertRegex( + output, + 'Warning: "perform_soft_checks" configuration setting is set to "true" and some values in the "file" column', + ) + self.assertRegex(output, "Configuration and input data appear to be valid") + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'CSV row with ID .* contains an empty "file" value', + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'File ".*missing_transcript.txt" identified in CSV "file" column for row with ID .* not found', + "", + ) + self.assertRegex( + log_file_false_with_soft_checks_data, "INFO - .*no problems found", "" + ) def tearDown(self): # Delete the nodes created in setUp. 
for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.add_media_csv_file_path): @@ -827,15 +1278,21 @@ def tearDown(self): if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_create_nodes.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_create_nodes.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata_check.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "metadata_check.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -857,27 +1314,64 @@ class TestAddMediaAllowMissingWithAdditionalFiles(unittest.TestCase): def setUp(self): # Create nodes to use in add_media task. self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.yml') - self.create_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_create_nodes.log') - self.false_with_additional_files_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false.log') - self.false_with_soft_checks_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false_with_soft_checks.log') - self.true_log_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_true.log') - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'rollback.csv') - self.add_media_csv_template_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv.template') - self.add_media_csv_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files.csv') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.yml", + ) + self.create_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_create_nodes.log", + ) + self.false_with_additional_files_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false.log", + ) + self.false_with_soft_checks_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false_with_soft_checks.log", + ) + self.true_log_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + 
"add_media_additional_files_allow_missing_files_true.log", + ) + self.rollback_file_path = os.path.join( + self.current_dir, "assets", "allow_missing_files_test", "rollback.csv" + ) + self.add_media_csv_template_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv.template", + ) + self.add_media_csv_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files.csv", + ) self.temp_dir = tempfile.gettempdir() self.nids = list() yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) self.config = {} for k, v in config_data.items(): self.config[k] = v - self.islandora_host = self.config['host'] - self.islandora_username = self.config['username'] - self.islandora_password = self.config['password'] + self.islandora_host = self.config["host"] + self.islandora_username = self.config["username"] + self.islandora_password = self.config["password"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] create_output = subprocess.check_output(self.create_cmd) @@ -887,9 +1381,9 @@ def setUp(self): # so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) # Insert their node IDs in the input CSV file. First, open the CSV template. @@ -899,59 +1393,123 @@ def setUp(self): # Then add a node ID to the start of each line from the template # and write out an add_media input CSV file. template_line_index = 0 - with open(self.add_media_csv_file_path, 'a+') as add_media_csv: + with open(self.add_media_csv_file_path, "a+") as add_media_csv: # The first line in the output CSV is the headers from the template. add_media_csv.write(csv_template_lines[template_line_index]) # The subsequent lines should each start with a node ID from. 
for node_id in self.nids: template_line_index = template_line_index + 1 - add_media_csv.write(f"{node_id}{csv_template_lines[template_line_index]}") + add_media_csv.write( + f"{node_id}{csv_template_lines[template_line_index]}" + ) def test_false(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = proc.communicate() - self.assertRegex(str(stdout), 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', '') + self.assertRegex( + str(stdout), + 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', + "", + ) with open(self.false_with_additional_files_log_file_path) as log_file_false: log_data_false = log_file_false.read() - self.assertRegex(log_data_false, 'CSV row with ID .* contains an empty "file" value') - self.assertRegex(log_data_false, 'CVS row with ID .* contains an empty value in its "preservation" column') - self.assertRegex(log_data_false, 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', '') + self.assertRegex( + log_data_false, 'CSV row with ID .* contains an empty "file" value' + ) + self.assertRegex( + log_data_false, + 'CVS row with ID .* contains an empty value in its "preservation" column', + ) + self.assertRegex( + log_data_false, + 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', + "", + ) def test_true(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_true.yml') + config_file_path = os.path.join( + self.current_dir, + "assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_true.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, 'Warning: "allow_missing_files" configuration setting is set to "true", and "additional_files" CSV columns containing missing') + self.assertRegex( + output, + 'Warning: "allow_missing_files" configuration setting is set to "true", and "additional_files" CSV columns containing missing', + ) with open(self.true_log_file_path) as log_file_true: log_data_true = log_file_true.read() - self.assertRegex(log_data_true, 'CSV row with ID .* contains an empty "file" value', '') - self.assertRegex(log_data_true, 'CVS row with ID .* contains an empty value in its "preservation" column', '') - self.assertRegex(log_data_true, 'INFO - .*no problems found', '') + self.assertRegex( + log_data_true, 'CSV row with ID .* contains an empty "file" value', "" + ) + self.assertRegex( + log_data_true, + 'CVS row with ID .* contains an empty value in its "preservation" column', + "", + ) + self.assertRegex(log_data_true, "INFO - .*no problems found", "") def test_false_with_soft_checks(self): - config_file_path = os.path.join(self.current_dir, 'assets', 'allow_missing_files_test', 'add_media_additional_files_allow_missing_files_false_with_soft_checks.yml') + config_file_path = os.path.join( + self.current_dir, + 
"assets", + "allow_missing_files_test", + "add_media_additional_files_allow_missing_files_false_with_soft_checks.yml", + ) cmd = ["./workbench", "--config", config_file_path, "--check"] output = subprocess.check_output(cmd) output = output.decode().strip() - self.assertRegex(output, '"perform_soft_checks" configuration setting is set to "true"', '') - self.assertRegex(output, 'Configuration and input data appear to be valid', '') - - with open(self.false_with_soft_checks_log_file_path) as log_file_false_with_soft_checks: - log_file_false_with_soft_checks_data = log_file_false_with_soft_checks.read() - self.assertRegex(log_file_false_with_soft_checks_data, 'CSV row with ID .* contains an empty "file" value') - self.assertRegex(log_file_false_with_soft_checks_data, 'CVS row with ID .* contains an empty value in its "preservation" column', '') - self.assertRegex(log_file_false_with_soft_checks_data, 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', '') - self.assertRegex(log_file_false_with_soft_checks_data, 'INFO - .*no problems found', '') + self.assertRegex( + output, '"perform_soft_checks" configuration setting is set to "true"', "" + ) + self.assertRegex(output, "Configuration and input data appear to be valid", "") + + with open( + self.false_with_soft_checks_log_file_path + ) as log_file_false_with_soft_checks: + log_file_false_with_soft_checks_data = ( + log_file_false_with_soft_checks.read() + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'CSV row with ID .* contains an empty "file" value', + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'CVS row with ID .* contains an empty value in its "preservation" column', + "", + ) + self.assertRegex( + log_file_false_with_soft_checks_data, + 'Additional file "add_media_transcript_x.txt" in CSV column "transcript" in row with ID .* not found', + "", + ) + self.assertRegex( + log_file_false_with_soft_checks_data, "INFO - .*no problems found", "" + ) def tearDown(self): # Delete the nodes created in setUp. 
for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) if os.path.exists(self.add_media_csv_file_path): @@ -960,11 +1518,15 @@ def tearDown(self): if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_create_nodes.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_create_nodes.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - preprocessed_csv_path = os.path.join(self.temp_dir, 'add_media_additional_files.csv.preprocessed') + preprocessed_csv_path = os.path.join( + self.temp_dir, "add_media_additional_files.csv.preprocessed" + ) if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) @@ -981,5 +1543,5 @@ def tearDown(self): os.remove(self.true_log_file_path) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/islandora_tests_hooks.py b/tests/islandora_tests_hooks.py index 2b4c9fbd..03981628 100644 --- a/tests/islandora_tests_hooks.py +++ b/tests/islandora_tests_hooks.py @@ -25,12 +25,18 @@ class TestExecuteBootstrapScript(unittest.TestCase): def setUp(self): dir_path = os.path.dirname(os.path.realpath(__file__)) - self.script_path = os.path.join(dir_path, 'assets', 'execute_bootstrap_script_test', 'script.py') - self.config_file_path = os.path.join(dir_path, 'assets', 'execute_bootstrap_script_test', 'config.yml') + self.script_path = os.path.join( + dir_path, "assets", "execute_bootstrap_script_test", "script.py" + ) + self.config_file_path = os.path.join( + dir_path, "assets", "execute_bootstrap_script_test", "config.yml" + ) def test_execute_python_script(self): - output, return_code = workbench_utils.execute_bootstrap_script(self.script_path, self.config_file_path) - self.assertEqual(output.strip(), b'Hello') + output, return_code = workbench_utils.execute_bootstrap_script( + self.script_path, self.config_file_path + ) + self.assertEqual(output.strip(), b"Hello") class TestExecutePreprocessorScript(unittest.TestCase): @@ -38,27 +44,44 @@ class TestExecutePreprocessorScript(unittest.TestCase): def setUp(self): yaml = YAML() dir_path = os.path.dirname(os.path.realpath(__file__)) - self.script_path = os.path.join(dir_path, 'assets', 'preprocess_field_data', 'script.py') + self.script_path = os.path.join( + dir_path, "assets", "preprocess_field_data", "script.py" + ) def test_preprocessor_script_single_field_value(self): - output, return_code = workbench_utils.preprocess_field_data('|', 'hello', self.script_path) - self.assertEqual(output.strip(), b'HELLO') + output, return_code = workbench_utils.preprocess_field_data( + "|", "hello", self.script_path + ) + self.assertEqual(output.strip(), b"HELLO") def test_preprocessor_script_multiple_field_value(self): - output, return_code = workbench_utils.preprocess_field_data('|', 'hello|there', self.script_path) - self.assertEqual(output.strip(), b'HELLO|THERE') + output, return_code = workbench_utils.preprocess_field_data( + "|", "hello|there", self.script_path + ) + self.assertEqual(output.strip(), b"HELLO|THERE") class TestExecutePostActionEntityScript(unittest.TestCase): - '''Note: Only tests for 
creating nodes. - ''' + """Note: Only tests for creating nodes.""" def setUp(self): self.current_dir = os.path.dirname(os.path.realpath(__file__)) - self.config_file_path = os.path.join(self.current_dir, 'assets', 'execute_post_action_entity_script_test', 'create.yml') - self.script_path = os.path.join(self.current_dir, 'assets', 'execute_post_action_entity_script_test', 'script.py') + self.config_file_path = os.path.join( + self.current_dir, + "assets", + "execute_post_action_entity_script_test", + "create.yml", + ) + self.script_path = os.path.join( + self.current_dir, + "assets", + "execute_post_action_entity_script_test", + "script.py", + ) self.temp_dir = tempfile.gettempdir() - self.output_file_path = os.path.join(self.temp_dir, 'execute_post_action_entity_script.dat') + self.output_file_path = os.path.join( + self.temp_dir, "execute_post_action_entity_script.dat" + ) if os.path.exists(self.output_file_path): os.remove(self.output_file_path) @@ -71,19 +94,32 @@ def test_post_task_entity_script(self): with open(self.output_file_path, "r") as lines: titles = lines.readlines() - self.assertEqual(titles[0].strip(), 'First title') - self.assertEqual(titles[1].strip(), 'Second title') + self.assertEqual(titles[0].strip(), "First title") + self.assertEqual(titles[1].strip(), "Second title") def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.config_file_path, '--quick_delete_node', 'https://islandora.traefik.me/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.config_file_path, + "--quick_delete_node", + "https://islandora.traefik.me/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - self.rollback_file_path = os.path.join(self.current_dir, 'assets', 'execute_post_action_entity_script_test', 'rollback.csv') + self.rollback_file_path = os.path.join( + self.current_dir, + "assets", + "execute_post_action_entity_script_test", + "rollback.csv", + ) if os.path.exists(self.rollback_file_path): os.remove(self.rollback_file_path) - self.preprocessed_file_path = os.path.join(self.temp_dir, 'metadata.csv.preprocessed') + self.preprocessed_file_path = os.path.join( + self.temp_dir, "metadata.csv.preprocessed" + ) if os.path.exists(self.preprocessed_file_path): os.remove(self.preprocessed_file_path) @@ -91,5 +127,5 @@ def tearDown(self): os.remove(self.output_file_path) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/islandora_tests_paged_content.py b/tests/islandora_tests_paged_content.py index 21be0c9f..b9ce894c 100644 --- a/tests/islandora_tests_paged_content.py +++ b/tests/islandora_tests_paged_content.py @@ -24,16 +24,18 @@ class TestCreatePagedContent(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'create_paged_content_test', 'create.yml') + self.create_config_file_path = os.path.join( + self.current_dir, "assets", "create_paged_content_test", "create.yml" + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] + self.islandora_host = config["host"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] @@ -48,9 +50,9 @@ def test_create_paged_content(self): # nodes 
created during this test so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 6) @@ -66,23 +68,33 @@ def test_create_paged_content(self): parent_node_id_to_test = self.nids[3] # The last node to be created was a page. child_node_id_to_test = self.nids[5] - node_url = self.islandora_host + '/node/' + child_node_id_to_test + '?_format=json' + node_url = ( + self.islandora_host + "/node/" + child_node_id_to_test + "?_format=json" + ) response = requests.get(node_url) node_json = json.loads(response.text) - field_member_of = node_json['field_member_of'][0]['target_id'] + field_member_of = node_json["field_member_of"][0]["target_id"] self.assertEqual(int(parent_node_id_to_test), field_member_of) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'create_paged_content_test', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, "assets", "create_paged_content_test", "rollback.csv" + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) @@ -91,18 +103,23 @@ class TestCreatePagedContentFromDirectories(unittest.TestCase): def setUp(self): self.current_dir = os.path.dirname(os.path.abspath(__file__)) - self.create_config_file_path = os.path.join(self.current_dir, 'assets', 'create_paged_content_from_directories_test', 'books.yml') + self.create_config_file_path = os.path.join( + self.current_dir, + "assets", + "create_paged_content_from_directories_test", + "books.yml", + ) yaml = YAML() - with open(self.create_config_file_path, 'r') as f: + with open(self.create_config_file_path, "r") as f: config_file_contents = f.read() config_data = yaml.load(config_file_contents) config = {} for k, v in config_data.items(): config[k] = v - self.islandora_host = config['host'] - self.islandora_username = config['username'] - self.islandora_password = config['password'] + self.islandora_host = config["host"] + self.islandora_username = config["username"] + self.islandora_password = config["password"] self.create_cmd = ["./workbench", "--config", self.create_config_file_path] @@ -117,9 +134,9 @@ def test_create_paged_content_from_directories(self): # nodes created during this test so they can be deleted in tearDown(). create_lines = create_output.splitlines() for line in create_lines: - if 'created at' in line: - nid = line.rsplit('/', 1)[-1] - nid = nid.strip('.') + if "created at" in line: + nid = line.rsplit("/", 1)[-1] + nid = nid.strip(".") self.nids.append(nid) self.assertEqual(len(self.nids), 4) @@ -133,34 +150,56 @@ def test_create_paged_content_from_directories(self): # that this test creates media and does not delete them. 
parent_node_id_to_test = self.nids[0] # Get the REST feed for the parent node's members. - members_url = self.islandora_host + '/node/' + parent_node_id_to_test + '/members?_format=json' + members_url = ( + self.islandora_host + + "/node/" + + parent_node_id_to_test + + "/members?_format=json" + ) # Need to provide credentials for this REST export. - members_response = requests.get(members_url, auth=(self.islandora_username, self.islandora_password)) + members_response = requests.get( + members_url, auth=(self.islandora_username, self.islandora_password) + ) members = json.loads(members_response.text) expected_member_weights = [1, 2, 3] retrieved_member_weights = list() for member in members: - retrieved_member_weights.append(int(member['field_weight'][0]['value'])) + retrieved_member_weights.append(int(member["field_weight"][0]["value"])) # Test that each page indeed a member of the first node created during this test. - self.assertEqual(int(parent_node_id_to_test), int(member['field_member_of'][0]['target_id'])) + self.assertEqual( + int(parent_node_id_to_test), + int(member["field_member_of"][0]["target_id"]), + ) # Test that the weights assigned to the three pages are what we expect. self.assertEqual(expected_member_weights, retrieved_member_weights) def tearDown(self): for nid in self.nids: - quick_delete_cmd = ["./workbench", "--config", self.create_config_file_path, '--quick_delete_node', self.islandora_host + '/node/' + nid] + quick_delete_cmd = [ + "./workbench", + "--config", + self.create_config_file_path, + "--quick_delete_node", + self.islandora_host + "/node/" + nid, + ] quick_delete_output = subprocess.check_output(quick_delete_cmd) - preprocessed_csv_path = os.path.join(self.temp_dir, 'metadata.csv.preprocessed') + preprocessed_csv_path = os.path.join(self.temp_dir, "metadata.csv.preprocessed") if os.path.exists(preprocessed_csv_path): os.remove(preprocessed_csv_path) - rollback_file_path = os.path.join(self.current_dir, 'assets', 'create_paged_content_from_directories_test', 'samplebooks', 'rollback.csv') + rollback_file_path = os.path.join( + self.current_dir, + "assets", + "create_paged_content_from_directories_test", + "samplebooks", + "rollback.csv", + ) if os.path.exists(rollback_file_path): os.remove(rollback_file_path) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 1aa88489..eed1b29f 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -15,21 +15,25 @@ class TestCompareStings(unittest.TestCase): def test_strings_match(self): - res = workbench_utils.compare_strings('foo', 'foo ') + res = workbench_utils.compare_strings("foo", "foo ") self.assertTrue(res) - res = workbench_utils.compare_strings('foo', 'Foo') + res = workbench_utils.compare_strings("foo", "Foo") self.assertTrue(res) - res = workbench_utils.compare_strings('foo', 'Foo#~^.') + res = workbench_utils.compare_strings("foo", "Foo#~^.") self.assertTrue(res) - res = workbench_utils.compare_strings('foo bar baz', 'foo bar baz') + res = workbench_utils.compare_strings("foo bar baz", "foo bar baz") self.assertTrue(res) - res = workbench_utils.compare_strings('Lastname,Firstname', 'Lastname, Firstname') + res = workbench_utils.compare_strings( + "Lastname,Firstname", "Lastname, Firstname" + ) self.assertTrue(res) - res = workbench_utils.compare_strings('لدولي العاشر ليونيكود--', 'لدولي, العاشر []ليونيكود') + res = workbench_utils.compare_strings( + "لدولي العاشر ليونيكود--", "لدولي, العاشر []ليونيكود" + ) 
self.assertTrue(res) def test_strings_do_not_match(self): - res = workbench_utils.compare_strings('foo', 'foot') + res = workbench_utils.compare_strings("foo", "foot") self.assertFalse(res) @@ -37,238 +41,373 @@ class TestCsvRecordHasher(unittest.TestCase): def test_hasher(self): csv_record = collections.OrderedDict() - csv_record['one'] = 'eijco87we ' - csv_record['two'] = 'jjjclsle300sloww' - csv_record['three'] = 'pppzzffr46wkkw' - csv_record['four'] = 'لدولي, العاشر []ليونيكود' - csv_record['four'] = '' - csv_record['six'] = 5 + csv_record["one"] = "eijco87we " + csv_record["two"] = "jjjclsle300sloww" + csv_record["three"] = "pppzzffr46wkkw" + csv_record["four"] = "لدولي, العاشر []ليونيكود" + csv_record["four"] = "" + csv_record["six"] = 5 md5hash = workbench_utils.get_csv_record_hash(csv_record) - self.assertEqual(md5hash, 'bda4013d3695a98cd56d4d2b6a66fb4c') + self.assertEqual(md5hash, "bda4013d3695a98cd56d4d2b6a66fb4c") class TestSplitGeolocationString(unittest.TestCase): def test_split_geolocation_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_geolocation_string( - config, '49.16667, -123.93333') - self.assertDictEqual(res[0], {'lat': '49.16667', 'lng': '-123.93333'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_geolocation_string(config, "49.16667, -123.93333") + self.assertDictEqual(res[0], {"lat": "49.16667", "lng": "-123.93333"}) def test_split_geolocation_string_multiple(self): - config = {'subdelimiter': '|'} + config = {"subdelimiter": "|"} res = workbench_utils.split_geolocation_string( - config, '30.16667, -120.93333|50.1,-120.5') - self.assertDictEqual(res[0], {'lat': '30.16667', 'lng': '-120.93333'}) - self.assertDictEqual(res[1], {'lat': '50.1', 'lng': '-120.5'}) + config, "30.16667, -120.93333|50.1,-120.5" + ) + self.assertDictEqual(res[0], {"lat": "30.16667", "lng": "-120.93333"}) + self.assertDictEqual(res[1], {"lat": "50.1", "lng": "-120.5"}) def test_split_geolocation_string_multiple_at_sign(self): - config = {'subdelimiter': '@'} + config = {"subdelimiter": "@"} res = workbench_utils.split_geolocation_string( - config, '49.16667, -123.93333@50.1,-120.5') - self.assertDictEqual(res[0], {'lat': '49.16667', 'lng': '-123.93333'}) - self.assertDictEqual(res[1], {'lat': '50.1', 'lng': '-120.5'}) + config, "49.16667, -123.93333@50.1,-120.5" + ) + self.assertDictEqual(res[0], {"lat": "49.16667", "lng": "-123.93333"}) + self.assertDictEqual(res[1], {"lat": "50.1", "lng": "-120.5"}) def test_split_geolocation_string_with_leading_slash(self): - config = {'subdelimiter': '@'} + config = {"subdelimiter": "@"} res = workbench_utils.split_geolocation_string( - config, r'\+49.16667, -123.93333@\+50.1,-120.5') - self.assertDictEqual(res[0], {'lat': '+49.16667', 'lng': '-123.93333'}) - self.assertDictEqual(res[1], {'lat': '+50.1', 'lng': '-120.5'}) + config, r"\+49.16667, -123.93333@\+50.1,-120.5" + ) + self.assertDictEqual(res[0], {"lat": "+49.16667", "lng": "-123.93333"}) + self.assertDictEqual(res[1], {"lat": "+50.1", "lng": "-120.5"}) def test_split_geolocation_string_empty(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_geolocation_string(config, ' ') + config = {"subdelimiter": "|"} + res = workbench_utils.split_geolocation_string(config, " ") self.assertEqual(res, []) class TestSplitLinkString(unittest.TestCase): def test_split_link_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_link_string(config, 'http://www.foo.bar%%Foobar website') - 
self.assertDictEqual(res[0], {'uri': 'http://www.foo.bar', 'title': 'Foobar website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_link_string( + config, "http://www.foo.bar%%Foobar website" + ) + self.assertDictEqual( + res[0], {"uri": "http://www.foo.bar", "title": "Foobar website"} + ) def test_split_geolocation_string_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_link_string(config, 'http://foobar.net%%Foobardotnet website|http://baz.com%%Baz website') - self.assertDictEqual(res[0], {'uri': 'http://foobar.net', 'title': 'Foobardotnet website'}) - self.assertDictEqual(res[1], {'uri': 'http://baz.com', 'title': 'Baz website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_link_string( + config, + "http://foobar.net%%Foobardotnet website|http://baz.com%%Baz website", + ) + self.assertDictEqual( + res[0], {"uri": "http://foobar.net", "title": "Foobardotnet website"} + ) + self.assertDictEqual(res[1], {"uri": "http://baz.com", "title": "Baz website"}) def test_split_link_string_no_title_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_link_string(config, 'http://www.foo.bar') - self.assertDictEqual(res[0], {'uri': 'http://www.foo.bar', 'title': 'http://www.foo.bar'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_link_string(config, "http://www.foo.bar") + self.assertDictEqual( + res[0], {"uri": "http://www.foo.bar", "title": "http://www.foo.bar"} + ) def test_split_geolocation_string_no_title_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_link_string(config, 'http://foobar.net|http://baz.com%%Baz website') - self.assertDictEqual(res[0], {'uri': 'http://foobar.net', 'title': 'http://foobar.net'}) - self.assertDictEqual(res[1], {'uri': 'http://baz.com', 'title': 'Baz website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_link_string( + config, "http://foobar.net|http://baz.com%%Baz website" + ) + self.assertDictEqual( + res[0], {"uri": "http://foobar.net", "title": "http://foobar.net"} + ) + self.assertDictEqual(res[1], {"uri": "http://baz.com", "title": "Baz website"}) class TestSplitAuthorityLinkString(unittest.TestCase): def test_split_link_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_authority_link_string(config, 'foo%%http://www.foo.bar%%Foobar website') - self.assertDictEqual(res[0], {'source': 'foo', 'uri': 'http://www.foo.bar', 'title': 'Foobar website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_authority_link_string( + config, "foo%%http://www.foo.bar%%Foobar website" + ) + self.assertDictEqual( + res[0], + {"source": "foo", "uri": "http://www.foo.bar", "title": "Foobar website"}, + ) def test_split_geolocation_string_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_authority_link_string(config, 'bar%%http://foobar.net%%Foobardotnet website|xxx%%http://baz.com%%Baz website') - self.assertDictEqual(res[0], {'source': 'bar', 'uri': 'http://foobar.net', 'title': 'Foobardotnet website'}) - self.assertDictEqual(res[1], {'source': 'xxx', 'uri': 'http://baz.com', 'title': 'Baz website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_authority_link_string( + config, + "bar%%http://foobar.net%%Foobardotnet website|xxx%%http://baz.com%%Baz website", + ) + self.assertDictEqual( + res[0], + { + "source": "bar", + "uri": "http://foobar.net", + "title": "Foobardotnet website", + }, + ) + self.assertDictEqual( + res[1], {"source": 
"xxx", "uri": "http://baz.com", "title": "Baz website"} + ) def test_split_link_string_no_title_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_authority_link_string(config, 'foo%%http://www.foo.bar') - self.assertDictEqual(res[0], {'source': 'foo', 'uri': 'http://www.foo.bar', 'title': ''}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_authority_link_string( + config, "foo%%http://www.foo.bar" + ) + self.assertDictEqual( + res[0], {"source": "foo", "uri": "http://www.foo.bar", "title": ""} + ) def test_split_geolocation_string_no_title_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_authority_link_string(config, 'zzz%%http://foobar.net|rrr%%http://baz.com%%Baz website') - self.assertDictEqual(res[0], {'source': 'zzz', 'uri': 'http://foobar.net', 'title': ''}) - self.assertDictEqual(res[1], {'source': 'rrr', 'uri': 'http://baz.com', 'title': 'Baz website'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_authority_link_string( + config, "zzz%%http://foobar.net|rrr%%http://baz.com%%Baz website" + ) + self.assertDictEqual( + res[0], {"source": "zzz", "uri": "http://foobar.net", "title": ""} + ) + self.assertDictEqual( + res[1], {"source": "rrr", "uri": "http://baz.com", "title": "Baz website"} + ) class TestSplitTypedRelationString(unittest.TestCase): def test_split_typed_relation_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'relators:pht:5', 'foo') - self.assertDictEqual(res[0], - {'target_id': int(5), - 'rel_type': 'relators:pht', - 'target_type': 'foo'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_typed_relation_string( + config, "relators:pht:5", "foo" + ) + self.assertDictEqual( + res[0], + {"target_id": int(5), "rel_type": "relators:pht", "target_type": "foo"}, + ) def test_split_typed_relation_alpha_numeric_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'aat:300024987:5', 'foo') - self.assertDictEqual(res[0], - {'target_id': int(5), - 'rel_type': 'aat:300024987', - 'target_type': 'foo'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_typed_relation_string( + config, "aat:300024987:5", "foo" + ) + self.assertDictEqual( + res[0], + {"target_id": int(5), "rel_type": "aat:300024987", "target_type": "foo"}, + ) def test_split_typed_relation_uri_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'relators:art:https://foo.bar/baz', 'foo') - self.assertDictEqual(res[0], - {'target_id': 'https://foo.bar/baz', - 'rel_type': 'relators:art', - 'target_type': 'foo'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_typed_relation_string( + config, "relators:art:https://foo.bar/baz", "foo" + ) + self.assertDictEqual( + res[0], + { + "target_id": "https://foo.bar/baz", + "rel_type": "relators:art", + "target_type": "foo", + }, + ) def test_split_typed_relation_uri_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'relators:pht:https://example.com/example1|relators:con:https://example5.com/example6', 'bar') - self.assertDictEqual(res[0], - {'target_id': 'https://example.com/example1', - 'rel_type': 'relators:pht', - 'target_type': 'bar'}) - self.assertDictEqual(res[1], - {'target_id': 'https://example5.com/example6', - 'rel_type': 'relators:con', - 'target_type': 'bar'}) + config = {"subdelimiter": "|"} + res = 
workbench_utils.split_typed_relation_string( + config, + "relators:pht:https://example.com/example1|relators:con:https://example5.com/example6", + "bar", + ) + self.assertDictEqual( + res[0], + { + "target_id": "https://example.com/example1", + "rel_type": "relators:pht", + "target_type": "bar", + }, + ) + self.assertDictEqual( + res[1], + { + "target_id": "https://example5.com/example6", + "rel_type": "relators:con", + "target_type": "bar", + }, + ) def test_split_typed_relation_string_single_with_delimter_in_value(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'relators:pbl:London: Bar Press', 'foopub') - self.assertDictEqual(res[0], - {'target_id': 'London: Bar Press', - 'rel_type': 'relators:pbl', - 'target_type': 'foopub'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_typed_relation_string( + config, "relators:pbl:London: Bar Press", "foopub" + ) + self.assertDictEqual( + res[0], + { + "target_id": "London: Bar Press", + "rel_type": "relators:pbl", + "target_type": "foopub", + }, + ) def test_split_typed_relation_string_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_typed_relation_string(config, 'relators:pht:5|relators:con:10', 'bar') - self.assertDictEqual(res[0], - {'target_id': int(5), - 'rel_type': 'relators:pht', - 'target_type': 'bar'}) - self.assertDictEqual(res[1], - {'target_id': int(10), - 'rel_type': 'relators:con', - 'target_type': 'bar'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_typed_relation_string( + config, "relators:pht:5|relators:con:10", "bar" + ) + self.assertDictEqual( + res[0], + {"target_id": int(5), "rel_type": "relators:pht", "target_type": "bar"}, + ) + self.assertDictEqual( + res[1], + {"target_id": int(10), "rel_type": "relators:con", "target_type": "bar"}, + ) def test_split_typed_relation_string_multiple_at_sign(self): - config = {'subdelimiter': '@'} - res = workbench_utils.split_typed_relation_string(config, 'relators:pht:5@relators:con:10', 'baz') - self.assertDictEqual(res[0], - {'target_id': int(5), - 'rel_type': 'relators:pht', - 'target_type': 'baz'}) - self.assertDictEqual(res[1], - {'target_id': int(10), - 'rel_type': 'relators:con', - 'target_type': 'baz'}) + config = {"subdelimiter": "@"} + res = workbench_utils.split_typed_relation_string( + config, "relators:pht:5@relators:con:10", "baz" + ) + self.assertDictEqual( + res[0], + {"target_id": int(5), "rel_type": "relators:pht", "target_type": "baz"}, + ) + self.assertDictEqual( + res[1], + {"target_id": int(10), "rel_type": "relators:con", "target_type": "baz"}, + ) class TestMediaTrackString(unittest.TestCase): def test_split_media_track_string_single(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_media_track_string(config, 'Transcript:subtitles:en:/path/to/file') - self.assertDictEqual(res[0], {'label': 'Transcript', 'kind': 'subtitles', 'srclang': 'en', 'file_path': '/path/to/file'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_media_track_string( + config, "Transcript:subtitles:en:/path/to/file" + ) + self.assertDictEqual( + res[0], + { + "label": "Transcript", + "kind": "subtitles", + "srclang": "en", + "file_path": "/path/to/file", + }, + ) def test_split_media_track_string_single_windows(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_media_track_string(config, 'Foo:subtitles:en:c:/path/to/file') - self.assertDictEqual(res[0], {'label': 'Foo', 'kind': 'subtitles', 'srclang': 'en', 'file_path': 
'c:/path/to/file'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_media_track_string( + config, "Foo:subtitles:en:c:/path/to/file" + ) + self.assertDictEqual( + res[0], + { + "label": "Foo", + "kind": "subtitles", + "srclang": "en", + "file_path": "c:/path/to/file", + }, + ) def test_split_media_track_multiple(self): - config = {'subdelimiter': '|'} - res = workbench_utils.split_media_track_string(config, 'Bar:subtitles:en:c:/path/to/file.vtt|Baz:subtitles:fr:/path/to/file2.vtt') - self.assertDictEqual(res[0], {'label': 'Bar', 'kind': 'subtitles', 'srclang': 'en', 'file_path': 'c:/path/to/file.vtt'}) - self.assertDictEqual(res[1], {'label': 'Baz', 'kind': 'subtitles', 'srclang': 'fr', 'file_path': '/path/to/file2.vtt'}) + config = {"subdelimiter": "|"} + res = workbench_utils.split_media_track_string( + config, + "Bar:subtitles:en:c:/path/to/file.vtt|Baz:subtitles:fr:/path/to/file2.vtt", + ) + self.assertDictEqual( + res[0], + { + "label": "Bar", + "kind": "subtitles", + "srclang": "en", + "file_path": "c:/path/to/file.vtt", + }, + ) + self.assertDictEqual( + res[1], + { + "label": "Baz", + "kind": "subtitles", + "srclang": "fr", + "file_path": "/path/to/file2.vtt", + }, + ) class TestValidateMediaTrackString(unittest.TestCase): def test_validate_media_track_values(self): - res = workbench_utils.validate_media_track_value('Transcript:subtitles:en:c:/path/to/file.vtt') + res = workbench_utils.validate_media_track_value( + "Transcript:subtitles:en:c:/path/to/file.vtt" + ) self.assertTrue(res) - res = workbench_utils.validate_media_track_value('Transcript:captions:de:c:/path/to/file.vtt') + res = workbench_utils.validate_media_track_value( + "Transcript:captions:de:c:/path/to/file.vtt" + ) self.assertTrue(res) - res = workbench_utils.validate_media_track_value('Transcript:subtitles:fr:c:/path/to/file.VTT') + res = workbench_utils.validate_media_track_value( + "Transcript:subtitles:fr:c:/path/to/file.VTT" + ) self.assertTrue(res) - res = workbench_utils.validate_media_track_value('Transcript:subtitles:ff:c:/path/to/file.VTT') + res = workbench_utils.validate_media_track_value( + "Transcript:subtitles:ff:c:/path/to/file.VTT" + ) self.assertFalse(res) - res = workbench_utils.validate_media_track_value('Transcript:subtitles:en:c:/path/to/file.ccc') + res = workbench_utils.validate_media_track_value( + "Transcript:subtitles:en:c:/path/to/file.ccc" + ) self.assertFalse(res) - res = workbench_utils.validate_media_track_value(':subtitles:en:c:/path/to/file.VTT') + res = workbench_utils.validate_media_track_value( + ":subtitles:en:c:/path/to/file.VTT" + ) self.assertFalse(res) - res = workbench_utils.validate_media_track_value('Transcript:subtitle:en:c:/path/to/file.vtt') + res = workbench_utils.validate_media_track_value( + "Transcript:subtitle:en:c:/path/to/file.vtt" + ) self.assertFalse(res) class TestValidateLanguageCode(unittest.TestCase): def test_validate_code_in_list(self): - res = workbench_utils.validate_language_code('es') + res = workbench_utils.validate_language_code("es") self.assertTrue(res) def test_validate_code_not_in_list(self): - res = workbench_utils.validate_language_code('foo') + res = workbench_utils.validate_language_code("foo") self.assertFalse(res) class TestValidateLatlongValue(unittest.TestCase): def test_validate_good_latlong_values(self): - values = ['+90.0, -127.554334', '90.0, -127.554334', '-90,-180', '+50.25,-117.8', '+48.43333,-123.36667'] + values = [ + "+90.0, -127.554334", + "90.0, -127.554334", + "-90,-180", + "+50.25,-117.8", + 
"+48.43333,-123.36667", + ] for value in values: res = workbench_utils.validate_latlong_value(value) self.assertTrue(res) def test_validate_bad_latlong_values(self): - values = ['+90.1 -100.111', '045, 180', '+5025,-117.8', '-123.36667'] + values = ["+90.1 -100.111", "045, 180", "+5025,-117.8", "-123.36667"] for value in values: res = workbench_utils.validate_latlong_value(value) self.assertFalse(res) @@ -277,13 +416,18 @@ def test_validate_bad_latlong_values(self): class TestValidateLinkValue(unittest.TestCase): def test_validate_good_link_values(self): - values = ['http://foo.com', 'https://foo1.com%%Foo Hardware'] + values = ["http://foo.com", "https://foo1.com%%Foo Hardware"] for value in values: res = workbench_utils.validate_link_value(value) self.assertTrue(res) def test_validate_bad_link_values(self): - values = ['foo.com', 'http:/foo.com', 'file://server/folder/data.xml', 'mailto:someone@example.com'] + values = [ + "foo.com", + "http:/foo.com", + "file://server/folder/data.xml", + "mailto:someone@example.com", + ] for value in values: res = workbench_utils.validate_link_value(value) self.assertFalse(res) @@ -292,33 +436,36 @@ def test_validate_bad_link_values(self): class TestValidateAuthorityLinkValue(unittest.TestCase): def test_validate_good_authority_link_values(self): - values = ['viaf%%http://viaf.org/viaf/10646807%%VIAF Record', 'cash%%http://cash.org%%foo'] + values = [ + "viaf%%http://viaf.org/viaf/10646807%%VIAF Record", + "cash%%http://cash.org%%foo", + ] for value in values: - res = workbench_utils.validate_authority_link_value(value, ['cash', 'viaf']) + res = workbench_utils.validate_authority_link_value(value, ["cash", "viaf"]) self.assertTrue(res) def test_validate_bad_authority_link_values(self): - values = ['viaf%%htt://viaf.org/viaf/10646807%%VIAF Record'] + values = ["viaf%%htt://viaf.org/viaf/10646807%%VIAF Record"] for value in values: - res = workbench_utils.validate_authority_link_value(value, ['cash', 'viaf']) + res = workbench_utils.validate_authority_link_value(value, ["cash", "viaf"]) self.assertFalse(res) - values = ['xcash%%http://cash.org%%foo'] + values = ["xcash%%http://cash.org%%foo"] for value in values: - res = workbench_utils.validate_authority_link_value(value, ['cash', 'viaf']) + res = workbench_utils.validate_authority_link_value(value, ["cash", "viaf"]) self.assertFalse(res) class TestValidateNodeCreatedDateValue(unittest.TestCase): def test_validate_good_date_string_values(self): - values = ['2020-11-15T23:49:22+00:00'] + values = ["2020-11-15T23:49:22+00:00"] for value in values: res = workbench_utils.validate_node_created_date_string(value) self.assertTrue(res) def test_validate_bad_date_string_values(self): - values = ['2020-11-15:23:49:22+00:00', '2020-11-15T:23:49:22', '2020-11-15'] + values = ["2020-11-15:23:49:22+00:00", "2020-11-15T:23:49:22", "2020-11-15"] for value in values: res = workbench_utils.validate_node_created_date_string(value) self.assertFalse(res) @@ -327,45 +474,47 @@ def test_validate_bad_date_string_values(self): class TestValidEdtfDate(unittest.TestCase): def test_validate_good_edtf_values(self): - good_values = ['190X', - '1900-XX', - '1900', - '2020-10', - '2021-10-12', - '2001-21', - '2001-22', - '2001-23', - '2001-24', - '2001-31', - '193X/196X', - '198X?', - '19XX?', - '2XXX?', - '198X~', - '19XX~', - '2XXX~', - '198X%', - '19XX%', - '2XXX%', - 'XXXX?', - 'XXXX~', - 'XXXX%' - ] + good_values = [ + "190X", + "1900-XX", + "1900", + "2020-10", + "2021-10-12", + "2001-21", + "2001-22", + "2001-23", + "2001-24", + 
"2001-31", + "193X/196X", + "198X?", + "19XX?", + "2XXX?", + "198X~", + "19XX~", + "2XXX~", + "198X%", + "19XX%", + "2XXX%", + "XXXX?", + "XXXX~", + "XXXX%", + ] for good_value in good_values: res = workbench_utils.validate_edtf_date(good_value) self.assertTrue(res, good_value) def test_validate_bad_edtf_values(self): - bad_values = ['1900-05-45', - '1900-13-01', - '1900-02-31', - '1900-00-31', - '1900-00', - '19000', - '7/5/51', - '19X?', - '2XX%', - ] + bad_values = [ + "1900-05-45", + "1900-13-01", + "1900-02-31", + "1900-00-31", + "1900-00", + "19000", + "7/5/51", + "19X?", + "2XX%", + ] for bad_value in bad_values: res = workbench_utils.validate_edtf_date(bad_value) self.assertFalse(res, bad_value) @@ -378,110 +527,154 @@ def setUp(self): dir_path = os.path.dirname(os.path.realpath(__file__)) # Media types are mapped from extensions. - types_config_file_path = os.path.join(dir_path, 'assets', 'set_media_type_test', 'multi_types_config.yml') - with open(types_config_file_path, 'r') as f: + types_config_file_path = os.path.join( + dir_path, "assets", "set_media_type_test", "multi_types_config.yml" + ) + with open(types_config_file_path, "r") as f: multi_types_config_file_contents = f.read() self.multi_types_config_yaml = yaml.load(multi_types_config_file_contents) # Media type is set for all media. - type_config_file_path = os.path.join(dir_path, 'assets', 'set_media_type_test', 'single_type_config.yml') - with open(type_config_file_path, 'r') as f: + type_config_file_path = os.path.join( + dir_path, "assets", "set_media_type_test", "single_type_config.yml" + ) + with open(type_config_file_path, "r") as f: single_type_config_file_contents = f.read() self.single_type_config_yaml = yaml.load(single_type_config_file_contents) def test_multi_types_set_media_type(self): fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.txt' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.txt', 'file', fake_csv_record) - self.assertEqual(res, 'sometextmedia') + fake_csv_record["file"] = "/tmp/foo.txt" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.txt", "file", fake_csv_record + ) + self.assertEqual(res, "sometextmedia") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.tif' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.tif', 'file', fake_csv_record) - self.assertEqual(res, 'file') + fake_csv_record["file"] = "/tmp/foo.tif" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.tif", "file", fake_csv_record + ) + self.assertEqual(res, "file") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.tif' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foocaps.TIF', 'file', fake_csv_record) - self.assertEqual(res, 'file') + fake_csv_record["file"] = "/tmp/foo.tif" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foocaps.TIF", "file", fake_csv_record + ) + self.assertEqual(res, "file") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.mp4' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.mp4', 'file', fake_csv_record) - self.assertEqual(res, 'video') + fake_csv_record["file"] = "/tmp/foo.mp4" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.mp4", "file", fake_csv_record + ) + self.assertEqual(res, "video") fake_csv_record = collections.OrderedDict() - 
fake_csv_record['file'] = '/tmp/foo.mp4' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foocaps.MP4', 'file', fake_csv_record) - self.assertEqual(res, 'video') + fake_csv_record["file"] = "/tmp/foo.mp4" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foocaps.MP4", "file", fake_csv_record + ) + self.assertEqual(res, "video") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.png' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.png', 'file', fake_csv_record) - self.assertEqual(res, 'image') + fake_csv_record["file"] = "/tmp/foo.png" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.png", "file", fake_csv_record + ) + self.assertEqual(res, "image") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.pptx' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.pptx', 'file', fake_csv_record) - self.assertEqual(res, 'document') + fake_csv_record["file"] = "/tmp/foo.pptx" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.pptx", "file", fake_csv_record + ) + self.assertEqual(res, "document") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.pptx' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.Pptx', 'file', fake_csv_record) - self.assertEqual(res, 'document') + fake_csv_record["file"] = "/tmp/foo.pptx" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.Pptx", "file", fake_csv_record + ) + self.assertEqual(res, "document") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.xxx' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.xxx', 'file', fake_csv_record) - self.assertEqual(res, 'file') + fake_csv_record["file"] = "/tmp/foo.xxx" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.xxx", "file", fake_csv_record + ) + self.assertEqual(res, "file") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.wp' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.wp', 'file', fake_csv_record) - self.assertEqual(res, 'document') + fake_csv_record["file"] = "/tmp/foo.wp" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.wp", "file", fake_csv_record + ) + self.assertEqual(res, "document") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.ogg' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/foo.ogg', 'file', fake_csv_record) - self.assertEqual(res, 'video') + fake_csv_record["file"] = "/tmp/foo.ogg" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/foo.ogg", "file", fake_csv_record + ) + self.assertEqual(res, "video") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/xxx.foo' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, '/tmp/xxx.foo', 'file', fake_csv_record) - self.assertEqual(res, 'foomedia') + fake_csv_record["file"] = "/tmp/xxx.foo" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, "/tmp/xxx.foo", "file", fake_csv_record + ) + self.assertEqual(res, "foomedia") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = 'https://youtu.be/xxxx' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, 'https://youtu.be/xxxx', 
'file', fake_csv_record) - self.assertEqual(res, 'remote_video') + fake_csv_record["file"] = "https://youtu.be/xxxx" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, + "https://youtu.be/xxxx", + "file", + fake_csv_record, + ) + self.assertEqual(res, "remote_video") fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = 'https://vimeo.com/xxxx' - res = workbench_utils.set_media_type(self.multi_types_config_yaml, 'https://vimeo.com/xxxx', 'file', fake_csv_record) - self.assertEqual(res, 'remote_video') + fake_csv_record["file"] = "https://vimeo.com/xxxx" + res = workbench_utils.set_media_type( + self.multi_types_config_yaml, + "https://vimeo.com/xxxx", + "file", + fake_csv_record, + ) + self.assertEqual(res, "remote_video") def test_single_type_set_media_type(self): fake_csv_record = collections.OrderedDict() - fake_csv_record['file'] = '/tmp/foo.xxx' - res = workbench_utils.set_media_type(self.single_type_config_yaml, '/tmp/foo.xxx', 'file', fake_csv_record) - self.assertEqual(res, 'barmediatype') + fake_csv_record["file"] = "/tmp/foo.xxx" + res = workbench_utils.set_media_type( + self.single_type_config_yaml, "/tmp/foo.xxx", "file", fake_csv_record + ) + self.assertEqual(res, "barmediatype") class TestGetCsvFromExcel(unittest.TestCase): """Note: this tests the extraction of CSV data from Excel only, - not using an Excel file as an input CSV file. That is tested - in TestCommentedCsvs in islandora_tests.py. + not using an Excel file as an input CSV file. That is tested + in TestCommentedCsvs in islandora_tests.py. """ - def setUp(self): - self.config = {'input_dir': 'tests/assets/excel_test', - 'temp_dir': 'tests/assets/excel_test', - 'input_csv': 'test_excel_file.xlsx', - 'excel_worksheet': 'Sheet1', - 'excel_csv_filename': 'excel_csv.csv', - 'id_field': 'id' - } - self.csv_file_path = os.path.join(self.config['input_dir'], self.config['excel_csv_filename']) + def setUp(self): + self.config = { + "input_dir": "tests/assets/excel_test", + "temp_dir": "tests/assets/excel_test", + "input_csv": "test_excel_file.xlsx", + "excel_worksheet": "Sheet1", + "excel_csv_filename": "excel_csv.csv", + "id_field": "id", + } + + self.csv_file_path = os.path.join( + self.config["input_dir"], self.config["excel_csv_filename"] + ) def test_get_csv_from_excel(self): workbench_utils.get_csv_from_excel(self.config) @@ -492,8 +685,8 @@ def test_get_csv_from_excel(self): self.assertEqual(len(csv_data), 5) fourth_row = csv_data[4] - fourth_row_parts = fourth_row.split(',') - self.assertEqual(fourth_row_parts[1], 'Title 4') + fourth_row_parts = fourth_row.split(",") + self.assertEqual(fourth_row_parts[1], "Title 4") def tearDown(self): os.remove(self.csv_file_path) @@ -501,28 +694,78 @@ def tearDown(self): class TestSqliteManager(unittest.TestCase): def setUp(self): - self.config = {'temp_dir': tempfile.gettempdir(), - 'sqlite_db_filename': 'workbench_unit_tests.db' - } - - self.db_file_path = os.path.join(self.config['temp_dir'], self.config['sqlite_db_filename']) - - workbench_utils.sqlite_manager(self.config, db_file_path=self.db_file_path, operation='create_database') - workbench_utils.sqlite_manager(self.config, db_file_path=self.db_file_path, operation='create_table', table_name='names', query='CREATE TABLE names (name TEXT, location TEXT)') + self.config = { + "temp_dir": tempfile.gettempdir(), + "sqlite_db_filename": "workbench_unit_tests.db", + } + + self.db_file_path = os.path.join( + self.config["temp_dir"], self.config["sqlite_db_filename"] + ) + + 
workbench_utils.sqlite_manager( + self.config, db_file_path=self.db_file_path, operation="create_database" + ) + workbench_utils.sqlite_manager( + self.config, + db_file_path=self.db_file_path, + operation="create_table", + table_name="names", + query="CREATE TABLE names (name TEXT, location TEXT)", + ) def test_crud_operations(self): - workbench_utils.sqlite_manager(self.config, operation='insert', db_file_path=self.db_file_path, query="INSERT INTO names VALUES (?, ?)", values=('Mark', 'Burnaby')) - workbench_utils.sqlite_manager(self.config, operation='insert', db_file_path=self.db_file_path, query="INSERT INTO names VALUES (?, ?)", values=('Mix', 'Catland')) - res = workbench_utils.sqlite_manager(self.config, operation='select', db_file_path=self.db_file_path, query="select * from names") - self.assertEqual(res[0]['name'], 'Mark') - self.assertEqual(res[1]['location'], 'Catland') - - workbench_utils.sqlite_manager(self.config, operation='update', db_file_path=self.db_file_path, query="UPDATE names set location = ? where name = ?", values=('Blank stare', 'Mix')) - res = workbench_utils.sqlite_manager(self.config, operation='select', db_file_path=self.db_file_path, query="select * from names") - self.assertEqual(res[1]['location'], 'Blank stare') - - workbench_utils.sqlite_manager(self.config, operation='delete', db_file_path=self.db_file_path, query="delete from names where name = ?", values=('Mix',)) - res = workbench_utils.sqlite_manager(self.config, operation='select', db_file_path=self.db_file_path, query="select * from names") + workbench_utils.sqlite_manager( + self.config, + operation="insert", + db_file_path=self.db_file_path, + query="INSERT INTO names VALUES (?, ?)", + values=("Mark", "Burnaby"), + ) + workbench_utils.sqlite_manager( + self.config, + operation="insert", + db_file_path=self.db_file_path, + query="INSERT INTO names VALUES (?, ?)", + values=("Mix", "Catland"), + ) + res = workbench_utils.sqlite_manager( + self.config, + operation="select", + db_file_path=self.db_file_path, + query="select * from names", + ) + self.assertEqual(res[0]["name"], "Mark") + self.assertEqual(res[1]["location"], "Catland") + + workbench_utils.sqlite_manager( + self.config, + operation="update", + db_file_path=self.db_file_path, + query="UPDATE names set location = ? 
where name = ?", + values=("Blank stare", "Mix"), + ) + res = workbench_utils.sqlite_manager( + self.config, + operation="select", + db_file_path=self.db_file_path, + query="select * from names", + ) + self.assertEqual(res[1]["location"], "Blank stare") + + workbench_utils.sqlite_manager( + self.config, + operation="delete", + db_file_path=self.db_file_path, + query="delete from names where name = ?", + values=("Mix",), + ) + res = workbench_utils.sqlite_manager( + self.config, + operation="select", + db_file_path=self.db_file_path, + query="select * from names", + ) self.assertEqual(len(res), 1) def tearDown(self): @@ -532,44 +775,62 @@ def tearDown(self): class TestDrupalCoreVersionNumbers(unittest.TestCase): def test_version_numbers(self): minimum_core_version = tuple([8, 6]) - lower_versions = ['8.3.0', '8.5.0-alpha1', '8.5.0', '8.5.6'] + lower_versions = ["8.3.0", "8.5.0-alpha1", "8.5.0", "8.5.6"] for version in lower_versions: version_number = workbench_utils.convert_semver_to_number(version) res = version_number < minimum_core_version - self.assertTrue(res, 'Version number ' + str(version_number) + ' is greater than 8.6.') - - version_number = workbench_utils.convert_semver_to_number('8.6') - self.assertTrue(version_number == minimum_core_version, 'Not sure what failed.') - - higher_versions = ['8.6.1', '8.6.4', '8.9.14', '8.10.0-dev', '9.0', '9.1', '9.0.0-dev', '9.1.0-rc3', '9.0.2'] + self.assertTrue( + res, "Version number " + str(version_number) + " is greater than 8.6." + ) + + version_number = workbench_utils.convert_semver_to_number("8.6") + self.assertTrue(version_number == minimum_core_version, "Not sure what failed.") + + higher_versions = [ + "8.6.1", + "8.6.4", + "8.9.14", + "8.10.0-dev", + "9.0", + "9.1", + "9.0.0-dev", + "9.1.0-rc3", + "9.0.2", + ] for version in higher_versions: version_number = workbench_utils.convert_semver_to_number(version) res = version_number >= minimum_core_version - self.assertTrue(res, 'Version number ' + str(version_number) + ' is less than 8.6.') + self.assertTrue( + res, "Version number " + str(version_number) + " is less than 8.6." + ) class TestIntegrationModuleVersionNumbers(unittest.TestCase): def test_version_numbers(self): minimum_version = tuple([1, 0]) - lower_versions = ['0.9', '0.8', '0.8.0-dev'] + lower_versions = ["0.9", "0.8", "0.8.0-dev"] for version in lower_versions: version_number = workbench_utils.convert_semver_to_number(version) res = version_number < minimum_version - self.assertTrue(res, 'Version number ' + str(version_number) + ' is greater than 1.0.') + self.assertTrue( + res, "Version number " + str(version_number) + " is greater than 1.0." + ) - higher_versions = ['1.0.0', '1.0.1', '1.2', '1.0.1-dev', '10.0'] + higher_versions = ["1.0.0", "1.0.1", "1.2", "1.0.1-dev", "10.0"] for version in higher_versions: version_number = workbench_utils.convert_semver_to_number(version) res = version_number >= minimum_version - self.assertTrue(res, 'Version number ' + str(version_number) + ' is less than 1.0.') + self.assertTrue( + res, "Version number " + str(version_number) + " is less than 1.0." 
+ ) class TestDedupedFilePaths(unittest.TestCase): def test_deduped_file_paths(self): paths = [ - ['/home/foo/bar.txt', '/home/foo/bar_1.txt'], - ['/home/foo/bar_1.txt', '/home/foo/bar_2.txt'], - ['/tmp/dir/dog_05.zip', '/tmp/dir/dog_6.zip'] + ["/home/foo/bar.txt", "/home/foo/bar_1.txt"], + ["/home/foo/bar_1.txt", "/home/foo/bar_2.txt"], + ["/tmp/dir/dog_05.zip", "/tmp/dir/dog_6.zip"], ] for path_pair in paths: deduped_path = workbench_utils.get_deduped_file_path(path_pair[0]) @@ -578,120 +839,126 @@ def test_deduped_file_paths(self): class TestValueIsNumeric(unittest.TestCase): def test_value_is_numeric(self): - values = ['200', '0', 999] + values = ["200", "0", 999] for value in values: res = workbench_utils.value_is_numeric(value) - self.assertTrue(res, 'Value ' + str(value) + ' is not numeric.') + self.assertTrue(res, "Value " + str(value) + " is not numeric.") def test_value_is_not_numeric(self): - values = ['n200', False, '999-1000'] + values = ["n200", False, "999-1000"] for value in values: res = workbench_utils.value_is_numeric(value) - self.assertFalse(res, 'Value ' + str(value) + ' is numeric.') + self.assertFalse(res, "Value " + str(value) + " is numeric.") class TestCleanCsvValues(unittest.TestCase): def test_clean_csv_values(self): - config = {'subdelimiter': '|', 'clean_csv_values_skip': []} + config = {"subdelimiter": "|", "clean_csv_values_skip": []} csv_record = collections.OrderedDict() - csv_record['one'] = ' blsidlw ' - csv_record['two'] = 'hheo "s7s9w9"' - csv_record['three'] = "b‘bbbbb’" - csv_record['four'] = 'لدولي, العاشر []ليونيكود ' + csv_record["one"] = " blsidlw " + csv_record["two"] = 'hheo "s7s9w9"' + csv_record["three"] = "b‘bbbbb’" + csv_record["four"] = "لدولي, العاشر []ليونيكود " newline = "\n" - csv_record['five'] = f"{newline}new lines{newline}" - csv_record['six'] = 'a b c d e' + csv_record["five"] = f"{newline}new lines{newline}" + csv_record["six"] = "a b c d e" clean_csv_record = collections.OrderedDict() - clean_csv_record['one'] = 'blsidlw' - clean_csv_record['two'] = 'hheo "s7s9w9"' - clean_csv_record['three'] = "b'bbbbb'" - clean_csv_record['four'] = 'لدولي, العاشر []ليونيكود' - clean_csv_record['five'] = 'new lines' - clean_csv_record['six'] = 'a b c d e' + clean_csv_record["one"] = "blsidlw" + clean_csv_record["two"] = 'hheo "s7s9w9"' + clean_csv_record["three"] = "b'bbbbb'" + clean_csv_record["four"] = "لدولي, العاشر []ليونيكود" + clean_csv_record["five"] = "new lines" + clean_csv_record["six"] = "a b c d e" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) def test_clean_csv_values_skip_smart_quotes(self): - config = {'subdelimiter': '|', 'clean_csv_values_skip': ['smart_quotes']} + config = {"subdelimiter": "|", "clean_csv_values_skip": ["smart_quotes"]} csv_record = collections.OrderedDict() - csv_record['smq1'] = "b‘bbxbbb’" + csv_record["smq1"] = "b‘bbxbbb’" clean_csv_record = collections.OrderedDict() - clean_csv_record['smq1'] = "b‘bbxbbb’" + clean_csv_record["smq1"] = "b‘bbxbbb’" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) def test_clean_csv_values_skip_spaces(self): - config = {'subdelimiter': '|', 'clean_csv_values_skip': ['inside_spaces', 'outside_spaces']} + config = { + "subdelimiter": "|", + "clean_csv_values_skip": ["inside_spaces", "outside_spaces"], + } csv_record = collections.OrderedDict() - csv_record['one'] = ' blsidlw ' - csv_record['two'] = 'hheo "s7s9w9"' - csv_record['three'] = 
"b‘bbbbb’" - csv_record['four'] = 'لدولي, العاشر []ليونيكود ' + csv_record["one"] = " blsidlw " + csv_record["two"] = 'hheo "s7s9w9"' + csv_record["three"] = "b‘bbbbb’" + csv_record["four"] = "لدولي, العاشر []ليونيكود " newline = "\n" - csv_record['five'] = f"{newline}new lines{newline}" - csv_record['six'] = 'a b c d e' + csv_record["five"] = f"{newline}new lines{newline}" + csv_record["six"] = "a b c d e" clean_csv_record = collections.OrderedDict() - clean_csv_record['one'] = ' blsidlw ' - clean_csv_record['two'] = 'hheo "s7s9w9"' - clean_csv_record['three'] = "b'bbbbb'" - clean_csv_record['four'] = 'لدولي, العاشر []ليونيكود ' - clean_csv_record['five'] = f"{newline}new lines{newline}" - clean_csv_record['six'] = 'a b c d e' + clean_csv_record["one"] = " blsidlw " + clean_csv_record["two"] = 'hheo "s7s9w9"' + clean_csv_record["three"] = "b'bbbbb'" + clean_csv_record["four"] = "لدولي, العاشر []ليونيكود " + clean_csv_record["five"] = f"{newline}new lines{newline}" + clean_csv_record["six"] = "a b c d e" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) def test_clean_csv_values_subdelimiters(self): # Most common case, using the default subdelimiter. - config = {'subdelimiter': '|', 'clean_csv_values_skip': []} + config = {"subdelimiter": "|", "clean_csv_values_skip": []} csv_record = collections.OrderedDict() - csv_record['one'] = ' |blsidlw ' - csv_record['two'] = 'something|' - csv_record['three'] = 'something||' + csv_record["one"] = " |blsidlw " + csv_record["two"] = "something|" + csv_record["three"] = "something||" clean_csv_record = collections.OrderedDict() - clean_csv_record['one'] = 'blsidlw' - clean_csv_record['two'] = 'something' - clean_csv_record['three'] = 'something' + clean_csv_record["one"] = "blsidlw" + clean_csv_record["two"] = "something" + clean_csv_record["three"] = "something" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) # Non-| dubdelimiter. - config = {'subdelimiter': '%%', 'clean_csv_values_skip': []} + config = {"subdelimiter": "%%", "clean_csv_values_skip": []} csv_record = collections.OrderedDict() - csv_record['one'] = ' %%blsidlw ' - csv_record['two'] = 'something%%' + csv_record["one"] = " %%blsidlw " + csv_record["two"] = "something%%" clean_csv_record = collections.OrderedDict() - clean_csv_record['one'] = 'blsidlw' - clean_csv_record['two'] = 'something' + clean_csv_record["one"] = "blsidlw" + clean_csv_record["two"] = "something" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) # Skipping the outside whitespace and subdelimiter. 
- config = {'subdelimiter': '|', 'clean_csv_values_skip': ['outside_spaces', 'outside_subdelimiters']} + config = { + "subdelimiter": "|", + "clean_csv_values_skip": ["outside_spaces", "outside_subdelimiters"], + } csv_record = collections.OrderedDict() - csv_record['one'] = ' |blsidlw' - csv_record['two'] = 'something|' - csv_record['three'] = 'something||' + csv_record["one"] = " |blsidlw" + csv_record["two"] = "something|" + csv_record["three"] = "something||" clean_csv_record = collections.OrderedDict() - clean_csv_record['one'] = ' |blsidlw' - clean_csv_record['two'] = 'something|' - clean_csv_record['three'] = 'something||' + clean_csv_record["one"] = " |blsidlw" + clean_csv_record["two"] = "something|" + clean_csv_record["three"] = "something||" csv_record = workbench_utils.clean_csv_values(config, csv_record) self.assertEqual(clean_csv_record, csv_record) @@ -699,116 +966,213 @@ def test_clean_csv_values_subdelimiters(self): class TestGetPageTitleFromTemplate(unittest.TestCase): def test_get_page_title_from_template(self): - fixtures = [{'config': {'page_title_template': '$parent_title, page $weight'}, 'parent_title': 'Test parent title', 'weight': 2, 'control': 'Test parent title, page 2'}, - {'config': {'page_title_template': '$parent_title, part $weight'}, 'parent_title': 'Test parent title', 'weight': 10, 'control': 'Test parent title, part 10'}, - {'config': {'page_title_template': 'section $weight'}, 'parent_title': 'Foo', 'weight': 9, 'control': 'section 9'} - ] + fixtures = [ + { + "config": {"page_title_template": "$parent_title, page $weight"}, + "parent_title": "Test parent title", + "weight": 2, + "control": "Test parent title, page 2", + }, + { + "config": {"page_title_template": "$parent_title, part $weight"}, + "parent_title": "Test parent title", + "weight": 10, + "control": "Test parent title, part 10", + }, + { + "config": {"page_title_template": "section $weight"}, + "parent_title": "Foo", + "weight": 9, + "control": "section 9", + }, + ] for fixture in fixtures: - page_title = workbench_utils.get_page_title_from_template(fixture['config'], fixture['parent_title'], fixture['weight']) - self.assertEqual(fixture['control'], page_title) + page_title = workbench_utils.get_page_title_from_template( + fixture["config"], fixture["parent_title"], fixture["weight"] + ) + self.assertEqual(fixture["control"], page_title) class TestGetPreprocessedFilePath(unittest.TestCase): def test_get_preprocessed_file_path_with_extension(self): node_csv_record = collections.OrderedDict() - node_csv_record['id'] = 'id_0001' - node_csv_record['file'] = 'https://example.com/somepathinfo/fullsize/test-filename.jpg' - node_csv_record['title'] = 'Test Title' + node_csv_record["id"] = "id_0001" + node_csv_record["file"] = ( + "https://example.com/somepathinfo/fullsize/test-filename.jpg" + ) + node_csv_record["title"] = "Test Title" self.node_csv_record = node_csv_record - self.config = {'task': 'create', - 'id_field': 'id', - 'oembed_providers': [], - 'temp_dir': tempfile.gettempdir(), - 'field_for_remote_filename': False} - - path = workbench_utils.get_preprocessed_file_path(self.config, 'file', self.node_csv_record) - expected_path = os.path.join(self.config['temp_dir'], self.node_csv_record['id'], 'test-filename.jpg') + self.config = { + "task": "create", + "id_field": "id", + "oembed_providers": [], + "temp_dir": tempfile.gettempdir(), + "field_for_remote_filename": False, + } + + path = workbench_utils.get_preprocessed_file_path( + self.config, "file", self.node_csv_record + ) + 
expected_path = os.path.join( + self.config["temp_dir"], self.node_csv_record["id"], "test-filename.jpg" + ) self.assertEqual(path, expected_path) def test_get_preprocessed_file_path_use_node_title_for_remote_filename(self): node_csv_record = collections.OrderedDict() - node_csv_record['id'] = 'id_0001' - node_csv_record['file'] = 'https://example.com/somepathinfo/fullsize/test-filename.jpg' - node_csv_record['title'] = 'Test Title' + node_csv_record["id"] = "id_0001" + node_csv_record["file"] = ( + "https://example.com/somepathinfo/fullsize/test-filename.jpg" + ) + node_csv_record["title"] = "Test Title" self.node_csv_record = node_csv_record - self.config = {'task': 'create', - 'id_field': 'id', - 'oembed_providers': [], - 'temp_dir': tempfile.gettempdir(), - 'field_for_remote_filename': False, - 'use_node_title_for_remote_filename': True} - - path = workbench_utils.get_preprocessed_file_path(self.config, 'file', self.node_csv_record) - expected_path = os.path.join(self.config['temp_dir'], self.node_csv_record['id'], 'Test_Title.jpg') + self.config = { + "task": "create", + "id_field": "id", + "oembed_providers": [], + "temp_dir": tempfile.gettempdir(), + "field_for_remote_filename": False, + "use_node_title_for_remote_filename": True, + } + + path = workbench_utils.get_preprocessed_file_path( + self.config, "file", self.node_csv_record + ) + expected_path = os.path.join( + self.config["temp_dir"], self.node_csv_record["id"], "Test_Title.jpg" + ) self.assertEqual(path, expected_path) def test_get_preprocessed_file_path_use_nid_in_remote_filename(self): node_csv_record = collections.OrderedDict() - node_csv_record['id'] = 'id_0001' - node_csv_record['file'] = 'https://example.com/somepathinfo/fullsize/test-filename.jpg' - node_csv_record['title'] = 'Test Title' + node_csv_record["id"] = "id_0001" + node_csv_record["file"] = ( + "https://example.com/somepathinfo/fullsize/test-filename.jpg" + ) + node_csv_record["title"] = "Test Title" self.node_csv_record = node_csv_record - self.config = {'task': 'create', - 'id_field': 'id', - 'oembed_providers': [], - 'temp_dir': tempfile.gettempdir(), - 'field_for_remote_filename': False, - 'use_nid_in_remote_filename': True} - - path = workbench_utils.get_preprocessed_file_path(self.config, 'file', self.node_csv_record, node_id=456) - expected_path = os.path.join(self.config['temp_dir'], self.node_csv_record['id'], '456.jpg') + self.config = { + "task": "create", + "id_field": "id", + "oembed_providers": [], + "temp_dir": tempfile.gettempdir(), + "field_for_remote_filename": False, + "use_nid_in_remote_filename": True, + } + + path = workbench_utils.get_preprocessed_file_path( + self.config, "file", self.node_csv_record, node_id=456 + ) + expected_path = os.path.join( + self.config["temp_dir"], self.node_csv_record["id"], "456.jpg" + ) self.assertEqual(path, expected_path) def test_get_preprocessed_file_path_field_for_remote_filename(self): node_csv_record = collections.OrderedDict() - node_csv_record['id'] = 'id_0001' - node_csv_record['file'] = 'https://example.com/somepathinfo/fullsize/test-filename.jpg' - node_csv_record['title'] = 'Test Title' - node_csv_record['field_description'] = 'A description used for testing.' + node_csv_record["id"] = "id_0001" + node_csv_record["file"] = ( + "https://example.com/somepathinfo/fullsize/test-filename.jpg" + ) + node_csv_record["title"] = "Test Title" + node_csv_record["field_description"] = "A description used for testing." 
self.node_csv_record = node_csv_record - self.config = {'task': 'create', - 'id_field': 'id', - 'oembed_providers': [], - 'temp_dir': tempfile.gettempdir(), - 'field_for_remote_filename': False, - 'field_for_remote_filename': 'field_description'} - - path = workbench_utils.get_preprocessed_file_path(self.config, 'file', self.node_csv_record, node_id=456) - expected_path = os.path.join(self.config['temp_dir'], self.node_csv_record['id'], 'A_description_used_for_testing.jpg') + self.config = { + "task": "create", + "id_field": "id", + "oembed_providers": [], + "temp_dir": tempfile.gettempdir(), + "field_for_remote_filename": False, + "field_for_remote_filename": "field_description", + } + + path = workbench_utils.get_preprocessed_file_path( + self.config, "file", self.node_csv_record, node_id=456 + ) + expected_path = os.path.join( + self.config["temp_dir"], + self.node_csv_record["id"], + "A_description_used_for_testing.jpg", + ) self.assertEqual(path, expected_path) class TestDeduplicateFieldValues(unittest.TestCase): def test_strings(self): - fixtures = [{'input': ['one', 'two', 'two', 'three'], 'control': ['one', 'two', 'three']}, - {'input': ['one', 'two', 'two', 'three', 'three'], 'control': ['one', 'two', 'three']}, - ] + fixtures = [ + { + "input": ["one", "two", "two", "three"], + "control": ["one", "two", "three"], + }, + { + "input": ["one", "two", "two", "three", "three"], + "control": ["one", "two", "three"], + }, + ] for fixture in fixtures: - unique_values = workbench_utils.deduplicate_field_values(fixture['input']) - self.assertEqual(fixture['control'], unique_values) + unique_values = workbench_utils.deduplicate_field_values(fixture["input"]) + self.assertEqual(fixture["control"], unique_values) def test_dictionaries(self): - fixtures = [{'input': [{'foo': 'bar'}, {'1': 2}, {'1': 2}, {'three': 'four'}], - 'control': [{'foo': 'bar'}, {'1': 2}, {'three': 'four'}]}, - {'input': [{'foo': 'bar', 'up': 'down'}, {'1': 2}, {'three': 'four'}, {'1': 2}], - 'control': [{'foo': 'bar', 'up': 'down'}, {'1': 2}, {'three': 'four'}]}, - {'input': [{'foo': {'up': 'down'}}, {'1': 2}, {'three': 'four'}, {'1': 2}], - 'control': [{'foo': {'up': 'down'}}, {'1': 2}, {'three': 'four'}]}, - {'input': [{'target_type': 'node', 'target_id': 1000, 'target_uuid': 'a5f348bc-ee11-4055-bc9c-599b4da65819', 'url': '/node/1000'}, - {'target_id': 1000, 'target_type': 'node', 'url': '/node/1000', 'target_uuid': 'a5f348bc-ee11-4055-bc9c-599b4da65819'}], - 'control': [{'target_id': 1000, 'target_type': 'node', 'target_uuid': 'a5f348bc-ee11-4055-bc9c-599b4da65819', 'url': '/node/1000'}]} - ] + fixtures = [ + { + "input": [{"foo": "bar"}, {"1": 2}, {"1": 2}, {"three": "four"}], + "control": [{"foo": "bar"}, {"1": 2}, {"three": "four"}], + }, + { + "input": [ + {"foo": "bar", "up": "down"}, + {"1": 2}, + {"three": "four"}, + {"1": 2}, + ], + "control": [{"foo": "bar", "up": "down"}, {"1": 2}, {"three": "four"}], + }, + { + "input": [ + {"foo": {"up": "down"}}, + {"1": 2}, + {"three": "four"}, + {"1": 2}, + ], + "control": [{"foo": {"up": "down"}}, {"1": 2}, {"three": "four"}], + }, + { + "input": [ + { + "target_type": "node", + "target_id": 1000, + "target_uuid": "a5f348bc-ee11-4055-bc9c-599b4da65819", + "url": "/node/1000", + }, + { + "target_id": 1000, + "target_type": "node", + "url": "/node/1000", + "target_uuid": "a5f348bc-ee11-4055-bc9c-599b4da65819", + }, + ], + "control": [ + { + "target_id": 1000, + "target_type": "node", + "target_uuid": "a5f348bc-ee11-4055-bc9c-599b4da65819", + "url": "/node/1000", + } + 
], + }, + ] for fixture in fixtures: - unique_values = workbench_utils.deduplicate_field_values(fixture['input']) - self.assertEqual(fixture['control'], unique_values) + unique_values = workbench_utils.deduplicate_field_values(fixture["input"]) + self.assertEqual(fixture["control"], unique_values) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit_tests_workbench_config.py b/tests/unit_tests_workbench_config.py index 14f8e7df..3ed3e94e 100644 --- a/tests/unit_tests_workbench_config.py +++ b/tests/unit_tests_workbench_config.py @@ -13,25 +13,43 @@ class TestWorkbenchConfig(unittest.TestCase): def setUp(self) -> None: parser = argparse.ArgumentParser() - parser.add_argument('--config', required=True, help='Configuration file to use.') - parser.add_argument('--check', help='Check input data and exit without creating/updating/etc.', - action='store_true') - parser.add_argument('--get_csv_template', - help='Generate a CSV template using the specified configuration file.', action='store_true') - parser.add_argument('--quick_delete_node', - help='Delete the node (and all attached media) identified by the URL).') - parser.add_argument('--quick_delete_media', help='Delete the media (and attached file) identified by the URL).') - parser.add_argument('--contactsheet', help='Generate a contact sheet.', action='store_true') - parser.add_argument('--version', action='version', version='Islandora Workbench 0.0.0') + parser.add_argument( + "--config", required=True, help="Configuration file to use." + ) + parser.add_argument( + "--check", + help="Check input data and exit without creating/updating/etc.", + action="store_true", + ) + parser.add_argument( + "--get_csv_template", + help="Generate a CSV template using the specified configuration file.", + action="store_true", + ) + parser.add_argument( + "--quick_delete_node", + help="Delete the node (and all attached media) identified by the URL).", + ) + parser.add_argument( + "--quick_delete_media", + help="Delete the media (and attached file) identified by the URL).", + ) + parser.add_argument( + "--contactsheet", help="Generate a contact sheet.", action="store_true" + ) + parser.add_argument( + "--version", action="version", version="Islandora Workbench 0.0.0" + ) self.parser = parser def test_init_path_check_invalid_file(self): - test_file_name = '/file/does/not/exist.yml' + test_file_name = "/file/does/not/exist.yml" - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with self.assertRaises(SystemExit) as exit_return, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with self.assertRaises(SystemExit) as exit_return, patch( + "WorkbenchConfig.logging" + ) as mocked_logging: mocked_logging.return_value = None @@ -43,13 +61,13 @@ def test_init_path_check_invalid_file(self): # TODO: check values sent to logger def test_init_path_check_valid_file(self): - test_file_name = 'tests/assets/execute_bootstrap_script_test/config.yml' + test_file_name = "tests/assets/execute_bootstrap_script_test/config.yml" - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('sys.exit', side_effect=lambda x: None) as mock_exit, \ - patch('WorkbenchConfig.WorkbenchConfig.validate') as mocked_validate, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with patch("sys.exit", side_effect=lambda x: None) as mock_exit, patch( + "WorkbenchConfig.WorkbenchConfig.validate" 
+ ) as mocked_validate, patch("WorkbenchConfig.logging") as mocked_logging: mocked_validate.return_value = None mocked_logging.return_value = None @@ -61,12 +79,15 @@ def test_init_path_check_valid_file(self): # TODO: check values sent to logger def test_get_config_valid_config_file_01(self): - test_file_name = 'tests/assets/WorkbenchConfig_test/config_01_create_short_valid.yml' + test_file_name = ( + "tests/assets/WorkbenchConfig_test/config_01_create_short_valid.yml" + ) - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('WorkbenchConfig.WorkbenchConfig.validate') as mocked_validate, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with patch( + "WorkbenchConfig.WorkbenchConfig.validate" + ) as mocked_validate, patch("WorkbenchConfig.logging") as mocked_logging: mocked_validate.return_value = None mocked_logging.return_value = None @@ -77,103 +98,124 @@ def test_get_config_valid_config_file_01(self): # checking for config variables set in # tests/assets/execute_bootstrap_script_test/config.yml - self.assertEqual(test_config_dict['task'], 'create') - self.assertEqual(test_config_dict['host'], 'https://islandora.traefik.me') - self.assertEqual(test_config_dict['username'], 'admin') - self.assertEqual(test_config_dict['password'], 'password') + self.assertEqual(test_config_dict["task"], "create") + self.assertEqual(test_config_dict["host"], "https://islandora.traefik.me") + self.assertEqual(test_config_dict["username"], "admin") + self.assertEqual(test_config_dict["password"], "password") # self.assertEqual(test_config_dict['media_type'], 'document') # TODO: check values sent to logger def test_init_validate_valid(self): - test_file_name = 'tests/assets/WorkbenchConfig_test/config_01_create_short_valid.yml' + test_file_name = ( + "tests/assets/WorkbenchConfig_test/config_01_create_short_valid.yml" + ) - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('WorkbenchConfig.issue_request') as mocked_issue_request, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with patch("WorkbenchConfig.issue_request") as mocked_issue_request, patch( + "WorkbenchConfig.logging" + ) as mocked_logging: mocked_logging.return_value = None - fake_response = namedtuple('fake_response', ['status_code']) + fake_response = namedtuple("fake_response", ["status_code"]) fake_response.status_code = 200 mocked_issue_request.return_value = fake_response test_config_obj = WorkbenchConfig(args) - content_type = 'islandora_object' + content_type = "islandora_object" url = f"https://islandora.traefik.me/entity/entity_form_display/node.{content_type}.default?_format=json" - mocked_issue_request.assert_called_with(test_config_obj.get_config(), 'GET', url) + mocked_issue_request.assert_called_with( + test_config_obj.get_config(), "GET", url + ) def test_init_validate_invalid_content_type(self): - test_file_name = 'tests/assets/WorkbenchConfig_test/config_02_01_create_short_invalid.yml' + test_file_name = ( + "tests/assets/WorkbenchConfig_test/config_02_01_create_short_invalid.yml" + ) - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('WorkbenchConfig.issue_request') as mocked_issue_request, \ - patch('WorkbenchConfig.logging') as mocked_logging, \ - self.assertRaises(SystemExit) as exit_return: + with patch("WorkbenchConfig.issue_request") as 
mocked_issue_request, patch( + "WorkbenchConfig.logging" + ) as mocked_logging, self.assertRaises(SystemExit) as exit_return: mocked_logging.return_value = None - fake_response = namedtuple('fake_response', ['status_code']) + fake_response = namedtuple("fake_response", ["status_code"]) fake_response.status_code = 404 mocked_issue_request.return_value = fake_response test_config_obj = WorkbenchConfig(args) - content_type = 'invalid_content_type' - host = 'https://islandora.traefik.me' + content_type = "invalid_content_type" + host = "https://islandora.traefik.me" url = f"{host}/entity/entity_form_display/node.{content_type}.default?_format=json" - mocked_issue_request.assert_called_with(test_config_obj.get_config(), 'GET', url) + mocked_issue_request.assert_called_with( + test_config_obj.get_config(), "GET", url + ) - error_message = f'Error: Content type {content_type} does not exist on {host}.' + error_message = ( + f"Error: Content type {content_type} does not exist on {host}." + ) self.assertEqual(exit_return.exception.code, error_message) def test_init_validate_invalid_mutators_01(self): - test_file_name = 'tests/assets/WorkbenchConfig_test/config_02_02_create_short_invalid.yml' + test_file_name = ( + "tests/assets/WorkbenchConfig_test/config_02_02_create_short_invalid.yml" + ) - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('WorkbenchConfig.issue_request') as mocked_issue_request, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with patch("WorkbenchConfig.issue_request") as mocked_issue_request, patch( + "WorkbenchConfig.logging" + ) as mocked_logging: mocked_logging.return_value = None - fake_response = namedtuple('fake_response', ['status_code']) + fake_response = namedtuple("fake_response", ["status_code"]) fake_response.status_code = 200 mocked_issue_request.return_value = fake_response # Error text should only be this line, therefore use ^ and $ at the start and end of the message respectively - error_message = "^Error: You may only select one of \['use_node_title_for_media', " \ - + "'use_nid_in_media_title', 'field_for_media_title'\].\n - This config has selected " \ + error_message = ( + "^Error: You may only select one of \['use_node_title_for_media', " + + "'use_nid_in_media_title', 'field_for_media_title'\].\n - This config has selected " + "\['use_node_title_for_media', 'use_nid_in_media_title'\].\n$" + ) with self.assertRaisesRegex(SystemExit, error_message) as exit_return: test_config_obj = WorkbenchConfig(args) def test_init_validate_invalid_mutators_02(self): - test_file_name = 'tests/assets/WorkbenchConfig_test/config_02_03_create_short_invalid.yml' + test_file_name = ( + "tests/assets/WorkbenchConfig_test/config_02_03_create_short_invalid.yml" + ) - args = self.parser.parse_args(['--config', test_file_name]) + args = self.parser.parse_args(["--config", test_file_name]) - with patch('WorkbenchConfig.issue_request') as mocked_issue_request, \ - patch('WorkbenchConfig.logging') as mocked_logging: + with patch("WorkbenchConfig.issue_request") as mocked_issue_request, patch( + "WorkbenchConfig.logging" + ) as mocked_logging: mocked_logging.return_value = None - fake_response = namedtuple('fake_response', ['status_code']) + fake_response = namedtuple("fake_response", ["status_code"]) fake_response.status_code = 200 mocked_issue_request.return_value = fake_response # Error text should only be this line, therefore use ^ and $ at the start and end of the message respectively - 
error_message = "^Error: You may only select one of \['use_node_title_for_media', " \ - + "'use_nid_in_media_title', 'field_for_media_title'\].\n - This config has selected " \ + error_message = ( + "^Error: You may only select one of \['use_node_title_for_media', " + + "'use_nid_in_media_title', 'field_for_media_title'\].\n - This config has selected " + "\['use_node_title_for_media', 'field_for_media_title'\].\n$" + ) with self.assertRaisesRegex(SystemExit, error_message) as exit_return: test_config_obj = WorkbenchConfig(args) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/workbench_fields.py b/workbench_fields.py index 7e1e754c..54df2b1d 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -13,102 +13,137 @@ from workbench_utils import * -class SimpleField(): +class SimpleField: """Functions for handling fields with text and other "simple" Drupal field data types, - e.g. fields that have a "{'value': 'xxx'}" structure such as plain text fields, ETDF - fields. All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + e.g. fields that have a "{'value': 'xxx'}" structure such as plain text fields, ETDF + fields. All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. - Note that text fields that are "formatted" (i.e., use text formats/output filters) - require a 'format' key in their JSON in addition to the 'value' key. Otherwise, markup - or other text filters won't be applied when rendered. + Note that text fields that are "formatted" (i.e., use text formats/output filters) + require a 'format' key in their JSON in addition to the 'value' key. Otherwise, markup + or other text filters won't be applied when rendered. - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). - Also note: the required Drupal field 'title' is not processed by this class. + Also note: the required Drupal field 'title' is not processed by this class. """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. 
""" if row[field_name] is None: return entity - if field_name in config['field_text_format_ids']: - text_format = config['field_text_format_ids'][field_name] + if field_name in config["field_text_format_ids"]: + text_format = config["field_text_format_ids"][field_name] else: - text_format = config['text_format_id'] + text_format = config["text_format_id"] - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) subvalues = self.dedupe_values(subvalues) for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, id_field, field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - field_values.append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append({"value": subvalue, "format": text_format}) else: - field_values.append({'value': subvalue}) + field_values.append({"value": subvalue}) entity[field_name] = field_values else: - row[field_name] = truncate_csv_value(field_name, id_field, field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name] = [{'value': row[field_name], 'format': text_format}] + row[field_name] = truncate_csv_value( + field_name, id_field, field_definitions[field_name], row[field_name] + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name] = [ + {"value": row[field_name], "format": text_format} + ] else: - entity[field_name] = [{'value': row[field_name]}] + entity[field_name] = [{"value": row[field_name]}] # Cardinality has a limit, including 1. 
else: - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, id_field, field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - field_values.append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append({"value": subvalue, "format": text_format}) else: - field_values.append({'value': subvalue}) + field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values else: - row[field_name] = truncate_csv_value(field_name, id_field, field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name] = [{'value': row[field_name], 'format': text_format}] + row[field_name] = truncate_csv_value( + field_name, id_field, field_definitions[field_name], row[field_name] + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name] = [ + {"value": row[field_name], "format": text_format} + ] else: - entity[field_name] = [{'value': row[field_name]}] + entity[field_name] = [{"value": row[field_name]}] return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -129,124 +164,239 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. 
""" - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if field_name in config['field_text_format_ids']: - text_format = config['field_text_format_ids'][field_name] + if field_name in config["field_text_format_ids"]: + text_format = config["field_text_format_ids"][field_name] else: - text_format = config['text_format_id'] + text_format = config["text_format_id"] - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" # Cardinality has a limit. - if field_definitions[field_name]['cardinality'] > 0: - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + if field_definitions[field_name]["cardinality"] > 0: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name].append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name].append( + {"value": subvalue, "format": text_format} + ) else: - entity[field_name].append({'value': subvalue}) + entity[field_name].append({"value": subvalue}) entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - entity[field_name] = entity[field_name][:field_definitions[field_name]['cardinality']] + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = entity[field_name][ + : field_definitions[field_name]["cardinality"] + ] else: - row[field_name] = self.remove_invalid_values(config, field_definitions, field_name, row[field_name]) - row[field_name] = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name].append({'value': row[field_name], 'format': text_format}) + row[field_name] = self.remove_invalid_values( + config, field_definitions, field_name, row[field_name] + ) + row[field_name] = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + 
row[field_name], + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name].append( + {"value": row[field_name], "format": text_format} + ) else: - entity[field_name].append({'value': row[field_name]}) + entity[field_name].append({"value": row[field_name]}) entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - entity[field_name] = entity[field_name][:field_definitions[field_name]['cardinality']] - - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = entity[field_name][ + : field_definitions[field_name]["cardinality"] + ] + + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - field_values.append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append( + {"value": subvalue, "format": text_format} + ) else: - field_values.append({'value': subvalue}) + field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values else: - row[field_name] = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name] = [{'value': row[field_name], 'format': text_format}] + row[field_name] = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + row[field_name], + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name] = [ + {"value": row[field_name], "format": 
text_format} + ] else: - entity[field_name] = [{'value': row[field_name]}] + entity[field_name] = [{"value": row[field_name]}] # Cardinatlity is unlimited. else: - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - field_values.append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append( + {"value": subvalue, "format": text_format} + ) else: - field_values.append({'value': subvalue}) + field_values.append({"value": subvalue}) entity[field_name] = entity_field_values + field_values entity[field_name] = self.dedupe_values(entity[field_name]) else: - row[field_name] = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name] = entity_field_values + [{'value': row[field_name], 'format': text_format}] + row[field_name] = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + row[field_name], + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name] = entity_field_values + [ + {"value": row[field_name], "format": text_format} + ] else: - entity[field_name] = entity_field_values + [{'value': row[field_name]}] + entity[field_name] = entity_field_values + [ + {"value": row[field_name]} + ] entity[field_name] = self.dedupe_values(entity[field_name]) - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) - subvalues = self.remove_invalid_values(config, field_definitions, field_name, subvalues) + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) for subvalue in subvalues: - subvalue = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], subvalue) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - field_values.append({'value': subvalue, 'format': text_format}) + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + field_values.append( + {"value": subvalue, "format": 
text_format} + ) else: - field_values.append({'value': subvalue}) + field_values.append({"value": subvalue}) entity[field_name] = field_values entity[field_name] = self.dedupe_values(entity[field_name]) else: - row[field_name] = truncate_csv_value(field_name, row[entity_id_field], field_definitions[field_name], row[field_name]) - if 'formatted_text' in field_definitions[field_name] and field_definitions[field_name]['formatted_text'] is True: - entity[field_name] = [{'value': row[field_name], 'format': text_format}] + row[field_name] = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + row[field_name], + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name] = [ + {"value": row[field_name], "format": text_format} + ] else: - entity[field_name] = [{'value': row[field_name]}] + entity[field_name] = [{"value": row[field_name]}] return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -260,8 +410,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. - """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -277,25 +426,37 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): list A list of valid field values. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values - if field_definitions[field_name]['field_type'] == 'edtf': + if field_definitions[field_name]["field_type"] == "edtf": valid_values = list() for subvalue in values: if validate_edtf_date(subvalue) is True: valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid EDTF field value.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid EDTF field value.' + ) logging.warning(message) return valid_values - elif field_definitions[field_name]['field_type'] == 'list_string': + elif field_definitions[field_name]["field_type"] == "list_string": valid_values = list() for subvalue in values: - if subvalue in field_definitions[field_name]['allowed_values']: + if subvalue in field_definitions[field_name]["allowed_values"]: valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not in the field\'s list of allowed values.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + "\" is not in the field's list of allowed values." + ) logging.warning(message) return valid_values else: @@ -303,8 +464,7 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): return values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -320,59 +480,66 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. 
""" - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if 'value' in subvalue: - subvalues.append(subvalue['value']) + if "value" in subvalue: + subvalues.append(subvalue["value"]) else: - logging.warning("Field data " + str(field_data) + ' in field "' + field_name + '" cannot be serialized by the SimpleField handler.') - return '' + logging.warning( + "Field data " + + str(field_data) + + ' in field "' + + field_name + + '" cannot be serialized by the SimpleField handler.' + ) + return "" if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class GeolocationField(): +class GeolocationField: """Functions for handling fields with 'geolocation' Drupal field data type. - All functions return a "entity" dictionary that is passed to Requests' - "json" parameter. + All functions return a "entity" dictionary that is passed to Requests' + "json" parameter. - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) @@ -384,12 +551,18 @@ def create(self, config, field_definitions, entity, row, field_name): entity[field_name] = field_value # Cardinality has a limit. 
else: - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - subvalues = subvalues[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) entity[field_name] = subvalues else: field_value = split_geolocation_string(config, row[field_name]) @@ -397,11 +570,13 @@ def create(self, config, field_definitions, entity, row, field_name): return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -422,24 +597,24 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. """ - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) @@ -450,7 +625,7 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel else: field_value = split_geolocation_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': + if config["update_mode"] == "append": field_values = split_geolocation_string(config, row[field_name]) if field_name in entity: for field_value in field_values: @@ -458,41 +633,64 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel entity[field_name] = self.dedupe_values(entity_field_values) # Cardinality has a limit. 
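# Illustrative sketch (not part of this patch). Every update() method in this
# module follows the same update_mode contract described in its docstring:
# 'delete' empties the field, 'replace' overwrites it, and 'append' extends the
# values already on the entity. This toy function and the sample data are
# hypothetical; they only restate that contract outside the Drupal plumbing.
def apply_update_mode(existing, incoming, update_mode):
    if update_mode == "delete":
        return []
    if update_mode == "replace":
        return list(incoming)
    if update_mode == "append":
        return list(existing) + list(incoming)
    raise ValueError(f"Unknown update_mode: {update_mode}")

if __name__ == "__main__":
    existing = [{"lat": "49.16", "lng": "-123.93"}]
    incoming = [{"lat": "49.25", "lng": "-123.10"}]
    print(apply_update_mode(existing, incoming, "append"))
    # Both coordinate pairs are kept; dedupe and cardinality limits are applied afterward.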
else: - if config['update_mode'] == 'replace': + if config["update_mode"] == "replace": subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: field_values = [] for subvalue in subvalues: field_values.append(subvalue) - if len(field_values) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - field_values = field_values[:field_definitions[field_name]['cardinality']] + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + field_values = field_values[ + : field_definitions[field_name]["cardinality"] + ] entity[field_name] = field_values else: entity[field_name] = subvalues - if config['update_mode'] == 'append': + if config["update_mode"] == "append": subvalues = split_geolocation_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: for subvalue in subvalues: entity_field_values.append(subvalue) - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: for subvalue in subvalues: entity_field_values.append(subvalue) - if len(entity_field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -506,8 +704,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. - """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -528,13 +725,18 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): if validate_latlong_value(subvalue) is True: valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Geolocation field value.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Geolocation field value.' 
+ ) logging.warning(message) return valid_values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -550,55 +752,56 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - subvalues.append(str(subvalue['lat']) + ',' + str(subvalue['lng'])) + subvalues.append(str(subvalue["lat"]) + "," + str(subvalue["lng"])) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class LinkField(): +class LinkField: """Functions for handling fields with 'link' Drupal field data type. - All functions return a "entity" dictionary that is passed to Requests' - "json" parameter. + All functions return a "entity" dictionary that is passed to Requests' + "json" parameter. - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in row[field_name]: subvalues = split_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) entity[field_name] = subvalues @@ -607,12 +810,18 @@ def create(self, config, field_definitions, entity, row, field_name): entity[field_name] = field_value # Cardinality has a limit, including 1. 
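# Illustrative sketch (not part of this patch). A rough, hypothetical stand-in
# for split_link_string(), showing the link CSV convention that
# LinkField.serialize() (below) writes back out: an optional "%%" separates the
# URI from the link title. The sample URL is made up for demonstration.
def parse_link(csv_value):
    if "%%" in csv_value:
        uri, title = csv_value.split("%%", 1)
        return {"uri": uri, "title": title}
    return {"uri": csv_value}

if __name__ == "__main__":
    print(parse_link("https://example.com%%Example site"))
    # {'uri': 'https://example.com', 'title': 'Example site'}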
else: - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: subvalues = split_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - subvalues = subvalues[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) entity[field_name] = subvalues else: field_value = split_link_string(config, row[field_name]) @@ -620,11 +829,13 @@ def create(self, config, field_definitions, entity, row, field_name): return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -645,24 +856,24 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. """ - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" # Cardinality is unlimited. 
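# Illustrative sketch (not part of this patch). The cardinality handling that
# recurs throughout this file boils down to: dedupe the values, then truncate
# to the field's configured cardinality and log the violation.
# enforce_cardinality() is a hypothetical distillation of that pattern; the
# real code uses deduplicate_field_values() and log_field_cardinality_violation().
import logging

def enforce_cardinality(values, cardinality, field_name="field_example"):
    # Order-preserving dedupe; works for hashable values such as strings.
    deduped = list(dict.fromkeys(values))
    if cardinality > 0 and len(deduped) > cardinality:
        logging.warning("Truncating %s to %s value(s).", field_name, cardinality)
        deduped = deduped[:cardinality]
    return deduped

if __name__ == "__main__":
    print(enforce_cardinality(["a", "b", "a", "c"], 2))  # ['a', 'b']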
- if field_definitions[field_name]['cardinality'] == -1: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) @@ -672,8 +883,8 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel else: field_value = split_link_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_link_string(config, row[field_name]) for subvalue in subvalues: @@ -681,7 +892,9 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel if field_name in entity: for field_subvalue in field_values: entity_field_values.append(field_subvalue) - entity_field_values = subvalues = self.dedupe_values(entity_field_values) + entity_field_values = subvalues = self.dedupe_values( + entity_field_values + ) entity[field_name] = entity_field_values else: field_value = split_link_string(config, row[field_name]) @@ -691,33 +904,48 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel entity[field_name] = entity_field_values # Cardinality has a limit. else: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] for subvalue in subvalues: field_values.append(subvalue) entity[field_name] = field_values else: field_value = split_link_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': + if config["update_mode"] == "append": subvalues = split_link_string(config, row[field_name]) for subvalue in subvalues: entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. 
- """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -731,8 +959,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. - """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -753,13 +980,18 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): if validate_link_value(subvalue) is True: valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Link field value.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Link field value.' + ) logging.warning(message) return valid_values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -775,122 +1007,151 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if 'title' in subvalue and subvalue['title'] is not None and subvalue['title'] != '': - subvalues.append(subvalue['uri'] + '%%' + subvalue['title']) + if ( + "title" in subvalue + and subvalue["title"] is not None + and subvalue["title"] != "" + ): + subvalues.append(subvalue["uri"] + "%%" + subvalue["title"]) else: - subvalues.append(subvalue['uri']) + subvalues.append(subvalue["uri"]) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class EntityReferenceField(): +class EntityReferenceField: """Functions for handling fields with 'entity_reference' Drupal field data type. - All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). 
+ entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') - if field_definitions[field_name]['target_type'] == 'taxonomy_term': - target_type = 'taxonomy_term' + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") + if field_definitions[field_name]["target_type"] == "taxonomy_term": + target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: prepared_tids = [] - delimited_values = row[field_name].split(config['subdelimiter']) + delimited_values = row[field_name].split(config["subdelimiter"]) for delimited_value in delimited_values: - tid = prepare_term_id(config, field_vocabs, field_name, delimited_value) + tid = prepare_term_id( + config, field_vocabs, field_name, delimited_value + ) if value_is_numeric(tid): tid = str(tid) prepared_tids.append(tid) else: continue - row[field_name] = config['subdelimiter'].join(prepared_tids) + row[field_name] = config["subdelimiter"].join(prepared_tids) else: - row[field_name] = prepare_term_id(config, field_vocabs, field_name, row[field_name]) + row[field_name] = prepare_term_id( + config, field_vocabs, field_name, row[field_name] + ) if value_is_numeric(row[field_name]): row[field_name] = str(row[field_name]) - if field_definitions[field_name]['target_type'] == 'node': - target_type = 'node_type' + if field_definitions[field_name]["target_type"] == "node": + target_type = "node_type" - if field_definitions[field_name]['target_type'] == 'media': - target_type = 'media_type' + if field_definitions[field_name]["target_type"] == "media": + target_type = "media_type" # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in str(row[field_name]): + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in str(row[field_name]): field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) + subvalues = row[field_name].split(config["subdelimiter"]) subvalues = self.dedupe_values(subvalues) for subvalue in subvalues: subvalue = str(subvalue) - field_values.append({'target_id': subvalue, 'target_type': target_type}) + field_values.append( + {"target_id": subvalue, "target_type": target_type} + ) entity[field_name] = field_values else: - entity[field_name] = [{'target_id': str(row[field_name]), 'target_type': target_type}] + entity[field_name] = [ + {"target_id": str(row[field_name]), "target_type": target_type} + ] # Cardinality has a limit. 
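# Illustrative sketch (not part of this patch). What EntityReferenceField.create()
# ultimately sends to Drupal is a list of {'target_id': ..., 'target_type': ...}
# dicts. The lookup table below is a hypothetical stand-in for prepare_term_id(),
# which in the real code resolves values such as "vocabulary:term name" (or bare
# term names/IDs) to numeric term IDs against a live Drupal site.
TERM_IDS = {"genre:Postcards": "42", "genre:Maps": "43"}  # hypothetical sample data

def build_entity_reference(csv_cell, target_type="taxonomy_term", subdelimiter="|"):
    values = []
    for raw in csv_cell.split(subdelimiter):
        target_id = TERM_IDS.get(raw, raw)  # fall back to the raw value if it is already an ID
        values.append({"target_id": str(target_id), "target_type": target_type})
    return values

if __name__ == "__main__":
    print(build_entity_reference("genre:Postcards|genre:Maps"))
    # [{'target_id': '42', 'target_type': 'taxonomy_term'}, {'target_id': '43', 'target_type': 'taxonomy_term'}]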
- elif field_definitions[field_name]['cardinality'] > 0: - if config['subdelimiter'] in str(row[field_name]): + elif field_definitions[field_name]["cardinality"] > 0: + if config["subdelimiter"] in str(row[field_name]): field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) + subvalues = row[field_name].split(config["subdelimiter"]) subvalues = self.dedupe_values(subvalues) for subvalue in subvalues: subvalue = str(subvalue) - field_values.append({'target_id': subvalue, 'target_type': target_type}) - if len(field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) + field_values.append( + {"target_id": subvalue, "target_type": target_type} + ) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) else: entity[field_name] = field_values else: - entity[field_name] = [{'target_id': str(row[field_name]), 'target_type': target_type}] + entity[field_name] = [ + {"target_id": str(row[field_name]), "target_type": target_type} + ] # Cardinality is 1. else: - subvalues = row[field_name].split(config['subdelimiter']) - entity[field_name] = [{'target_id': str(subvalues[0]), 'target_type': target_type}] + subvalues = row[field_name].split(config["subdelimiter"]) + entity[field_name] = [ + {"target_id": str(subvalues[0]), "target_type": target_type} + ] if len(subvalues) > 1: - log_field_cardinality_violation(field_name, id_field, '1') + log_field_cardinality_violation(field_name, id_field, "1") return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -911,106 +1172,149 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. 
""" - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" - if field_definitions[field_name]['target_type'] == 'taxonomy_term': - target_type = 'taxonomy_term' + if field_definitions[field_name]["target_type"] == "taxonomy_term": + target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) - if config['subdelimiter'] in str(row[field_name]): + if config["subdelimiter"] in str(row[field_name]): prepared_tids = [] - delimited_values = row[field_name].split(config['subdelimiter']) + delimited_values = row[field_name].split(config["subdelimiter"]) for delimited_value in delimited_values: - tid = prepare_term_id(config, field_vocabs, field_name, delimited_value) + tid = prepare_term_id( + config, field_vocabs, field_name, delimited_value + ) if value_is_numeric(tid): tid = str(tid) prepared_tids.append(tid) else: continue - row[field_name] = config['subdelimiter'].join(prepared_tids) + row[field_name] = config["subdelimiter"].join(prepared_tids) else: - row[field_name] = prepare_term_id(config, field_vocabs, field_name, row[field_name]) + row[field_name] = prepare_term_id( + config, field_vocabs, field_name, row[field_name] + ) if value_is_numeric(row[field_name]): row[field_name] = str(row[field_name]) - if field_definitions[field_name]['target_type'] == 'node': - target_type = 'node_type' + if field_definitions[field_name]["target_type"] == "node": + target_type = "node_type" # Cardinality has a limit. 
- if field_definitions[field_name]['cardinality'] > 0: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in str(row[field_name]): + if field_definitions[field_name]["cardinality"] > 0: + if config["update_mode"] == "replace": + if config["subdelimiter"] in str(row[field_name]): field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) + subvalues = row[field_name].split(config["subdelimiter"]) subvalues = self.dedupe_values(subvalues) for subvalue in subvalues: - field_values.append({'target_id': str(subvalue), 'target_type': target_type}) - if len(field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: entity[field_name] = field_values else: - entity[field_name] = [{'target_id': row[field_name], 'target_type': target_type}] - if config['update_mode'] == 'append': - if config['subdelimiter'] in str(row[field_name]): - subvalues = row[field_name].split(config['subdelimiter']) + entity[field_name] = [ + {"target_id": row[field_name], "target_type": target_type} + ] + if config["update_mode"] == "append": + if config["subdelimiter"] in str(row[field_name]): + subvalues = row[field_name].split(config["subdelimiter"]) for subvalue in subvalues: - entity_field_values.append({'target_id': str(subvalue), 'target_type': target_type}) + entity_field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: entity[field_name] = entity_field_values else: - entity_field_values.append({'target_id': str(row[field_name]), 'target_type': target_type}) + entity_field_values.append( + {"target_id": str(row[field_name]), "target_type": target_type} + ) entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: 
entity[field_name] = entity_field_values # Cardinality is unlimited. else: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in str(row[field_name]): + if config["update_mode"] == "replace": + if config["subdelimiter"] in str(row[field_name]): field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) + subvalues = row[field_name].split(config["subdelimiter"]) subvalues = self.dedupe_values(subvalues) for subvalue in subvalues: - field_values.append({'target_id': str(subvalue), 'target_type': target_type}) + field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) entity[field_name] = field_values else: - entity[field_name] = [{'target_id': str(row[field_name]), 'target_type': target_type}] - if config['update_mode'] == 'append': - if config['subdelimiter'] in str(row[field_name]): + entity[field_name] = [ + {"target_id": str(row[field_name]), "target_type": target_type} + ] + if config["update_mode"] == "append": + if config["subdelimiter"] in str(row[field_name]): field_values = [] - subvalues = row[field_name].split(config['subdelimiter']) + subvalues = row[field_name].split(config["subdelimiter"]) for subvalue in subvalues: - entity_field_values.append({'target_id': str(subvalue), 'target_type': target_type}) + entity_field_values.append( + {"target_id": str(subvalue), "target_type": target_type} + ) entity[field_name] = self.dedupe_values(entity_field_values) else: - entity_field_values.append({'target_id': str(row[field_name]), 'target_type': target_type}) + entity_field_values.append( + {"target_id": str(row[field_name]), "target_type": target_type} + ) entity[field_name] = self.dedupe_values(entity_field_values) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -1024,8 +1328,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. - """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -1041,7 +1344,7 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): list A list of valid field values. """ - ''' + """ valid_values = list() for subvalue in values: if validate_link_value(subvalue) is True: @@ -1050,12 +1353,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Entity Reference field value.' logging.warning(message) return valid_values - ''' + """ return values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -1071,114 +1373,144 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. 
""" - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if config['export_csv_term_mode'] == 'name' and subvalue['target_type'] == 'taxonomy_term': + if ( + config["export_csv_term_mode"] == "name" + and subvalue["target_type"] == "taxonomy_term" + ): # Output term names, with vocab IDs (aka namespaces). - vocab_id = get_term_vocab(config, subvalue['target_id']) - term_name = get_term_name(config, subvalue['target_id']) + vocab_id = get_term_vocab(config, subvalue["target_id"]) + term_name = get_term_name(config, subvalue["target_id"]) if vocab_id is not False and term_name is not False: - subvalues.append(vocab_id + ':' + term_name) + subvalues.append(vocab_id + ":" + term_name) else: # Output term IDs. - if ping_term(config, subvalue['target_id']) is True: - subvalues.append(str(subvalue['target_id'])) + if ping_term(config, subvalue["target_id"]) is True: + subvalues.append(str(subvalue["target_id"])) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class TypedRelationField(): +class TypedRelationField: """Functions for handling fields with 'typed_relation' Drupal field data type. - All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. - Currently this field type only supports Typed Relation Taxonomies (not other - Typed Relation entity types). + Currently this field type only supports Typed Relation Taxonomies (not other + Typed Relation entity types). - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Currently only supports Typed Relation taxonomy entities. 
- if field_definitions[field_name]['target_type'] == 'taxonomy_term': - target_type = 'taxonomy_term' + if field_definitions[field_name]["target_type"] == "taxonomy_term": + target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: + if field_definitions[field_name]["cardinality"] == -1: field_values = [] - subvalues = split_typed_relation_string(config, row[field_name], target_type) + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) subvalues = self.dedupe_values(subvalues) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) field_values.append(subvalue) entity[field_name] = field_values else: - subvalues[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalues[0]['target_id']) + subvalues[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalues[0]["target_id"] + ) entity[field_name] = subvalues # Cardinality has a limit. - elif field_definitions[field_name]['cardinality'] > 1: - if config['subdelimiter'] in row[field_name]: + elif field_definitions[field_name]["cardinality"] > 1: + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = split_typed_relation_string(config, row[field_name], target_type) + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > field_definitions[field_name]['cardinality']: - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > field_definitions[field_name]["cardinality"]: + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) field_values.append(subvalue) entity[field_name] = field_values else: - field_value = split_typed_relation_string(config, row[field_name], target_type) - field_value[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, field_value[0]['target_id']) + field_value = split_typed_relation_string( + config, row[field_name], target_type + ) + field_value[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, field_value[0]["target_id"] + ) entity[field_name] = field_value # Cardinality is 1. 
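# Illustrative sketch (not part of this patch). A rough stand-in for
# split_typed_relation_string(), assuming Workbench's usual
# "namespace:relation:target" CSV convention (e.g. "relators:art:42") and a
# 'rel_type' key on the resulting dict; both details are assumptions here, not
# taken from this diff. The create() code above only relies on 'target_id'.
def parse_typed_relation(csv_value, target_type="taxonomy_term"):
    namespace, relation, target = csv_value.split(":", 2)
    return {
        "rel_type": f"{namespace}:{relation}",
        "target_id": target,
        "target_type": target_type,
    }

if __name__ == "__main__":
    print(parse_typed_relation("relators:art:42"))
    # {'rel_type': 'relators:art', 'target_id': '42', 'target_type': 'taxonomy_term'}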
else: - subvalues = split_typed_relation_string(config, row[field_name], target_type) + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) subvalues = self.dedupe_values(subvalues) - subvalues[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalues[0]['target_id']) + subvalues[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalues[0]["target_id"] + ) entity[field_name] = [subvalues[0]] if len(subvalues) > 1: - log_field_cardinality_violation(field_name, id_field, '1') + log_field_cardinality_violation(field_name, id_field, "1") return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -1199,98 +1531,150 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. """ - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" # Currently only supports Typed Relation taxonomy entities. - if field_definitions[field_name]['target_type'] == 'taxonomy_term': - target_type = 'taxonomy_term' + if field_definitions[field_name]["target_type"] == "taxonomy_term": + target_type = "taxonomy_term" field_vocabs = get_field_vocabularies(config, field_definitions, field_name) # Cardinality has a limit. 
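# Illustrative sketch (not part of this patch). Each update() method picks the
# CSV column that identifies the entity based on the running task, as the
# chained ifs above do. The lookup below simply restates that mapping.
ENTITY_ID_FIELDS = {
    "update_terms": "term_id",
    "update": "node_id",
    "update_media": "media_id",
}

def entity_id_field_for(task):
    return ENTITY_ID_FIELDS[task]

if __name__ == "__main__":
    print(entity_id_field_for("update_media"))  # media_id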
- if field_definitions[field_name]['cardinality'] > 0: - if config['update_mode'] == 'replace': - subvalues = split_typed_relation_string(config, row[field_name], target_type) + if field_definitions[field_name]["cardinality"] > 0: + if config["update_mode"] == "replace": + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) subvalues = self.dedupe_values(subvalues) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: field_values = [] for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) field_values.append(subvalue) - if len(field_values) > int(field_definitions[field_name]['cardinality']): - field_values = field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(field_values) > int( + field_definitions[field_name]["cardinality"] + ): + field_values = field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) entity[field_name] = field_values else: - subvalues[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalues[0]['target_id']) + subvalues[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalues[0]["target_id"] + ) entity[field_name] = subvalues - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: field_values = [] - subvalues = split_typed_relation_string(config, row[field_name], target_type) + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) entity_field_values.append(subvalue) entity_field_values = self.dedupe_values(entity_field_values) - if len(entity_field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: entity[field_name] = entity_field_values else: - csv_typed_relation_value = split_typed_relation_string(config, row[field_name], target_type) - csv_typed_relation_value[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, csv_typed_relation_value[0]['target_id']) + csv_typed_relation_value = split_typed_relation_string( + config, row[field_name], target_type + ) + csv_typed_relation_value[0]["target_id"] = prepare_term_id( + config, + field_vocabs, + field_name, + csv_typed_relation_value[0]["target_id"], + ) entity_field_values.append(csv_typed_relation_value[0]) entity_field_values = 
self.dedupe_values(entity_field_values) - if len(entity_field_values) > int(field_definitions[field_name]['cardinality']): - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + if len(entity_field_values) > int( + field_definitions[field_name]["cardinality"] + ): + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) else: entity[field_name] = entity_field_values # Cardinality is unlimited. else: - if config['update_mode'] == 'replace': - subvalues = split_typed_relation_string(config, row[field_name], target_type) + if config["update_mode"] == "replace": + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) subvalues = self.dedupe_values(subvalues) - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: field_values = [] for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) field_values.append(subvalue) entity[field_name] = field_values else: - subvalues[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalues[0]['target_id']) + subvalues[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalues[0]["target_id"] + ) entity[field_name] = subvalues - if config['update_mode'] == 'append': - subvalues = split_typed_relation_string(config, row[field_name], target_type) - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + subvalues = split_typed_relation_string( + config, row[field_name], target_type + ) + if config["subdelimiter"] in row[field_name]: field_values = [] for subvalue in subvalues: - subvalue['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalue['target_id']) + subvalue["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalue["target_id"] + ) entity_field_values.append(subvalue) entity[field_name] = self.dedupe_values(entity_field_values) else: - subvalues[0]['target_id'] = prepare_term_id(config, field_vocabs, field_name, subvalues[0]['target_id']) + subvalues[0]["target_id"] = prepare_term_id( + config, field_vocabs, field_name, subvalues[0]["target_id"] + ) entity_field_values.append(subvalues[0]) entity[field_name] = self.dedupe_values(entity_field_values) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -1304,8 +1688,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. - """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -1321,7 +1704,7 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): list A list of valid field values. 
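# dedupe_values() defers to deduplicate_field_values(). As used above it behaves as
# an order-preserving removal of repeated subvalues; a sketch of that behaviour
# (assumption, hypothetical helper), keeping in mind that subvalues are dicts and so
# cannot simply be passed through set():
def deduplicate_subvalues(subvalues):
    """Drop exact duplicates while preserving the original order."""
    deduplicated = []
    for subvalue in subvalues:
        if subvalue not in deduplicated:
            deduplicated.append(subvalue)
    return deduplicated

# deduplicate_subvalues([{"target_id": 5}, {"target_id": 5}, {"target_id": 9}])
# -> [{'target_id': 5}, {'target_id': 9}]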
""" - ''' + """ valid_values = list() for subvalue in values: if validate_link_value(subvalue) is True: @@ -1330,12 +1713,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Typed Relation field value.' logging.warning(message) return valid_values - ''' + """ return values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -1351,61 +1733,66 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if config['export_csv_term_mode'] == 'name': - vocab_id = get_term_vocab(config, subvalue['target_id']) - term_name = get_term_name(config, subvalue['target_id']) - subvalues.append(str(subvalue['rel_type']) + ':' + vocab_id + ':' + term_name) + if config["export_csv_term_mode"] == "name": + vocab_id = get_term_vocab(config, subvalue["target_id"]) + term_name = get_term_name(config, subvalue["target_id"]) + subvalues.append( + str(subvalue["rel_type"]) + ":" + vocab_id + ":" + term_name + ) else: # Term IDs. - subvalues.append(str(subvalue['rel_type']) + ':' + str(subvalue['target_id'])) + subvalues.append( + str(subvalue["rel_type"]) + ":" + str(subvalue["target_id"]) + ) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class AuthorityLinkField(): +class AuthorityLinkField: """Functions for handling fields with 'authority_link' Drupal field data type. - All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. - Note: this class assumes that the entity has the field identified in 'field_name'. - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. 
+ field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in row[field_name]: subvalues = split_authority_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) entity[field_name] = subvalues @@ -1414,12 +1801,18 @@ def create(self, config, field_definitions, entity, row, field_name): entity[field_name] = field_value # Cardinality has a limit, including 1. else: - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: subvalues = split_authority_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - subvalues = subvalues[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) entity[field_name] = subvalues else: field_value = split_authority_link_string(config, row[field_name]) @@ -1427,11 +1820,13 @@ def create(self, config, field_definitions, entity, row, field_name): return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -1452,24 +1847,24 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. """ - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['task'] == 'update_terms': - entity_id_field = 'term_id' - if config['task'] == 'update': - entity_id_field = 'node_id' - if config['task'] == 'update_media': - entity_id_field = 'media_id' + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" # Cardinality is unlimited. 
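# split_authority_link_string() parses the authority link cells used above. Based on
# how serialize() below reassembles them, a subvalue is "source%%uri" or
# "source%%uri%%title". A hypothetical illustration of that shape:
def split_authority_link_cell(cell, subdelimiter="|"):
    """Split 'viaf%%http://viaf.org/viaf/12345%%Some title' style cells."""
    subvalues = []
    for chunk in cell.split(subdelimiter):
        parts = chunk.split("%%", 2)
        subvalue = {"source": parts[0], "uri": parts[1]}
        if len(parts) == 3:
            subvalue["title"] = parts[2]
        subvalues.append(subvalue)
    return subvalues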
- if field_definitions[field_name]['cardinality'] == -1: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_authority_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) @@ -1479,8 +1874,8 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel else: field_value = split_authority_link_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_authority_link_string(config, row[field_name]) for subvalue in subvalues: @@ -1498,33 +1893,48 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel entity[field_name] = entity_field_values # Cardinality has a limit. else: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_authority_link_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] for subvalue in subvalues: field_values.append(subvalue) entity[field_name] = field_values else: field_value = split_authority_link_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': + if config["update_mode"] == "append": subvalues = split_authority_link_string(config, row[field_name]) for subvalue in subvalues: entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row[entity_id_field], field_definitions[field_name]['cardinality']) + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -1538,8 +1948,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. 
- """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -1557,16 +1966,26 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): """ valid_values = list() for subvalue in values: - if validate_authority_link_value(subvalue, field_definitions[field_name]['authority_sources']) is True: + if ( + validate_authority_link_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True + ): valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Authority Link field value.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' + ) logging.warning(message) return valid_values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -1582,58 +2001,65 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if 'title' in subvalue and subvalue['title'] is not None: - subvalues.append(subvalue['source'] + '%%' + subvalue['uri'] + '%%' + subvalue['title']) + if "title" in subvalue and subvalue["title"] is not None: + subvalues.append( + subvalue["source"] + + "%%" + + subvalue["uri"] + + "%%" + + subvalue["title"] + ) else: - subvalues.append(subvalue['source'] + '%%' + subvalue['uri']) + subvalues.append(subvalue["source"] + "%%" + subvalue["uri"]) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class MediaTrackField(): +class MediaTrackField: """Functions for handling fields with "media_track" Drupal (Islandora) field data type. - All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. - Note: this class assumes that the entity has the field identified in "field_name". - Callers should pre-emptively confirm that. For an example, see code near the top - of workbench.update(). + Note: this class assumes that the entity has the field identified in "field_name". + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). """ + def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. - field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). 
+ entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # Cardinality is unlimited. - if field_definitions[field_name]['cardinality'] == -1: - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["subdelimiter"] in row[field_name]: subvalues = split_media_track_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) entity[field_name] = subvalues @@ -1642,12 +2068,18 @@ def create(self, config, field_definitions, entity, row, field_name): entity[field_name] = field_value # Cardinality has a limit, including 1. else: - if config['subdelimiter'] in row[field_name]: + if config["subdelimiter"] in row[field_name]: subvalues = split_media_track_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - subvalues = subvalues[:field_definitions[field_name]['cardinality']] - log_field_cardinality_violation(field_name, id_field, field_definitions[field_name]['cardinality']) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], + ) entity[field_name] = subvalues else: field_value = split_media_track_string(config, row[field_name]) @@ -1655,11 +2087,13 @@ def create(self, config, field_definitions, entity, row, field_name): return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -1680,7 +2114,7 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. """ - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity @@ -1688,9 +2122,9 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel return entity # Cardinality is unlimited. 
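# split_media_track_string() parses the media track cells used above. serialize()
# below rebuilds them as "label:kind:srclang:filename", so a cell presumably looks
# like "English captions:subtitles:en:captions.vtt". A hypothetical illustration:
def split_media_track_cell(cell, subdelimiter="|"):
    """Split 'English captions:subtitles:en:captions.vtt' style cells."""
    subvalues = []
    for chunk in cell.split(subdelimiter):
        label, kind, srclang, path = chunk.split(":", 3)
        subvalues.append(
            {"label": label, "kind": kind, "srclang": srclang, "url": path}
        )
    return subvalues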
- if field_definitions[field_name]['cardinality'] == -1: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if field_definitions[field_name]["cardinality"] == -1: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_media_track_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) @@ -1700,8 +2134,8 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel else: field_value = split_media_track_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "append": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_media_track_string(config, row[field_name]) for subvalue in subvalues: @@ -1719,33 +2153,48 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel entity[field_name] = entity_field_values # Cardinality has a limit. else: - if config['update_mode'] == 'replace': - if config['subdelimiter'] in row[field_name]: + if config["update_mode"] == "replace": + if config["subdelimiter"] in row[field_name]: field_values = [] subvalues = split_media_track_string(config, row[field_name]) subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row['node_id'], field_definitions[field_name]['cardinality']) - subvalues = subvalues[:field_definitions[field_name]['cardinality']] + if len(subvalues) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row["node_id"], + field_definitions[field_name]["cardinality"], + ) + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] for subvalue in subvalues: field_values.append(subvalue) entity[field_name] = field_values else: field_value = split_media_track_string(config, row[field_name]) entity[field_name] = field_value - if config['update_mode'] == 'append': + if config["update_mode"] == "append": subvalues = split_media_track_string(config, row[field_name]) for subvalue in subvalues: entity_field_values.append(subvalue) - entity[field_name] = entity_field_values[:field_definitions[field_name]['cardinality']] - if len(entity[field_name]) > int(field_definitions[field_name]['cardinality']): - log_field_cardinality_violation(field_name, row['node_id'], field_definitions[field_name]['cardinality']) + entity[field_name] = entity_field_values[ + : field_definitions[field_name]["cardinality"] + ] + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row["node_id"], + field_definitions[field_name]["cardinality"], + ) return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -1759,8 +2208,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def remove_invalid_values(self, config, field_definitions, field_name, values): - """Removes invalid entries from 'values'. 
- """ + """Removes invalid entries from 'values'.""" """Parameters ---------- config : dict @@ -1778,16 +2226,26 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): """ valid_values = list() for subvalue in values: - if validate_media_track_value(subvalue, field_definitions[field_name]['authority_sources']) is True: + if ( + validate_media_track_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True + ): valid_values.append(subvalue) else: - message = 'Value "' + subvalue + '" in field "' + field_name + '" is not a valid Authority Link field value.' + message = ( + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' + ) logging.warning(message) return valid_values def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -1803,77 +2261,91 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return values subvalues = list() for subvalue in field_data: - if all('label' in subvalue, subvalue['label'] is not None, - 'kind' in subvalue, subvalue['kind'] is not None, - 'srclang' in subvalue, subvalue['srclang'] is not None, - 'url' in subvalue, subvalue['url'] is not None): + if all( + "label" in subvalue, + subvalue["label"] is not None, + "kind" in subvalue, + subvalue["kind"] is not None, + "srclang" in subvalue, + subvalue["srclang"] is not None, + "url" in subvalue, + subvalue["url"] is not None, + ): serialized = f"{subvalue['label']}:{subvalue['kind']}:{subvalue['srclang']}:{os.path.basename(subvalue['url'])}" subvalues.append(serialized) else: - subvalues.append(f"{subvalue['label']}:{subvalue['kind']}:{subvalue['srclang']}:{os.path.basename(subvalue['url'])}") + subvalues.append( + f"{subvalue['label']}:{subvalue['kind']}:{subvalue['srclang']}:{os.path.basename(subvalue['url'])}" + ) if len(subvalues) > 1: - return config['subdelimiter'].join(subvalues) + return config["subdelimiter"].join(subvalues) elif len(subvalues) == 0: return None else: return subvalues[0] -class EntityReferenceRevisionsField(): +class EntityReferenceRevisionsField: """Functions for handling fields with 'entity_reference_revisions' Drupal field - data type. This field *can* reference nodes, taxonomy terms, and media, but - workbench only supports paragraphs for now. + data type. This field *can* reference nodes, taxonomy terms, and media, but + workbench only supports paragraphs for now. - All functions return a "entity" dictionary that is passed to Requests' "json" - parameter. + All functions return a "entity" dictionary that is passed to Requests' "json" + parameter. """ paragraph_field_definitions = {} def create(self, config, field_definitions, entity, row, field_name): """Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - field_definitions : dict - The field definitions object defined by get_field_definitions(). - entity : dict - The dict that will be POSTed to Drupal as JSON. - row : OrderedDict. - The current CSV record. 
- field_name : string - The Drupal fieldname/CSV column header. - Returns - ------- - dictionary - A dictionary represeting the entity that is POSTed to Drupal as JSON. + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary represeting the entity that is POSTed to Drupal as JSON. """ if row[field_name] is None: logging.warning(f'Did not find "{field_name}" in row.') return entity - id_field = row.get(config.get('id_field', 'not_applicable'), 'not_applicable') + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") # This field *can* reference nodes, taxonomy terms, and media, but workbench # only supports paragraphs for now. - if not field_definitions[field_name]['target_type'] == 'paragraph': + if not field_definitions[field_name]["target_type"] == "paragraph": return entity # We allow fields to overide the global subdelimiter. - paragraph_configs = config.get('paragraph_fields', {}).get(field_definitions[field_name]['entity_type'], {}).get(field_name, {}) - subdelimiter = paragraph_configs.get('subdelimiter', None) or config['subdelimiter'] + paragraph_configs = ( + config.get("paragraph_fields", {}) + .get(field_definitions[field_name]["entity_type"], {}) + .get(field_name, {}) + ) + subdelimiter = ( + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + ) subvalues = row[field_name].split(subdelimiter) # @todo self.dedup_values # Enforce cardinality. - cardinality = field_definitions[field_name].get('cardinality', -1) + cardinality = field_definitions[field_name].get("cardinality", -1) if -1 < cardinality < len(subvalues): log_field_cardinality_violation(field_name, id_field, str(cardinality)) subvalues = subvalues[slice(0, cardinality)] @@ -1885,82 +2357,184 @@ def create(self, config, field_definitions, entity, row, field_name): # creating a node from the CSV row... # Cache paragraph field definitions - paragraph_type = paragraph_configs.get('type') + paragraph_type = paragraph_configs.get("type") if not paragraph_type: - logging.warn(f'Could not determine target paragraph type for field "{field_name}"') + logging.warn( + f'Could not determine target paragraph type for field "{field_name}"' + ) return entity if not self.paragraph_field_definitions.get(paragraph_type): - self.paragraph_field_definitions[paragraph_type] = get_field_definitions(config, 'paragraph', paragraph_type) + self.paragraph_field_definitions[paragraph_type] = get_field_definitions( + config, "paragraph", paragraph_type + ) reference_revisions = [] for subvalue in subvalues: # Zip together the fields and their values. - paragraph = dict(zip(paragraph_configs.get('field_order', {}), subvalue.split(paragraph_configs.get('field_delimiter', ':')))) + paragraph = dict( + zip( + paragraph_configs.get("field_order", {}), + subvalue.split(paragraph_configs.get("field_delimiter", ":")), + ) + ) # Process each field's value. for p_field, value in paragraph.items(): # This certainly isn't DRY, but here we go. # Entity reference fields (taxonomy_term and node). 
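# The paragraph handling in create() relies on a "paragraph_fields" configuration
# entry. Judging from the keys read above (type, field_order, field_delimiter,
# subdelimiter), the in-memory shape looks roughly like this; the node field and
# paragraph bundle names are hypothetical:
example_paragraph_fields = {
    "paragraph_fields": {
        "node": {
            "field_related_titles": {
                "type": "related_title",  # target paragraph bundle
                "field_order": ["field_title_type", "field_related_title"],
                "field_delimiter": ":",  # separates fields within one paragraph
                "subdelimiter": "|",  # separates multiple paragraph subvalues
            }
        }
    }
}
# With that configuration, the CSV cell "alternative:An Other Title|translated:Un
# autre titre" yields two paragraphs, each populated in field_order, and each field
# is then routed to the matching field class in the branches that follow.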
- if self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'entity_reference': + if ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference" + ): entity_reference_field = EntityReferenceField() - paragraph = entity_reference_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = entity_reference_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Entity reference revision fields (paragraphs). - elif self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'entity_reference_revisions': + elif ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference_revisions" + ): entity_reference_revisions_field = EntityReferenceRevisionsField() - paragraph = entity_reference_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = entity_reference_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Typed relation fields. - elif self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'typed_relation': + elif ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "typed_relation" + ): typed_relation_field = TypedRelationField() - paragraph = typed_relation_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = typed_relation_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Geolocation fields. - elif self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'geolocation': + elif ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "geolocation" + ): geolocation_field = GeolocationField() - paragraph = geolocation_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = geolocation_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Link fields. - elif self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'link': + elif ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "link" + ): link_field = LinkField() - paragraph = link_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = link_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Authority Link fields. - elif self.paragraph_field_definitions[paragraph_type][p_field]['field_type'] == 'authority_link': + elif ( + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "authority_link" + ): link_field = AuthorityLinkField() - paragraph = link_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = link_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # For non-entity reference and non-typed relation fields (text, integer, boolean etc.). 
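# Once a subvalue's fields have been processed (including the SimpleField fallback
# below), the paragraph is POSTed to Drupal and the node field ends up holding
# revision references. An assumed payload and result for the hypothetical
# "related_title" example above:
paragraph_payload = {
    "type": [{"target_id": "related_title"}],
    "parent_field_name": [{"value": "field_related_titles"}],
    "field_title_type": [{"value": "alternative"}],
    "field_related_title": [{"value": "An Other Title"}],
}
# A 201 response from /entity/paragraph?_format=json supplies the IDs that become
# the node field's value, e.g.:
# entity["field_related_titles"] = [{"target_id": 12, "target_revision_id": 34}]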
else: simple_field = SimpleField() - paragraph = simple_field.create(config, self.paragraph_field_definitions[paragraph_type], paragraph, paragraph, p_field) + paragraph = simple_field.create( + config, + self.paragraph_field_definitions[paragraph_type], + paragraph, + paragraph, + p_field, + ) # Set parent information. - paragraph.update({'type': [{'target_id': paragraph_configs.get('type')}], 'parent_field_name': [{'value': field_name}]}) + paragraph.update( + { + "type": [{"target_id": paragraph_configs.get("type")}], + "parent_field_name": [{"value": field_name}], + } + ) # Create the paragraph. - p_response = issue_request(config, 'POST', '/entity/paragraph?_format=json', {'Content-Type': 'application/json'}, paragraph, None) + p_response = issue_request( + config, + "POST", + "/entity/paragraph?_format=json", + {"Content-Type": "application/json"}, + paragraph, + None, + ) if p_response.status_code == 201: paragraph = p_response.json() - reference_revisions.append({'target_id': paragraph['id'][0]['value'], 'target_revision_id': paragraph['revision_id'][0]['value']}) + reference_revisions.append( + { + "target_id": paragraph["id"][0]["value"], + "target_revision_id": paragraph["revision_id"][0]["value"], + } + ) elif p_response.status_code == 403: - message = 'Not authorized to create paragraphs. Please ensure the paragraphs_type_permissions module is enable and the user has sufficient permissions.' + message = "Not authorized to create paragraphs. Please ensure the paragraphs_type_permissions module is enable and the user has sufficient permissions." print(message) logging.error(message) else: - message = p_response.json().get('message', 'Unknown') - logging.warn(f'Could not create paragraph for "{field_name}" in row "{id_field}": {message}') + message = p_response.json().get("message", "Unknown") + logging.warn( + f'Could not create paragraph for "{field_name}" in row "{id_field}": {message}' + ) entity[field_name] = reference_revisions return entity - def update(self, config, field_definitions, entity, row, field_name, entity_field_values): + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): """Note: this method appends incoming CSV values to existing values, replaces existing field - values with incoming values, or deletes all values from fields, depending on whether - config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual - values within fields. + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. """ """Parameters ---------- @@ -1981,30 +2555,33 @@ def update(self, config, field_definitions, entity, row, field_name, entity_fiel dictionary A dictionary represeting the entity that is PATCHed to Drupal as JSON. 
""" - if config['update_mode'] == 'delete': + if config["update_mode"] == "delete": entity[field_name] = [] return entity if row[field_name] is None: return entity - if config['update_mode'] == 'replace': + if config["update_mode"] == "replace": return self.create(config, field_definitions, entity, row, field_name) - if config['update_mode'] == 'append': + if config["update_mode"] == "append": # Save away existing values entity = self.create(config, field_definitions, entity, row, field_name) entity[field_name] = entity_field_values + entity[field_name] # Enforce cardinality - cardinality = field_definitions[field_name].get('cardinality', -1) + cardinality = field_definitions[field_name].get("cardinality", -1) if -1 < cardinality < len(entity[field_name]): - log_field_cardinality_violation(field_name, row.get(config.get('id_field', 'not_applicable'), 'not_applicable'), str(cardinality)) + log_field_cardinality_violation( + field_name, + row.get(config.get("id_field", "not_applicable"), "not_applicable"), + str(cardinality), + ) entity[field_name] = entity[field_name][slice(0, cardinality)] return entity def dedupe_values(self, values): - """Removes duplicate entries from 'values'. - """ + """Removes duplicate entries from 'values'.""" """Parameters ---------- values : list @@ -2018,8 +2595,7 @@ def dedupe_values(self, values): return deduplicate_field_values(values) def serialize(self, config, field_definitions, field_name, field_data): - """Serialized values into a format consistent with Workbench's CSV-field input format. - """ + """Serialized values into a format consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -2035,65 +2611,163 @@ def serialize(self, config, field_definitions, field_name, field_data): string A string structured same as the Workbench CSV field data for this field type. """ - if 'field_type' not in field_definitions[field_name]: + if "field_type" not in field_definitions[field_name]: return field_data # We allow fields to overide the global subdelimiter. - paragraph_configs = config.get('paragraph_fields', {}).get(field_definitions[field_name]['entity_type'], {}).get(field_name, {}) - subdelimiter = paragraph_configs.get('subdelimiter', None) or config['subdelimiter'] + paragraph_configs = ( + config.get("paragraph_fields", {}) + .get(field_definitions[field_name]["entity_type"], {}) + .get(field_name, {}) + ) + subdelimiter = ( + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + ) # Cache paragraph field definitions - paragraph_type = paragraph_configs.get('type') + paragraph_type = paragraph_configs.get("type") if not paragraph_type: - logging.warn(f'Could not determine target paragraph type for field "field_name". Returning data from Drupal.') + logging.warn( + f'Could not determine target paragraph type for field "field_name". Returning data from Drupal.' + ) return json.dumps(field_data) if not self.paragraph_field_definitions.get(paragraph_type): - self.paragraph_field_definitions[paragraph_type] = get_field_definitions(config, 'paragraph', paragraph_type) + self.paragraph_field_definitions[paragraph_type] = get_field_definitions( + config, "paragraph", paragraph_type + ) subvalues = list() for subvalue in field_data: # Retrieve the paragraph so we can serialize it. 
- target_id = subvalue.get('target_id') - p_response = issue_request(config, 'GET', f'/entity/paragraph/{target_id}?_format=json') + target_id = subvalue.get("target_id") + p_response = issue_request( + config, "GET", f"/entity/paragraph/{target_id}?_format=json" + ) if p_response.status_code == 200: paragraph = p_response.json() paragraph_parts = [] - for field in paragraph_configs.get('field_order', {}): - logging.info(f'Serializing paragraph field: {field}:' + json.dumps(paragraph.get(field))) + for field in paragraph_configs.get("field_order", {}): + logging.info( + f"Serializing paragraph field: {field}:" + + json.dumps(paragraph.get(field)) + ) if not paragraph.get(field): continue # Entity reference fields (taxonomy term and node). - if self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'entity_reference': + if ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference" + ): serialized_field = EntityReferenceField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Entity reference revision fields (mostly paragraphs). - elif self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'entity_reference_revisions': + elif ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference_revisions" + ): serialized_field = EntityReferenceRevisionsField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Typed relation fields (currently, only taxonomy term) - elif self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'typed_relation': + elif ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "typed_relation" + ): serialized_field = TypedRelationField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Geolocation fields. - elif self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'geolocation': + elif ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "geolocation" + ): serialized_field = GeolocationField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Link fields. 
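# serialize() reverses create(): each paragraph field is serialized by its matching
# field class, fields are joined with field_delimiter, and paragraphs are joined
# with the subdelimiter. Continuing the hypothetical example above, exporting the
# node would round-trip the cell back to:
#
#     "alternative:An Other Title|translated:Un autre titre"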
- elif self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'link': + elif ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "link" + ): serialized_field = LinkField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Authority Link fields. - elif self.paragraph_field_definitions[paragraph_type][field]['field_type'] == 'authority_link': + elif ( + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "authority_link" + ): serialized_field = AuthorityLinkField() - paragraph_parts.append(serialized_field.serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) + paragraph_parts.append( + serialized_field.serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) # Simple fields. else: - paragraph_parts.append(SimpleField().serialize(config, self.paragraph_field_definitions[paragraph_type], field, paragraph.get(field))) - subvalues.append(paragraph_configs.get('field_delimiter', ':').join(paragraph_parts)) + paragraph_parts.append( + SimpleField().serialize( + config, + self.paragraph_field_definitions[paragraph_type], + field, + paragraph.get(field), + ) + ) + subvalues.append( + paragraph_configs.get("field_delimiter", ":").join(paragraph_parts) + ) else: # Something went wrong, so we'll just return the Drupal field data we already have. - message = p_response.json().get('message', 'Unknown') - logging.warn(f'Could not retrieve paragraph for "{field_name}": {message}') + message = p_response.json().get("message", "Unknown") + logging.warn( + f'Could not retrieve paragraph for "{field_name}": {message}' + ) subvalues.append(subvalue) if len(subvalues) > 1: return subdelimiter.join(subvalues) diff --git a/workbench_utils.py b/workbench_utils.py index 94542e70..4ed9332b 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -30,13 +30,14 @@ import sqlite3 import requests_cache from rich.traceback import install + install() # Set some global variables. yaml = YAML() EXECUTION_START_TIME = datetime.datetime.now() -INTEGRATION_MODULE_MIN_VERSION = '1.0' +INTEGRATION_MODULE_MIN_VERSION = "1.0" # Workaround for https://github.com/mjordan/islandora_workbench/issues/360. http.client._MAXHEADERS = 10000 http_response_times = [] @@ -45,58 +46,61 @@ newly_created_terms = list() # These are the Drupal field names on the standard types of media. file_fields = [ - 'field_media_file', - 'field_media_image', - 'field_media_document', - 'field_media_audio_file', - 'field_media_video_file'] + "field_media_file", + "field_media_image", + "field_media_document", + "field_media_audio_file", + "field_media_video_file", +] def set_media_type(config, filepath, file_fieldname, csv_row): """Using either the 'media_type' or 'media_types_override' configuration - setting, determine which media bundle type to use. + setting, determine which media bundle type to use. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - filepath: string - The value of the CSV 'file' column. - file_fieldname: string - The name of the CSV column containing the filename (usually 'file'). 
None if the file - isn't in a CSV field (e.g., when config['paged_content_from_directories'] is True). - csv_row : OrderedDict - The CSV row for the current item. - Returns - ------- - string - A string naming the configured media type, e.g. 'image'. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + filepath: string + The value of the CSV 'file' column. + file_fieldname: string + The name of the CSV column containing the filename (usually 'file'). None if the file + isn't in a CSV field (e.g., when config['paged_content_from_directories'] is True). + csv_row : OrderedDict + The CSV row for the current item. + Returns + ------- + string + A string naming the configured media type, e.g. 'image'. """ - if 'media_type' in config: - return config['media_type'] + if "media_type" in config: + return config["media_type"] # Determine if the incomtimg filepath matches a registered eEmbed media type. oembed_media_type = get_oembed_url_media_type(config, filepath) if oembed_media_type is not None: return oembed_media_type - if file_fieldname is not None and filepath.strip().startswith('http'): - preprocessed_file_path = get_preprocessed_file_path(config, file_fieldname, csv_row) - filename = preprocessed_file_path.split('/')[-1] - extension = filename.split('.')[-1] - extension_with_dot = '.' + extension + if file_fieldname is not None and filepath.strip().startswith("http"): + preprocessed_file_path = get_preprocessed_file_path( + config, file_fieldname, csv_row + ) + filename = preprocessed_file_path.split("/")[-1] + extension = filename.split(".")[-1] + extension_with_dot = "." + extension else: extension_with_dot = os.path.splitext(filepath)[-1] extension = extension_with_dot[1:] normalized_extension = extension.lower() - media_type = 'file' - for types in config['media_types']: + media_type = "file" + for types in config["media_types"]: for type, extensions in types.items(): if normalized_extension in extensions: media_type = type - if 'media_types_override' in config: - for override in config['media_types_override']: + if "media_types_override" in config: + for override in config["media_types_override"]: for type, extensions in override.items(): if normalized_extension in extensions: media_type = type @@ -107,22 +111,22 @@ def set_media_type(config, filepath, file_fieldname, csv_row): def get_oembed_url_media_type(config, filepath): """Since oEmbed remote media (e.g. remove video) don't have extensions, which we - use to detect the media type of local files, we use remote URL patterns to - detect if the value of the 'file' columns is an oEmbed media. - - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - filepath: string - The value of the CSV 'file' column. - Returns - ------- - mtype : str|None - A string naming the detected media type, e.g. 'remote_video', or None - if the filepath does not start with a configured provider URL. + use to detect the media type of local files, we use remote URL patterns to + detect if the value of the 'file' columns is an oEmbed media. + + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + filepath: string + The value of the CSV 'file' column. + Returns + ------- + mtype : str|None + A string naming the detected media type, e.g. 'remote_video', or None + if the filepath does not start with a configured provider URL. 
""" - for oembed_provider in config['oembed_providers']: + for oembed_provider in config["oembed_providers"]: for mtype, provider_urls in oembed_provider.items(): for provider_url in provider_urls: if filepath.startswith(provider_url): @@ -134,19 +138,19 @@ def get_oembed_url_media_type(config, filepath): def get_oembed_media_types(config): """Get a list of the registered oEmbed media types from config. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). - Returns - ------- - media_types : list - A list with the configured allowed oEmbed media type(s), e.g. ['remote_video']. + Returns + ------- + media_types : list + A list with the configured allowed oEmbed media type(s), e.g. ['remote_video']. """ media_types = list() - for omt in config['oembed_providers']: + for omt in config["oembed_providers"]: keys = list(omt.keys()) media_types.append(keys[0]) return media_types @@ -154,89 +158,88 @@ def get_oembed_media_types(config): def set_model_from_extension(file_name, config): """Using configuration options, determine which Islandora Model value - to assign to nodes created from files. Options are either a single model - or a set of mappings from file extension to Islandora Model term ID. - - Parameters - ---------- - file_name : str - Filename that will be checked to determine Islandora Model value(s). - config : dict - The configuration settings defined by workbench_config.get_config(). + to assign to nodes created from files. Options are either a single model + or a set of mappings from file extension to Islandora Model term ID. + + Parameters + ---------- + file_name : str + Filename that will be checked to determine Islandora Model value(s). + config : dict + The configuration settings defined by workbench_config.get_config(). - Returns - ------- - None|str|dict - None is returned if 'task' is not set to 'create_from_files'. + Returns + ------- + None|str|dict + None is returned if 'task' is not set to 'create_from_files'. - str is returned if 'model' config value is set, a single model term ID is str returned. + str is returned if 'model' config value is set, a single model term ID is str returned. - dict is returned if 'models' config value is set, a dict with a mapping of URIs or Islandora Model term ID(s) - to file extension(s) is returned. + dict is returned if 'models' config value is set, a dict with a mapping of URIs or Islandora Model term ID(s) + to file extension(s) is returned. """ - if config['task'] != 'create_from_files': + if config["task"] != "create_from_files": return None - if 'model' in config: - return config['model'] + if "model" in config: + return config["model"] extension_with_dot = os.path.splitext(file_name)[1] extension = extension_with_dot[1:] normalized_extension = extension.lower() - for model_tids in config['models']: + for model_tids in config["models"]: for tid, extensions in model_tids.items(): - if str(tid).startswith('http'): + if str(tid).startswith("http"): tid = get_term_id_from_uri(config, tid) if normalized_extension in extensions: return tid # If the file's extension is not listed in the config, # We use the term ID that contains an empty extension. 
- if '' in extensions: + if "" in extensions: return tid -def issue_request( - config, - method, - path, - headers=None, - json='', - data='', - query=None): +def issue_request(config, method, path, headers=None, json="", data="", query=None): """Issue the HTTP request to Drupal. Note: calls to non-Drupal URLs - do not use this function. + do not use this function. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - method : str - The HTTP method to be issued for the request, e.g. POST or GET. - path : str - Path to the API endpoint that will be used for request. - headers : dict, optional - HTTP header information to be sent with request encoded as a dict. - json : dict, optional - Data to be sent with request body as JSON format, but encoded as a dict. - data : str, optional - Data to be sent in request body. - query : dict, optional - Request parameters sent as a dict. - - Returns - ------- - requests.Response + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + method : str + The HTTP method to be issued for the request, e.g. POST or GET. + path : str + Path to the API endpoint that will be used for request. + headers : dict, optional + HTTP header information to be sent with request encoded as a dict. + json : dict, optional + Data to be sent with request body as JSON format, but encoded as a dict. + data : str, optional + Data to be sent in request body. + query : dict, optional + Request parameters sent as a dict. + + Returns + ------- + requests.Response """ - if not config['password']: - message = 'Password for Drupal user not found. Please add the "password" option to your configuration ' + \ - 'file or provide the Drupal user\'s password in your ISLANDORA_WORKBENCH_PASSWORD environment variable.' + if not config["password"]: + message = ( + 'Password for Drupal user not found. Please add the "password" option to your configuration ' + + "file or provide the Drupal user's password in your ISLANDORA_WORKBENCH_PASSWORD environment variable." + ) logging.error(message) sys.exit("Error: " + message) - if config['check'] is False: - if 'pause' in config and method in ['POST', 'PUT', 'PATCH', 'DELETE'] and value_is_numeric(config['pause']): - time.sleep(int(config['pause'])) + if config["check"] is False: + if ( + "pause" in config + and method in ["POST", "PUT", "PATCH", "DELETE"] + and value_is_numeric(config["pause"]) + ): + time.sleep(int(config["pause"])) if headers is None: headers = dict() @@ -244,150 +247,166 @@ def issue_request( if query is None: query = dict() - headers.update({'User-Agent': config['user_agent']}) + headers.update({"User-Agent": config["user_agent"]}) # The trailing / is stripped in config, but we do it here too, just in case. - config['host'] = config['host'].rstrip('/') - if config['host'] in path: + config["host"] = config["host"].rstrip("/") + if config["host"] in path: url = path else: # Since we remove the trailing / from the hostname, we need to ensure # that there is a / separating the host from the path. 
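        # e.g. an illustrative host of "https://islandora.example.com" and a path of
        # "node/1?_format=json" are joined as
        # "https://islandora.example.com/node/1?_format=json".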
- if not path.startswith('/'): - path = '/' + path - url = config['host'] + path + if not path.startswith("/"): + path = "/" + path + url = config["host"] + path - if config['log_request_url'] is True: - logging.info(method + ' ' + url) + if config["log_request_url"] is True: + logging.info(method + " " + url) - if method == 'GET': - if config['log_headers'] is True: + if method == "GET": + if config["log_headers"] is True: logging.info(headers) response = requests.get( url, - allow_redirects=config['allow_redirects'], - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), + allow_redirects=config["allow_redirects"], + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), params=query, - headers=headers + headers=headers, ) - if method == 'HEAD': - if config['log_headers'] is True: + if method == "HEAD": + if config["log_headers"] is True: logging.info(headers) response = requests.head( url, - allow_redirects=config['allow_redirects'], - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), - headers=headers + allow_redirects=config["allow_redirects"], + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), + headers=headers, ) - if method == 'POST': - if config['log_headers'] is True: + if method == "POST": + if config["log_headers"] is True: logging.info(headers) - if config['log_json'] is True: + if config["log_json"] is True: logging.info(json) response = requests.post( url, - allow_redirects=config['allow_redirects'], + allow_redirects=config["allow_redirects"], stream=True, - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), headers=headers, json=json, - data=data + data=data, ) - if method == 'PUT': - if config['log_headers'] is True: + if method == "PUT": + if config["log_headers"] is True: logging.info(headers) - if config['log_json'] is True: + if config["log_json"] is True: logging.info(json) response = requests.put( url, - allow_redirects=config['allow_redirects'], + allow_redirects=config["allow_redirects"], stream=True, - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), headers=headers, json=json, - data=data + data=data, ) - if method == 'PATCH': - if config['log_headers'] is True: + if method == "PATCH": + if config["log_headers"] is True: logging.info(headers) - if config['log_json'] is True: + if config["log_json"] is True: logging.info(json) response = requests.patch( url, - allow_redirects=config['allow_redirects'], + allow_redirects=config["allow_redirects"], stream=True, - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), headers=headers, json=json, - data=data + data=data, ) - if method == 'DELETE': - if config['log_headers'] is True: + if method == "DELETE": + if config["log_headers"] is True: logging.info(headers) response = requests.delete( url, - allow_redirects=config['allow_redirects'], - verify=config['secure_ssl_only'], - auth=(config['username'], config['password']), - headers=headers + allow_redirects=config["allow_redirects"], + verify=config["secure_ssl_only"], + auth=(config["username"], config["password"]), + headers=headers, ) - if config['log_response_status_code'] is True: + if 
config["log_response_status_code"] is True: logging.info(response.status_code) - if config['log_response_body'] is True: + if config["log_response_body"] is True: logging.info(response.text) response_time = response.elapsed.total_seconds() average_response_time = calculate_response_time_trend(config, response_time) - log_response_time_value = copy.copy(config['log_response_time']) - if 'adaptive_pause' in config and value_is_numeric(config['adaptive_pause']): + log_response_time_value = copy.copy(config["log_response_time"]) + if "adaptive_pause" in config and value_is_numeric(config["adaptive_pause"]): # Pause defined in config['adaptive_pause'] is included in the response time, # so we subtract it to get the "unpaused" response time. - if average_response_time is not None and (response_time - int(config['adaptive_pause'])) > (average_response_time * int(config['adaptive_pause_threshold'])): - message = "HTTP requests paused for " + str(config['adaptive_pause']) + " seconds because request in next log entry " + \ - "exceeded adaptive threshold of " + str(config['adaptive_pause_threshold']) + "." - time.sleep(int(config['adaptive_pause'])) + if average_response_time is not None and ( + response_time - int(config["adaptive_pause"]) + ) > (average_response_time * int(config["adaptive_pause_threshold"])): + message = ( + "HTTP requests paused for " + + str(config["adaptive_pause"]) + + " seconds because request in next log entry " + + "exceeded adaptive threshold of " + + str(config["adaptive_pause_threshold"]) + + "." + ) + time.sleep(int(config["adaptive_pause"])) logging.info(message) # Enable response time logging if we surpass the adaptive pause threashold. - config['log_response_time'] = True + config["log_response_time"] = True - if config['log_response_time'] is True: + if config["log_response_time"] is True: parsed_query_string = urllib.parse.urlparse(url).query if len(parsed_query_string): - url_for_logging = urllib.parse.urlparse(url).path + '?' + parsed_query_string + url_for_logging = ( + urllib.parse.urlparse(url).path + "?" + parsed_query_string + ) else: url_for_logging = urllib.parse.urlparse(url).path - if 'adaptive_pause' in config and value_is_numeric(config['adaptive_pause']): - response_time = response_time - int(config['adaptive_pause']) - response_time_trend_entry = {'method': method, 'response': response.status_code, 'url': url_for_logging, 'response_time': response_time, 'average_response_time': average_response_time} + if "adaptive_pause" in config and value_is_numeric(config["adaptive_pause"]): + response_time = response_time - int(config["adaptive_pause"]) + response_time_trend_entry = { + "method": method, + "response": response.status_code, + "url": url_for_logging, + "response_time": response_time, + "average_response_time": average_response_time, + } logging.info(response_time_trend_entry) # Set this config option back to what it was before we updated in above. - config['log_response_time'] = log_response_time_value + config["log_response_time"] = log_response_time_value return response def convert_semver_to_number(version_string): """Convert a Semantic Version number (e.g. Drupal's) string to a number. We only need the major - and minor numbers (e.g. 9.2). - - Parameters - ---------- - version_string: string - The version string as retrieved from Drupal. - Returns - ------- - tuple - A tuple containing the major and minor Drupal core version numbers as integers. + and minor numbers (e.g. 9.2). 
+ + Parameters + ---------- + version_string: string + The version string as retrieved from Drupal. + Returns + ------- + tuple + A tuple containing the major and minor Drupal core version numbers as integers. """ - parts = version_string.split('.') + parts = version_string.split(".") parts = parts[:2] int_parts = [int(part) for part in parts] version_tuple = tuple(int_parts) @@ -397,36 +416,38 @@ def convert_semver_to_number(version_string): def get_drupal_core_version(config): """Get Drupal's version number. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - Returns - ------- - string|False - The Drupal core version number string (i.e., may contain -dev, etc.). + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + Returns + ------- + string|False + The Drupal core version number string (i.e., may contain -dev, etc.). """ - url = config['host'] + '/islandora_workbench_integration/core_version' - response = issue_request(config, 'GET', url) + url = config["host"] + "/islandora_workbench_integration/core_version" + response = issue_request(config, "GET", url) if response.status_code == 200: version_body = json.loads(response.text) - return version_body['core_version'] + return version_body["core_version"] else: logging.warning( - "Attempt to get Drupal core version number returned a %s status code", response.status_code) + "Attempt to get Drupal core version number returned a %s status code", + response.status_code, + ) return False def check_drupal_core_version(config): """Used during --check to verify if the minimum required Drupal version for workbench is being used. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - Returns - ------- - None + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + Returns + ------- + None """ drupal_core_version = get_drupal_core_version(config) if drupal_core_version is not False: @@ -434,93 +455,114 @@ def check_drupal_core_version(config): else: message = "Workbench cannot determine Drupal's version number." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if core_version_number < tuple([8, 6]): - message = "Warning: Media creation in your version of Drupal (" + \ - drupal_core_version + \ - ") is less reliable than in Drupal 8.6 or higher." + message = ( + "Warning: Media creation in your version of Drupal (" + + drupal_core_version + + ") is less reliable than in Drupal 8.6 or higher." + ) print(message) def check_integration_module_version(config): """Verifies if the minimum required version of the workbench integration module is being used. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - Returns - ------- - None + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + Returns + ------- + None """ version = get_integration_module_version(config) if version is False: - message = "Workbench cannot determine the Islandora Workbench Integration module's version number. It must be version " + \ - str(INTEGRATION_MODULE_MIN_VERSION) + ' or higher.' + message = ( + "Workbench cannot determine the Islandora Workbench Integration module's version number. It must be version " + + str(INTEGRATION_MODULE_MIN_VERSION) + + " or higher." 
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: version_number = convert_semver_to_number(version) - minimum_version_number = convert_semver_to_number(INTEGRATION_MODULE_MIN_VERSION) + minimum_version_number = convert_semver_to_number( + INTEGRATION_MODULE_MIN_VERSION + ) if version_number < minimum_version_number: - message = "The Islandora Workbench Integration module installed on " + config['host'] + " must be" + \ - " upgraded to version " + str(INTEGRATION_MODULE_MIN_VERSION) + '.' + message = ( + "The Islandora Workbench Integration module installed on " + + config["host"] + + " must be" + + " upgraded to version " + + str(INTEGRATION_MODULE_MIN_VERSION) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: - logging.info("OK, Islandora Workbench Integration module installed on " + config['host'] + " is at version " + str(version) + '.') + logging.info( + "OK, Islandora Workbench Integration module installed on " + + config["host"] + + " is at version " + + str(version) + + "." + ) def get_integration_module_version(config): """Get the Islandora Workbench Integration module's version number. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - Returns - ------- - string|False - The version number string (i.e., may contain -dev, etc.) from the - Islandora Workbench Integration module. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + Returns + ------- + string|False + The version number string (i.e., may contain -dev, etc.) from the + Islandora Workbench Integration module. """ - url = config['host'] + '/islandora_workbench_integration/version' - response = issue_request(config, 'GET', url) + url = config["host"] + "/islandora_workbench_integration/version" + response = issue_request(config, "GET", url) if response.status_code == 200: version_body = json.loads(response.text) - return version_body['integration_module_version'] + return version_body["integration_module_version"] else: logging.warning( - "Attempt to get the Islandora Workbench Integration module's version number returned a %s status code", response.status_code) + "Attempt to get the Islandora Workbench Integration module's version number returned a %s status code", + response.status_code, + ) return False -def ping_node(config, nid, method='HEAD', return_json=False, warn=True): +def ping_node(config, nid, method="HEAD", return_json=False, warn=True): """Ping the node to see if it exists. - Note that HEAD requests do not return a response body. + Note that HEAD requests do not return a response body. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - nid : - Node ID of the node to be pinged. - method: string, optional - Either 'HEAD' or 'GET'. - return_json: boolean, optional - warn: boolean, optional - - Returns - ------ - boolean|str - True if method is HEAD and node was found, the response JSON response - body if method was GET. False if request returns a non-allowed status code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + nid : + Node ID of the node to be pinged. + method: string, optional + Either 'HEAD' or 'GET'. 
+ return_json: boolean, optional + warn: boolean, optional + + Returns + ------ + boolean|str + True if method is HEAD and node was found, the response JSON response + body if method was GET. False if request returns a non-allowed status code. """ if value_is_numeric(nid) is False: nid = get_nid_from_url_alias(config, nid) - url = config['host'] + '/node/' + str(nid) + '?_format=json' + url = config["host"] + "/node/" + str(nid) + "?_format=json" response = issue_request(config, method.upper(), url) allowed_status_codes = [200, 301, 302] if response.status_code in allowed_status_codes: @@ -530,128 +572,165 @@ def ping_node(config, nid, method='HEAD', return_json=False, warn=True): return True else: if warn is True: - logging.warning("Node ping (%s) on %s returned a %s status code.", method.upper(), url, response.status_code) + logging.warning( + "Node ping (%s) on %s returned a %s status code.", + method.upper(), + url, + response.status_code, + ) return False def ping_url_alias(config, url_alias): """Ping the URL alias to see if it exists. Return the status code. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - url_alias : str - The string with the URL alias being pinged. - Returns - ------- - int - HTTP status code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + url_alias : str + The string with the URL alias being pinged. + Returns + ------- + int + HTTP status code. """ - url = config['host'] + url_alias + '?_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + url_alias + "?_format=json" + response = issue_request(config, "GET", url) return response.status_code def ping_vocabulary(config, vocab_id): """Ping the node to see if it exists. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - vocab_id : str - The string with the vocabulary ID being pinged. - Returns - ------- - boolean - Returns Ture if HTTP status code returned is 200, if not False is returned. - """ - url = config['host'] + '/entity/taxonomy_vocabulary/' + vocab_id.strip() + '?_format=json' - response = issue_request(config, 'GET', url) + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + vocab_id : str + The string with the vocabulary ID being pinged. + Returns + ------- + boolean + Returns Ture if HTTP status code returned is 200, if not False is returned. + """ + url = ( + config["host"] + + "/entity/taxonomy_vocabulary/" + + vocab_id.strip() + + "?_format=json" + ) + response = issue_request(config, "GET", url) if response.status_code == 200: return True else: - logging.warning("Node ping (GET) on %s returned a %s status code.", url, response.status_code) + logging.warning( + "Node ping (GET) on %s returned a %s status code.", + url, + response.status_code, + ) return False def ping_term(config, term_id): """Ping the term to see if it exists. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - term_id : str - The string with the term ID being pinged. - Returns - ------- - boolean - Returns Ture if HTTP status code returned is 200, if not False is returned. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + term_id : str + The string with the term ID being pinged. 
+ Returns + ------- + boolean + Returns Ture if HTTP status code returned is 200, if not False is returned. """ - url = config['host'] + '/taxonomy/term/' + str(term_id).strip() + '?_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + "/taxonomy/term/" + str(term_id).strip() + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code == 200: return True else: - logging.warning("Term ping (GET) on %s returned a %s status code.", url, response.status_code) + logging.warning( + "Term ping (GET) on %s returned a %s status code.", + url, + response.status_code, + ) return False def ping_islandora(config, print_message=True): """Connect to Islandora in prep for subsequent HTTP requests. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - print_message : boolean, optional - If set to True, after ping successfully performed, a status message is printed for the user. - Returns - ------- - None + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + print_message : boolean, optional + If set to True, after ping successfully performed, a status message is printed for the user. + Returns + ------- + None """ # First, test a known request that requires Administrator-level permissions. - url = config['host'] + '/islandora_workbench_integration/version' + url = config["host"] + "/islandora_workbench_integration/version" try: - host_response = issue_request(config, 'GET', url) + host_response = issue_request(config, "GET", url) except requests.exceptions.Timeout as err_timeout: - message = 'Workbench timed out trying to reach ' + \ - config['host'] + '. Please verify the "host" setting in your configuration ' + \ - 'and check your network connection.' + message = ( + "Workbench timed out trying to reach " + + config["host"] + + '. Please verify the "host" setting in your configuration ' + + "and check your network connection." + ) logging.error(message) logging.error(err_timeout) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) except requests.exceptions.ConnectionError as error_connection: - message = 'Workbench cannot connect to ' + \ - config['host'] + '. Please verify the "host" setting in your configuration ' + \ - 'and check your network connection.' + message = ( + "Workbench cannot connect to " + + config["host"] + + '. Please verify the "host" setting in your configuration ' + + "and check your network connection." + ) logging.error(message) logging.error(error_connection) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if host_response.status_code == 404: - message = 'Workbench cannot detect whether the Islandora Workbench Integration module is ' + \ - 'enabled on ' + config['host'] + '. Please ensure it is enabled and that its version is ' + \ - str(INTEGRATION_MODULE_MIN_VERSION) + ' or higher.' + message = ( + "Workbench cannot detect whether the Islandora Workbench Integration module is " + + "enabled on " + + config["host"] + + ". Please ensure it is enabled and that its version is " + + str(INTEGRATION_MODULE_MIN_VERSION) + + " or higher." 
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) not_authorized = [401, 403] if host_response.status_code in not_authorized: - message = 'Workbench can connect to ' + \ - config['host'] + ' but the user "' + config['username'] + \ - '" does not have sufficient permissions to continue, or the credentials are invalid.' + message = ( + "Workbench can connect to " + + config["host"] + + ' but the user "' + + config["username"] + + '" does not have sufficient permissions to continue, or the credentials are invalid.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['secure_ssl_only'] is True: - message = "OK, connection to Drupal at " + config['host'] + " verified." + if config["secure_ssl_only"] is True: + message = "OK, connection to Drupal at " + config["host"] + " verified." else: - message = "OK, connection to Drupal at " + config['host'] + " verified. Ignoring SSL certificates." + message = ( + "OK, connection to Drupal at " + + config["host"] + + " verified. Ignoring SSL certificates." + ) if print_message is True: logging.info(message) print(message) @@ -659,125 +738,135 @@ def ping_islandora(config, print_message=True): def ping_content_type(config): """Ping the content_type set in the configuration to see if it exists. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - Returns - ------- - int - The HTTP response code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + Returns + ------- + int + The HTTP response code. """ url = f"{config['host']}/entity/entity_form_display/node.{config['content_type']}.default?_format=json" - return issue_request(config, 'GET', url).status_code + return issue_request(config, "GET", url).status_code def ping_view_endpoint(config, view_url): """Verifies that the View REST endpoint is accessible. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - view_url - The View's REST export path. - Returns - ------- - int - The HTTP response code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + view_url + The View's REST export path. + Returns + ------- + int + The HTTP response code. """ - return issue_request(config, 'HEAD', view_url).status_code + return issue_request(config, "HEAD", view_url).status_code def ping_entity_reference_view_endpoint(config, fieldname, handler_settings): """Verifies that the REST endpoint of the View is accessible. The path to this endpoint - is defined in the configuration file's 'entity_reference_view_endpoints' option. + is defined in the configuration file's 'entity_reference_view_endpoints' option. - Necessary for entity reference fields configured as "Views: Filter by an entity reference View". - Unlike Views endpoints for taxonomy entity reference fields configured using the "default" - entity reference method, the Islandora Workbench Integration module does not provide a generic - Views REST endpoint that can be used to validate values in this type of field. + Necessary for entity reference fields configured as "Views: Filter by an entity reference View". 
+ Unlike Views endpoints for taxonomy entity reference fields configured using the "default" + entity reference method, the Islandora Workbench Integration module does not provide a generic + Views REST endpoint that can be used to validate values in this type of field. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - fieldname : string - The name of the Drupal field. - handler_settings : dict - The handler_settings values from the field's configuration. - # handler_settings': {'view': {'view_name': 'mj_entity_reference_test', 'display_name': 'entity_reference_1', 'arguments': []}} - Returns - ------- - bool - True if the REST endpoint is accessible, False if not. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + fieldname : string + The name of the Drupal field. + handler_settings : dict + The handler_settings values from the field's configuration. + # handler_settings': {'view': {'view_name': 'mj_entity_reference_test', 'display_name': 'entity_reference_1', 'arguments': []}} + Returns + ------- + bool + True if the REST endpoint is accessible, False if not. """ endpoint_mappings = get_entity_reference_view_endpoints(config) if len(endpoint_mappings) == 0: - logging.warning("The 'entity_reference_view_endpoints' option in your configuration file does not contain any field-Views endpoint mappings.") + logging.warning( + "The 'entity_reference_view_endpoints' option in your configuration file does not contain any field-Views endpoint mappings." + ) return False if fieldname not in endpoint_mappings: - logging.warning('The field "' + fieldname + '" is not in your "entity_reference_view_endpoints" configuration option.') + logging.warning( + 'The field "' + + fieldname + + '" is not in your "entity_reference_view_endpoints" configuration option.' + ) return False # E.g., "http://localhost:8000/issue_452_test?name=xxx&_format=json" - url = config['host'] + endpoint_mappings[fieldname] + '?name=xxx&_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + endpoint_mappings[fieldname] + "?name=xxx&_format=json" + response = issue_request(config, "GET", url) if response.status_code == 200: return True else: - logging.warning("View REST export ping (GET) on %s returned a %s status code", url, response.status_code) + logging.warning( + "View REST export ping (GET) on %s returned a %s status code", + url, + response.status_code, + ) return False def ping_media_bundle(config, bundle_name): """Ping the Media bundle/type to see if it exists. Return the status code, - a 200 if it exists or a 404 if it doesn't exist or the Media Type REST resource - is not enabled on the target Drupal. + a 200 if it exists or a 404 if it doesn't exist or the Media Type REST resource + is not enabled on the target Drupal. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - bundle_name : str - Media bundle/type to be pinged. - Returns - ------- - int - The HTTP response code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + bundle_name : str + Media bundle/type to be pinged. + Returns + ------- + int + The HTTP response code. 
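+
+    Example
+    -------
+    With an illustrative host of "https://islandora.example.com" and a
+    bundle_name of "image", the endpoint pinged is
+    "https://islandora.example.com/entity/media_type/image?_format=json".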
""" - url = config['host'] + '/entity/media_type/' + bundle_name + '?_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + "/entity/media_type/" + bundle_name + "?_format=json" + response = issue_request(config, "GET", url) return response.status_code -def ping_media(config, mid, method='HEAD', return_json=False, warn=True): +def ping_media(config, mid, method="HEAD", return_json=False, warn=True): """Ping the media to see if it exists. - Note that HEAD requests do not return a response body. + Note that HEAD requests do not return a response body. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - mid : - Media ID of the media to be pinged. - method: string, optional - Either 'HEAD' or 'GET'. - return_json: boolean, optional - warn: boolean, optional - - Returns - ------ - boolean|str - True if method is HEAD and node was found, the response JSON response - body if method was GET. False if request returns a non-allowed status code. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + mid : + Media ID of the media to be pinged. + method: string, optional + Either 'HEAD' or 'GET'. + return_json: boolean, optional + warn: boolean, optional + + Returns + ------ + boolean|str + True if method is HEAD and node was found, the response JSON response + body if method was GET. False if request returns a non-allowed status code. """ - if config['standalone_media_url'] is True: - url = config['host'] + '/media/' + mid + '?_format=json' + if config["standalone_media_url"] is True: + url = config["host"] + "/media/" + mid + "?_format=json" else: - url = config['host'] + '/media/' + mid + '/edit?_format=json' + url = config["host"] + "/media/" + mid + "/edit?_format=json" response = issue_request(config, method.upper(), url) allowed_status_codes = [200, 301, 302] @@ -788,155 +877,174 @@ def ping_media(config, mid, method='HEAD', return_json=False, warn=True): return True else: if warn is True: - logging.warning("Media ping (%s) on %s returned a %s status code.", method.upper(), url, response.status_code) + logging.warning( + "Media ping (%s) on %s returned a %s status code.", + method.upper(), + url, + response.status_code, + ) return False def extract_media_id(config: dict, media_csv_row: dict): """Extract the media entity's ID from the CSV row. - Parameters - ---------- - config : dict - The global configuration object. - media_csv_row : OrderedDict - The CSV row containing the media entity's field names and values. - - Returns - ------- - str|None - The media entity's ID if it could be extracted from the CSV row and is valid, otherwise None. + Parameters + ---------- + config : dict + The global configuration object. + media_csv_row : OrderedDict + The CSV row containing the media entity's field names and values. + + Returns + ------- + str|None + The media entity's ID if it could be extracted from the CSV row and is valid, otherwise None. 
""" - if 'media_id' not in media_csv_row: # Media ID column is missing - logging.error('Media ID column missing in CSV file.') + if "media_id" not in media_csv_row: # Media ID column is missing + logging.error("Media ID column missing in CSV file.") return None - if not media_csv_row['media_id']: # Media ID column is present but empty - logging.error('Row with empty media_id column detected in CSV file.') + if not media_csv_row["media_id"]: # Media ID column is present but empty + logging.error("Row with empty media_id column detected in CSV file.") return None # If media ID is not numeric, assume it is a media URL alias. - if not value_is_numeric(media_csv_row['media_id']): + if not value_is_numeric(media_csv_row["media_id"]): # Note that this function returns False if the media URL alias does not exist. - media_id = get_mid_from_media_url_alias(config, media_csv_row['media_id']) + media_id = get_mid_from_media_url_alias(config, media_csv_row["media_id"]) # Media URL alias does not exist. if media_id is False: - logging.error('Media URL alias %s does not exist.', media_csv_row['media_id']) + logging.error( + "Media URL alias %s does not exist.", media_csv_row["media_id"] + ) return None else: return str(media_id) # If media ID is numeric, use it as is, if it is a valid media ID else: - media_response_code = ping_media(config, media_csv_row['media_id']) + media_response_code = ping_media(config, media_csv_row["media_id"]) if media_response_code is not True: - logging.error('Media ID %s does not exist.', media_csv_row['media_id']) + logging.error("Media ID %s does not exist.", media_csv_row["media_id"]) return None # If media ID exists, use it as is (since this is a string) else: - return media_csv_row['media_id'] + return media_csv_row["media_id"] def ping_remote_file(config, url): """Ping remote file, but logging, exiting, etc. happens in caller, except on requests error. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - url : str - URL of remote file to be pinged. - Returns - ------- - None + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + url : str + URL of remote file to be pinged. + Returns + ------- + None """ sections = urllib.parse.urlparse(url) try: - response = requests.head(url, allow_redirects=True, verify=config['secure_ssl_only']) + response = requests.head( + url, allow_redirects=True, verify=config["secure_ssl_only"] + ) return response.status_code except requests.exceptions.Timeout as err_timeout: - message = 'Workbench timed out trying to reach ' + \ - sections.netloc + ' while connecting to ' + url + '. Please verify that URL and check your network connection.' + message = ( + "Workbench timed out trying to reach " + + sections.netloc + + " while connecting to " + + url + + ". Please verify that URL and check your network connection." + ) logging.error(message) logging.error(err_timeout) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) except requests.exceptions.ConnectionError as error_connection: - message = 'Workbench cannot connect to ' + \ - sections.netloc + ' while connecting to ' + url + '. Please verify that URL and check your network connection.' + message = ( + "Workbench cannot connect to " + + sections.netloc + + " while connecting to " + + url + + ". Please verify that URL and check your network connection." 
+ ) logging.error(message) logging.error(error_connection) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def get_nid_from_url_alias(config, url_alias): """Gets a node ID from a URL alias. This function also works - canonical URLs, e.g. http://localhost:8000/node/1648. + canonical URLs, e.g. http://localhost:8000/node/1648. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - url_alias : string - The full URL alias (or canonical URL), including http://, etc. - Returns - ------- - int|boolean - The node ID, or False if the URL cannot be found. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + url_alias : string + The full URL alias (or canonical URL), including http://, etc. + Returns + ------- + int|boolean + The node ID, or False if the URL cannot be found. """ if url_alias is False: return False - url = url_alias + '?_format=json' - response = issue_request(config, 'GET', url) + url = url_alias + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code != 200: return False else: node = json.loads(response.text) - return node['nid'][0]['value'] + return node["nid"][0]["value"] def get_mid_from_media_url_alias(config, url_alias): """Gets a media ID from a media URL alias. This function also works - with canonical URLs, e.g. http://localhost:8000/media/1234. + with canonical URLs, e.g. http://localhost:8000/media/1234. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - url_alias : string - The full URL alias (or canonical URL), including http://, etc. - Returns - ------- - int|boolean - The media ID, or False if the URL cannot be found. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + url_alias : string + The full URL alias (or canonical URL), including http://, etc. + Returns + ------- + int|boolean + The media ID, or False if the URL cannot be found. """ - url = url_alias + '?_format=json' - response = issue_request(config, 'GET', url) + url = url_alias + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code != 200: return False else: node = json.loads(response.text) - return node['mid'][0]['value'] + return node["mid"][0]["value"] def get_node_title_from_nid(config, node_id): """Get node title from Drupal. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - node_id : string - The node ID for the node title being fetched. - Returns - ------- - str|boolean - The node title, or False if the URL does not return HTTP status 200. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + node_id : string + The node ID for the node title being fetched. + Returns + ------- + str|boolean + The node title, or False if the URL does not return HTTP status 200. 
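+
+    Example
+    -------
+    With an illustrative host of "https://islandora.example.com" and a node_id
+    of "1648", the title is read from
+    "https://islandora.example.com/node/1648?_format=json".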
""" - node_url = config['host'] + '/node/' + node_id + '?_format=json' - node_response = issue_request(config, 'GET', node_url) + node_url = config["host"] + "/node/" + node_id + "?_format=json" + node_response = issue_request(config, "GET", node_url) if node_response.status_code == 200: node_dict = json.loads(node_response.text) - return node_dict['title'][0]['value'] + return node_dict["title"][0]["value"] else: return False @@ -944,269 +1052,371 @@ def get_node_title_from_nid(config, node_id): def get_field_definitions(config, entity_type, bundle_type=None): """Get field definitions from Drupal. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - entity_type : string - One of 'node', 'media', 'taxonomy_term', or 'paragraph'. - bundle_type : string, optional - None for nodes (the content type can optionally be gotten from config), - the vocabulary name, or the media type (image', 'document', 'audio', - 'video', 'file', etc.). - Returns - ------- - dict - A dictionary with field names as keys and values arrays containing - field config data. Config data varies slightly by entity type. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + entity_type : string + One of 'node', 'media', 'taxonomy_term', or 'paragraph'. + bundle_type : string, optional + None for nodes (the content type can optionally be gotten from config), + the vocabulary name, or the media type (image', 'document', 'audio', + 'video', 'file', etc.). + Returns + ------- + dict + A dictionary with field names as keys and values arrays containing + field config data. Config data varies slightly by entity type. """ ping_islandora(config, print_message=False) field_definitions = {} - if entity_type == 'node': - bundle_type = config['content_type'] + if entity_type == "node": + bundle_type = config["content_type"] fields = get_entity_fields(config, entity_type, bundle_type) for fieldname in fields: field_definitions[fieldname] = {} - raw_field_config = get_entity_field_config(config, fieldname, entity_type, bundle_type) + raw_field_config = get_entity_field_config( + config, fieldname, entity_type, bundle_type + ) field_config = json.loads(raw_field_config) - field_definitions[fieldname]['entity_type'] = field_config['entity_type'] - field_definitions[fieldname]['required'] = field_config['required'] - field_definitions[fieldname]['label'] = field_config['label'] - raw_vocabularies = [x for x in field_config['dependencies']['config'] if re.match("^taxonomy.vocabulary.", x)] + field_definitions[fieldname]["entity_type"] = field_config["entity_type"] + field_definitions[fieldname]["required"] = field_config["required"] + field_definitions[fieldname]["label"] = field_config["label"] + raw_vocabularies = [ + x + for x in field_config["dependencies"]["config"] + if re.match("^taxonomy.vocabulary.", x) + ] if len(raw_vocabularies) > 0: - vocabularies = [x.replace("taxonomy.vocabulary.", '') for x in raw_vocabularies] - field_definitions[fieldname]['vocabularies'] = vocabularies + vocabularies = [ + x.replace("taxonomy.vocabulary.", "") for x in raw_vocabularies + ] + field_definitions[fieldname]["vocabularies"] = vocabularies # Reference 'handler' could be nothing, 'default:taxonomy_term' (or some other entity type), or 'views'. 
- if 'handler' in field_config['settings']: - field_definitions[fieldname]['handler'] = field_config['settings']['handler'] + if "handler" in field_config["settings"]: + field_definitions[fieldname]["handler"] = field_config["settings"][ + "handler" + ] else: - field_definitions[fieldname]['handler'] = None - if 'handler_settings' in field_config['settings']: - field_definitions[fieldname]['handler_settings'] = field_config['settings']['handler_settings'] + field_definitions[fieldname]["handler"] = None + if "handler_settings" in field_config["settings"]: + field_definitions[fieldname]["handler_settings"] = field_config[ + "settings" + ]["handler_settings"] else: - field_definitions[fieldname]['handler_settings'] = None + field_definitions[fieldname]["handler_settings"] = None raw_field_storage = get_entity_field_storage(config, fieldname, entity_type) field_storage = json.loads(raw_field_storage) - field_definitions[fieldname]['field_type'] = field_storage['type'] - field_definitions[fieldname]['cardinality'] = field_storage['cardinality'] - if 'max_length' in field_storage['settings']: - field_definitions[fieldname]['max_length'] = field_storage['settings']['max_length'] + field_definitions[fieldname]["field_type"] = field_storage["type"] + field_definitions[fieldname]["cardinality"] = field_storage["cardinality"] + if "max_length" in field_storage["settings"]: + field_definitions[fieldname]["max_length"] = field_storage["settings"][ + "max_length" + ] else: - field_definitions[fieldname]['max_length'] = None - if 'target_type' in field_storage['settings']: - field_definitions[fieldname]['target_type'] = field_storage['settings']['target_type'] + field_definitions[fieldname]["max_length"] = None + if "target_type" in field_storage["settings"]: + field_definitions[fieldname]["target_type"] = field_storage["settings"][ + "target_type" + ] else: - field_definitions[fieldname]['target_type'] = None - if field_storage['type'] == 'typed_relation' and 'rel_types' in field_config['settings']: - field_definitions[fieldname]['typed_relations'] = field_config['settings']['rel_types'] - if 'authority_sources' in field_config['settings']: - field_definitions[fieldname]['authority_sources'] = list(field_config['settings']['authority_sources'].keys()) + field_definitions[fieldname]["target_type"] = None + if ( + field_storage["type"] == "typed_relation" + and "rel_types" in field_config["settings"] + ): + field_definitions[fieldname]["typed_relations"] = field_config[ + "settings" + ]["rel_types"] + if "authority_sources" in field_config["settings"]: + field_definitions[fieldname]["authority_sources"] = list( + field_config["settings"]["authority_sources"].keys() + ) else: - field_definitions[fieldname]['authority_sources'] = None - if 'allowed_values' in field_storage['settings']: - field_definitions[fieldname]['allowed_values'] = list(field_storage['settings']['allowed_values'].keys()) + field_definitions[fieldname]["authority_sources"] = None + if "allowed_values" in field_storage["settings"]: + field_definitions[fieldname]["allowed_values"] = list( + field_storage["settings"]["allowed_values"].keys() + ) else: - field_definitions[fieldname]['allowed_values'] = None - if field_config['field_type'].startswith('text'): - field_definitions[fieldname]['formatted_text'] = True + field_definitions[fieldname]["allowed_values"] = None + if field_config["field_type"].startswith("text"): + field_definitions[fieldname]["formatted_text"] = True else: - field_definitions[fieldname]['formatted_text'] = False + 
field_definitions[fieldname]["formatted_text"] = False # title's configuration is not returned by Drupal so we construct it here. Note: if you add a new key to # 'field_definitions', also add it to title's entry here. Also add it for 'title' in the other entity types, below. - field_definitions['title'] = { - 'entity_type': 'node', - 'required': True, - 'label': 'Title', - 'field_type': 'string', - 'cardinality': 1, - 'max_length': config['max_node_title_length'], - 'target_type': None, - 'handler': None, - 'handler_settings': None + field_definitions["title"] = { + "entity_type": "node", + "required": True, + "label": "Title", + "field_type": "string", + "cardinality": 1, + "max_length": config["max_node_title_length"], + "target_type": None, + "handler": None, + "handler_settings": None, } - if entity_type == 'taxonomy_term': - fields = get_entity_fields(config, 'taxonomy_term', bundle_type) + if entity_type == "taxonomy_term": + fields = get_entity_fields(config, "taxonomy_term", bundle_type) for fieldname in fields: field_definitions[fieldname] = {} - raw_field_config = get_entity_field_config(config, fieldname, entity_type, bundle_type) + raw_field_config = get_entity_field_config( + config, fieldname, entity_type, bundle_type + ) field_config = json.loads(raw_field_config) - field_definitions[fieldname]['entity_type'] = field_config['entity_type'] - field_definitions[fieldname]['required'] = field_config['required'] - field_definitions[fieldname]['label'] = field_config['label'] - raw_vocabularies = [x for x in field_config['dependencies']['config'] if re.match("^taxonomy.vocabulary.", x)] + field_definitions[fieldname]["entity_type"] = field_config["entity_type"] + field_definitions[fieldname]["required"] = field_config["required"] + field_definitions[fieldname]["label"] = field_config["label"] + raw_vocabularies = [ + x + for x in field_config["dependencies"]["config"] + if re.match("^taxonomy.vocabulary.", x) + ] if len(raw_vocabularies) > 0: - vocabularies = [x.replace("taxonomy.vocabulary.", '') for x in raw_vocabularies] - field_definitions[fieldname]['vocabularies'] = vocabularies + vocabularies = [ + x.replace("taxonomy.vocabulary.", "") for x in raw_vocabularies + ] + field_definitions[fieldname]["vocabularies"] = vocabularies # Reference 'handler' could be nothing, 'default:taxonomy_term' (or some other entity type), or 'views'. 
- if 'handler' in field_config['settings']: - field_definitions[fieldname]['handler'] = field_config['settings']['handler'] + if "handler" in field_config["settings"]: + field_definitions[fieldname]["handler"] = field_config["settings"][ + "handler" + ] else: - field_definitions[fieldname]['handler'] = None - if 'handler_settings' in field_config['settings']: - field_definitions[fieldname]['handler_settings'] = field_config['settings']['handler_settings'] + field_definitions[fieldname]["handler"] = None + if "handler_settings" in field_config["settings"]: + field_definitions[fieldname]["handler_settings"] = field_config[ + "settings" + ]["handler_settings"] else: - field_definitions[fieldname]['handler_settings'] = None + field_definitions[fieldname]["handler_settings"] = None raw_field_storage = get_entity_field_storage(config, fieldname, entity_type) field_storage = json.loads(raw_field_storage) - field_definitions[fieldname]['field_type'] = field_storage['type'] - field_definitions[fieldname]['cardinality'] = field_storage['cardinality'] - if 'max_length' in field_storage['settings']: - field_definitions[fieldname]['max_length'] = field_storage['settings']['max_length'] + field_definitions[fieldname]["field_type"] = field_storage["type"] + field_definitions[fieldname]["cardinality"] = field_storage["cardinality"] + if "max_length" in field_storage["settings"]: + field_definitions[fieldname]["max_length"] = field_storage["settings"][ + "max_length" + ] else: - field_definitions[fieldname]['max_length'] = None - if 'target_type' in field_storage['settings']: - field_definitions[fieldname]['target_type'] = field_storage['settings']['target_type'] + field_definitions[fieldname]["max_length"] = None + if "target_type" in field_storage["settings"]: + field_definitions[fieldname]["target_type"] = field_storage["settings"][ + "target_type" + ] else: - field_definitions[fieldname]['target_type'] = None - if 'authority_sources' in field_config['settings']: - field_definitions[fieldname]['authority_sources'] = list(field_config['settings']['authority_sources'].keys()) + field_definitions[fieldname]["target_type"] = None + if "authority_sources" in field_config["settings"]: + field_definitions[fieldname]["authority_sources"] = list( + field_config["settings"]["authority_sources"].keys() + ) else: - field_definitions[fieldname]['authority_sources'] = None - if field_storage['type'] == 'typed_relation' and 'rel_types' in field_config['settings']: - field_definitions[fieldname]['typed_relations'] = field_config['settings']['rel_types'] - if 'allowed_values' in field_storage['settings']: - field_definitions[fieldname]['allowed_values'] = list(field_storage['settings']['allowed_values'].keys()) + field_definitions[fieldname]["authority_sources"] = None + if ( + field_storage["type"] == "typed_relation" + and "rel_types" in field_config["settings"] + ): + field_definitions[fieldname]["typed_relations"] = field_config[ + "settings" + ]["rel_types"] + if "allowed_values" in field_storage["settings"]: + field_definitions[fieldname]["allowed_values"] = list( + field_storage["settings"]["allowed_values"].keys() + ) else: - field_definitions[fieldname]['allowed_values'] = None - if field_config['field_type'].startswith('text'): - field_definitions[fieldname]['formatted_text'] = True + field_definitions[fieldname]["allowed_values"] = None + if field_config["field_type"].startswith("text"): + field_definitions[fieldname]["formatted_text"] = True else: - field_definitions[fieldname]['formatted_text'] = False - - 
field_definitions['term_name'] = { - 'entity_type': 'taxonomy_term', - 'required': True, - 'label': 'Name', - 'field_type': 'string', - 'cardinality': 1, - 'max_length': 255, - 'target_type': None, - 'handler': None, - 'handler_settings': None + field_definitions[fieldname]["formatted_text"] = False + + field_definitions["term_name"] = { + "entity_type": "taxonomy_term", + "required": True, + "label": "Name", + "field_type": "string", + "cardinality": 1, + "max_length": 255, + "target_type": None, + "handler": None, + "handler_settings": None, } - if entity_type == 'media': + if entity_type == "media": fields = get_entity_fields(config, entity_type, bundle_type) for fieldname in fields: field_definitions[fieldname] = {} - raw_field_config = get_entity_field_config(config, fieldname, entity_type, bundle_type) + raw_field_config = get_entity_field_config( + config, fieldname, entity_type, bundle_type + ) field_config = json.loads(raw_field_config) - field_definitions[fieldname]['media_type'] = bundle_type - field_definitions[fieldname]['field_type'] = field_config['field_type'] - field_definitions[fieldname]['required'] = field_config['required'] - field_definitions[fieldname]['label'] = field_config['label'] - raw_vocabularies = [x for x in field_config['dependencies']['config'] if re.match("^taxonomy.vocabulary.", x)] + field_definitions[fieldname]["media_type"] = bundle_type + field_definitions[fieldname]["field_type"] = field_config["field_type"] + field_definitions[fieldname]["required"] = field_config["required"] + field_definitions[fieldname]["label"] = field_config["label"] + raw_vocabularies = [ + x + for x in field_config["dependencies"]["config"] + if re.match("^taxonomy.vocabulary.", x) + ] if len(raw_vocabularies) > 0: - vocabularies = [x.replace("taxonomy.vocabulary.", '') for x in raw_vocabularies] - field_definitions[fieldname]['vocabularies'] = vocabularies + vocabularies = [ + x.replace("taxonomy.vocabulary.", "") for x in raw_vocabularies + ] + field_definitions[fieldname]["vocabularies"] = vocabularies # Reference 'handler' could be nothing, 'default:taxonomy_term' (or some other entity type), or 'views'. 
- if 'handler' in field_config['settings']: - field_definitions[fieldname]['handler'] = field_config['settings']['handler'] + if "handler" in field_config["settings"]: + field_definitions[fieldname]["handler"] = field_config["settings"][ + "handler" + ] else: - field_definitions[fieldname]['handler'] = None - if 'handler_settings' in field_config['settings']: - field_definitions[fieldname]['handler_settings'] = field_config['settings']['handler_settings'] + field_definitions[fieldname]["handler"] = None + if "handler_settings" in field_config["settings"]: + field_definitions[fieldname]["handler_settings"] = field_config[ + "settings" + ]["handler_settings"] else: - field_definitions[fieldname]['handler_settings'] = None - if 'file_extensions' in field_config['settings']: - field_definitions[fieldname]['file_extensions'] = field_config['settings']['file_extensions'] + field_definitions[fieldname]["handler_settings"] = None + if "file_extensions" in field_config["settings"]: + field_definitions[fieldname]["file_extensions"] = field_config[ + "settings" + ]["file_extensions"] raw_field_storage = get_entity_field_storage(config, fieldname, entity_type) field_storage = json.loads(raw_field_storage) - field_definitions[fieldname]['field_type'] = field_storage['type'] - field_definitions[fieldname]['cardinality'] = field_storage['cardinality'] - if 'max_length' in field_storage['settings']: - field_definitions[fieldname]['max_length'] = field_storage['settings']['max_length'] + field_definitions[fieldname]["field_type"] = field_storage["type"] + field_definitions[fieldname]["cardinality"] = field_storage["cardinality"] + if "max_length" in field_storage["settings"]: + field_definitions[fieldname]["max_length"] = field_storage["settings"][ + "max_length" + ] else: - field_definitions[fieldname]['max_length'] = None - if 'target_type' in field_storage['settings']: - field_definitions[fieldname]['target_type'] = field_storage['settings']['target_type'] + field_definitions[fieldname]["max_length"] = None + if "target_type" in field_storage["settings"]: + field_definitions[fieldname]["target_type"] = field_storage["settings"][ + "target_type" + ] else: - field_definitions[fieldname]['target_type'] = None - if field_storage['type'] == 'typed_relation' and 'rel_types' in field_config['settings']: - field_definitions[fieldname]['typed_relations'] = field_config['settings']['rel_types'] - if 'authority_sources' in field_config['settings']: - field_definitions[fieldname]['authority_sources'] = list(field_config['settings']['authority_sources'].keys()) + field_definitions[fieldname]["target_type"] = None + if ( + field_storage["type"] == "typed_relation" + and "rel_types" in field_config["settings"] + ): + field_definitions[fieldname]["typed_relations"] = field_config[ + "settings" + ]["rel_types"] + if "authority_sources" in field_config["settings"]: + field_definitions[fieldname]["authority_sources"] = list( + field_config["settings"]["authority_sources"].keys() + ) else: - field_definitions[fieldname]['authority_sources'] = None - if 'allowed_values' in field_storage['settings']: - field_definitions[fieldname]['allowed_values'] = list(field_storage['settings']['allowed_values'].keys()) + field_definitions[fieldname]["authority_sources"] = None + if "allowed_values" in field_storage["settings"]: + field_definitions[fieldname]["allowed_values"] = list( + field_storage["settings"]["allowed_values"].keys() + ) else: - field_definitions[fieldname]['allowed_values'] = None - if 
field_config['field_type'].startswith('text'): - field_definitions[fieldname]['formatted_text'] = True + field_definitions[fieldname]["allowed_values"] = None + if field_config["field_type"].startswith("text"): + field_definitions[fieldname]["formatted_text"] = True else: - field_definitions[fieldname]['formatted_text'] = False - - field_definitions['name'] = { - 'entity_type': 'media', - 'required': True, - 'label': 'Name', - 'field_type': 'string', - 'cardinality': 1, - 'max_length': 255, - 'target_type': None, - 'handler': None, - 'handler_settings': None + field_definitions[fieldname]["formatted_text"] = False + + field_definitions["name"] = { + "entity_type": "media", + "required": True, + "label": "Name", + "field_type": "string", + "cardinality": 1, + "max_length": 255, + "target_type": None, + "handler": None, + "handler_settings": None, } - if entity_type == 'paragraph': + if entity_type == "paragraph": fields = get_entity_fields(config, entity_type, bundle_type) for fieldname in fields: # NOTE, WIP on #292. Code below copied from 'node' section above, may need modification. field_definitions[fieldname] = {} - raw_field_config = get_entity_field_config(config, fieldname, entity_type, bundle_type) + raw_field_config = get_entity_field_config( + config, fieldname, entity_type, bundle_type + ) field_config = json.loads(raw_field_config) - field_definitions[fieldname]['entity_type'] = field_config['entity_type'] - field_definitions[fieldname]['required'] = field_config['required'] - field_definitions[fieldname]['label'] = field_config['label'] - raw_vocabularies = [x for x in field_config['dependencies']['config'] if re.match("^taxonomy.vocabulary.", x)] + field_definitions[fieldname]["entity_type"] = field_config["entity_type"] + field_definitions[fieldname]["required"] = field_config["required"] + field_definitions[fieldname]["label"] = field_config["label"] + raw_vocabularies = [ + x + for x in field_config["dependencies"]["config"] + if re.match("^taxonomy.vocabulary.", x) + ] if len(raw_vocabularies) > 0: - vocabularies = [x.replace("taxonomy.vocabulary.", '') for x in raw_vocabularies] - field_definitions[fieldname]['vocabularies'] = vocabularies + vocabularies = [ + x.replace("taxonomy.vocabulary.", "") for x in raw_vocabularies + ] + field_definitions[fieldname]["vocabularies"] = vocabularies # Reference 'handler' could be nothing, 'default:taxonomy_term' (or some other entity type), or 'views'. 
- if 'handler' in field_config['settings']: - field_definitions[fieldname]['handler'] = field_config['settings']['handler'] + if "handler" in field_config["settings"]: + field_definitions[fieldname]["handler"] = field_config["settings"][ + "handler" + ] else: - field_definitions[fieldname]['handler'] = None - if 'handler_settings' in field_config['settings']: - field_definitions[fieldname]['handler_settings'] = field_config['settings']['handler_settings'] + field_definitions[fieldname]["handler"] = None + if "handler_settings" in field_config["settings"]: + field_definitions[fieldname]["handler_settings"] = field_config[ + "settings" + ]["handler_settings"] else: - field_definitions[fieldname]['handler_settings'] = None + field_definitions[fieldname]["handler_settings"] = None raw_field_storage = get_entity_field_storage(config, fieldname, entity_type) field_storage = json.loads(raw_field_storage) - field_definitions[fieldname]['field_type'] = field_storage['type'] - field_definitions[fieldname]['cardinality'] = field_storage['cardinality'] - if 'max_length' in field_storage['settings']: - field_definitions[fieldname]['max_length'] = field_storage['settings']['max_length'] + field_definitions[fieldname]["field_type"] = field_storage["type"] + field_definitions[fieldname]["cardinality"] = field_storage["cardinality"] + if "max_length" in field_storage["settings"]: + field_definitions[fieldname]["max_length"] = field_storage["settings"][ + "max_length" + ] else: - field_definitions[fieldname]['max_length'] = None - if 'target_type' in field_storage['settings']: - field_definitions[fieldname]['target_type'] = field_storage['settings']['target_type'] + field_definitions[fieldname]["max_length"] = None + if "target_type" in field_storage["settings"]: + field_definitions[fieldname]["target_type"] = field_storage["settings"][ + "target_type" + ] else: - field_definitions[fieldname]['target_type'] = None - if field_storage['type'] == 'typed_relation' and 'rel_types' in field_config['settings']: - field_definitions[fieldname]['typed_relations'] = field_config['settings']['rel_types'] - if 'authority_sources' in field_config['settings']: - field_definitions[fieldname]['authority_sources'] = list(field_config['settings']['authority_sources'].keys()) + field_definitions[fieldname]["target_type"] = None + if ( + field_storage["type"] == "typed_relation" + and "rel_types" in field_config["settings"] + ): + field_definitions[fieldname]["typed_relations"] = field_config[ + "settings" + ]["rel_types"] + if "authority_sources" in field_config["settings"]: + field_definitions[fieldname]["authority_sources"] = list( + field_config["settings"]["authority_sources"].keys() + ) else: - field_definitions[fieldname]['authority_sources'] = None - if 'allowed_values' in field_storage['settings']: - field_definitions[fieldname]['allowed_values'] = list(field_storage['settings']['allowed_values'].keys()) + field_definitions[fieldname]["authority_sources"] = None + if "allowed_values" in field_storage["settings"]: + field_definitions[fieldname]["allowed_values"] = list( + field_storage["settings"]["allowed_values"].keys() + ) else: - field_definitions[fieldname]['allowed_values'] = None - if field_config['field_type'].startswith('text'): - field_definitions[fieldname]['formatted_text'] = True + field_definitions[fieldname]["allowed_values"] = None + if field_config["field_type"].startswith("text"): + field_definitions[fieldname]["formatted_text"] = True else: - field_definitions[fieldname]['formatted_text'] = False + 
field_definitions[fieldname]["formatted_text"] = False return field_definitions @@ -1214,33 +1424,44 @@ def get_field_definitions(config, entity_type, bundle_type=None): def get_entity_fields(config, entity_type, bundle_type): """Get all the fields configured on a bundle. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). - entity_type : string - Values could be 'node', 'media', 'taxonomy_term', or 'paragraph'. - bundle_type : string + entity_type : string + Values could be 'node', 'media', 'taxonomy_term', or 'paragraph'. + bundle_type : string - Returns - ------- - list - A list with field names, e.g. ['field_name1', 'field_name2']. + Returns + ------- + list + A list with field names, e.g. ['field_name1', 'field_name2']. """ if ping_content_type(config) == 404: message = f"Content type '{config['content_type']}' does not exist on {config['host']}." logging.error(message) - sys.exit('Error: ' + message) - fields_endpoint = config['host'] + '/entity/entity_form_display/' + entity_type + '.' + bundle_type + '.default?_format=json' - bundle_type_response = issue_request(config, 'GET', fields_endpoint) + sys.exit("Error: " + message) + fields_endpoint = ( + config["host"] + + "/entity/entity_form_display/" + + entity_type + + "." + + bundle_type + + ".default?_format=json" + ) + bundle_type_response = issue_request(config, "GET", fields_endpoint) # If a vocabulary has no custom fields (like the default "Tags" vocab), this query will # return a 404 response. So, we need to use an alternative way to check if the vocabulary # really doesn't exist. - if bundle_type_response.status_code == 404 and entity_type == 'taxonomy_term': - fallback_fields_endpoint = '/entity/taxonomy_vocabulary/' + bundle_type + '?_format=json' - fallback_bundle_type_response = issue_request(config, 'GET', fallback_fields_endpoint) + if bundle_type_response.status_code == 404 and entity_type == "taxonomy_term": + fallback_fields_endpoint = ( + "/entity/taxonomy_vocabulary/" + bundle_type + "?_format=json" + ) + fallback_bundle_type_response = issue_request( + config, "GET", fallback_fields_endpoint + ) # If this request confirms the vocabulary exists, its OK to make some assumptions # about what fields it has. if fallback_bundle_type_response.status_code == 200: @@ -1249,24 +1470,30 @@ def get_entity_fields(config, entity_type, bundle_type): fields = [] if bundle_type_response.status_code == 200: node_config_raw = json.loads(bundle_type_response.text) - fieldname_prefix = 'field.field.' + entity_type + '.' + bundle_type + '.' + fieldname_prefix = "field.field." + entity_type + "." + bundle_type + "." fieldnames = [ - field_dependency.replace( - fieldname_prefix, - '') for field_dependency in node_config_raw['dependencies']['config']] - for fieldname in node_config_raw['dependencies']['config']: - fieldname_prefix = 'field.field.' + entity_type + '.' + bundle_type + '.' + field_dependency.replace(fieldname_prefix, "") + for field_dependency in node_config_raw["dependencies"]["config"] + ] + for fieldname in node_config_raw["dependencies"]["config"]: + fieldname_prefix = "field.field." + entity_type + "." + bundle_type + "." 
if re.match(fieldname_prefix, fieldname): - fieldname = fieldname.replace(fieldname_prefix, '') + fieldname = fieldname.replace(fieldname_prefix, "") fields.append(fieldname) else: - message = 'Workbench cannot retrieve field definitions from Drupal.' - if config['task'] == 'create_terms' or config['task'] == 'update_terms': + message = "Workbench cannot retrieve field definitions from Drupal." + if config["task"] == "create_terms" or config["task"] == "update_terms": message_detail = f" Check that the vocabulary name identified in your vocab_id config setting is spelled correctly." - if config['task'] == 'create' or config['task'] == 'create_from_files': + if config["task"] == "create" or config["task"] == "create_from_files": message_detail = f" Check that the content type named in your content_type config setting is spelled correctly." - logging.error(message + message_detail + " HTTP response code was " + str(bundle_type_response.status_code) + '.') - sys.exit('Error: ' + message + ' See the log for more information.') + logging.error( + message + + message_detail + + " HTTP response code was " + + str(bundle_type_response.status_code) + + "." + ) + sys.exit("Error: " + message + " See the log for more information.") return fields @@ -1274,27 +1501,33 @@ def get_entity_fields(config, entity_type, bundle_type): def get_required_bundle_fields(config, entity_type, bundle_type): """Gets a list of required fields for the given bundle type. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - entity_type : string - One of 'node', 'media', or 'taxonomy_term'. - bundle_type : string - The (node) content type, the vocabulary name, or the media type ('image', - 'document', 'audio', 'video', 'file', etc.). - - Returns - ------- - list - A list of Drupal field names that are configured as required for this bundle, e.g - ['required_field1_name', 'required_field2_name']. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + entity_type : string + One of 'node', 'media', or 'taxonomy_term'. + bundle_type : string + The (node) content type, the vocabulary name, or the media type ('image', + 'document', 'audio', 'video', 'file', etc.). + + Returns + ------- + list + A list of Drupal field names that are configured as required for this bundle, e.g + ['required_field1_name', 'required_field2_name']. """ field_definitions = get_field_definitions(config, entity_type, bundle_type) required_drupal_fields = list() for drupal_fieldname in field_definitions: - if 'entity_type' in field_definitions[drupal_fieldname] and field_definitions[drupal_fieldname]['entity_type'] == entity_type: - if 'required' in field_definitions[drupal_fieldname] and field_definitions[drupal_fieldname]['required'] is True: + if ( + "entity_type" in field_definitions[drupal_fieldname] + and field_definitions[drupal_fieldname]["entity_type"] == entity_type + ): + if ( + "required" in field_definitions[drupal_fieldname] + and field_definitions[drupal_fieldname]["required"] is True + ): required_drupal_fields.append(drupal_fieldname) return required_drupal_fields @@ -1302,63 +1535,81 @@ def get_required_bundle_fields(config, entity_type, bundle_type): def get_entity_field_config(config, fieldname, entity_type, bundle_type): """Get a specific fields's configuration. 
- Example query for taxo terms: /entity/field_config/taxonomy_term.islandora_media_use.field_external_uri?_format=json - """ - field_config_endpoint = config['host'] + '/entity/field_config/' + entity_type + '.' + bundle_type + '.' + fieldname + '?_format=json' - field_config_response = issue_request(config, 'GET', field_config_endpoint) + Example query for taxo terms: /entity/field_config/taxonomy_term.islandora_media_use.field_external_uri?_format=json + """ + field_config_endpoint = ( + config["host"] + + "/entity/field_config/" + + entity_type + + "." + + bundle_type + + "." + + fieldname + + "?_format=json" + ) + field_config_response = issue_request(config, "GET", field_config_endpoint) if field_config_response.status_code == 200: return field_config_response.text else: - message = 'Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled.' + message = "Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def get_entity_field_storage(config, fieldname, entity_type): """Get a specific fields's storage configuration. - Example query for taxo terms: /entity/field_storage_config/taxonomy_term.field_external_uri?_format=json - """ - field_storage_endpoint = config['host'] + '/entity/field_storage_config/' + entity_type + '.' + fieldname + '?_format=json' - field_storage_response = issue_request(config, 'GET', field_storage_endpoint) + Example query for taxo terms: /entity/field_storage_config/taxonomy_term.field_external_uri?_format=json + """ + field_storage_endpoint = ( + config["host"] + + "/entity/field_storage_config/" + + entity_type + + "." + + fieldname + + "?_format=json" + ) + field_storage_response = issue_request(config, "GET", field_storage_endpoint) if field_storage_response.status_code == 200: return field_storage_response.text else: - message = 'Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled.' + message = "Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def get_fieldname_map(config, entity_type, bundle_type, keys, die=True): """Get a mapping of field machine names to labels, or labels to machine names. - Note: does not account for multilingual configurations. - - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - entity_type : string - One of 'node', 'media', 'taxonomy_term', or 'paragraph'. - bundle_type : string - The node content type, the vocabulary name, or the media type - (image', 'document', 'audio', 'video', 'file', etc.). - keys: string - One of 'labels' or 'names'. 'labels' returns a dictionary where the field labels are - the keys, 'names' returns a dictionary where the field machine names are the keys. - die: bool - Whether or not to exit if there is a problem generating the map. - Returns - ------- - dict|bool - A dictionary with either field labels or machine names as the keys. - Returns False if the field labels are not unique. + Note: does not account for multilingual configurations. 
+ + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + entity_type : string + One of 'node', 'media', 'taxonomy_term', or 'paragraph'. + bundle_type : string + The node content type, the vocabulary name, or the media type + (image', 'document', 'audio', 'video', 'file', etc.). + keys: string + One of 'labels' or 'names'. 'labels' returns a dictionary where the field labels are + the keys, 'names' returns a dictionary where the field machine names are the keys. + die: bool + Whether or not to exit if there is a problem generating the map. + Returns + ------- + dict|bool + A dictionary with either field labels or machine names as the keys. + Returns False if the field labels are not unique. """ # We delete the cached map in check_input() and in Workbench's create(), update(), and # create_terms() functions so the cache is always fresh. - fieldname_map_cache_path = os.path.join(config['temp_dir'], f"{entity_type}-{bundle_type}-{keys}.fieldname_map") + fieldname_map_cache_path = os.path.join( + config["temp_dir"], f"{entity_type}-{bundle_type}-{keys}.fieldname_map" + ) if os.path.exists(fieldname_map_cache_path): - cache_file = open(fieldname_map_cache_path, 'r') + cache_file = open(fieldname_map_cache_path, "r") cache = cache_file.read() cache_file.close() return json.loads(cache) @@ -1367,25 +1618,29 @@ def get_fieldname_map(config, entity_type, bundle_type, keys, die=True): map = dict() labels = [] for field, properties in field_defs.items(): - labels.append(properties['label']) - if keys == 'labels': - map[properties['label']] = field + labels.append(properties["label"]) + if keys == "labels": + map[properties["label"]] = field - if keys == 'names': - map[field] = properties['label'] + if keys == "names": + map[field] = properties["label"] - if keys == 'labels': - duplicate_labels = [label for label, count in collections.Counter(labels).items() if count > 1] + if keys == "labels": + duplicate_labels = [ + label for label, count in collections.Counter(labels).items() if count > 1 + ] if len(duplicate_labels) > 0: if die is True: - message = f"Duplicate field labels exist ({', '. join(duplicate_labels)}). To continue, remove the \"csv_headers\" setting " + \ - "from your configuration file and change your CSV headers from field labels to field machine names." + message = ( + f"Duplicate field labels exist ({', '. join(duplicate_labels)}). To continue, remove the \"csv_headers\" setting " + + "from your configuration file and change your CSV headers from field labels to field machine names." + ) logging.error(message) sys.exit("Error: " + message) else: return False - cache_file = open(fieldname_map_cache_path, 'w') + cache_file = open(fieldname_map_cache_path, "w") cache_file.write(json.dumps(map)) cache_file.close() @@ -1395,24 +1650,26 @@ def get_fieldname_map(config, entity_type, bundle_type, keys, die=True): def replace_field_labels_with_names(config, csv_headers): """Replace field labels in a list of CSV column headers with their machine name equivalents. - Note: we can't use this feature for add_media or update_media tasks since the fieldnames - vary by media type, and each row in the CSV can have a different media type. - - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - csv_headers: list - A list containing the CSV headers. - Returns - ------- - list - The list of CSV headers with any labels replaced with field names. 
- """ - if config['task'] == 'create_terms' or config['task'] == 'update_terms': - field_map = get_fieldname_map(config, 'taxonomy_term', config['vocab_id'], 'labels') + Note: we can't use this feature for add_media or update_media tasks since the fieldnames + vary by media type, and each row in the CSV can have a different media type. + + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + csv_headers: list + A list containing the CSV headers. + Returns + ------- + list + The list of CSV headers with any labels replaced with field names. + """ + if config["task"] == "create_terms" or config["task"] == "update_terms": + field_map = get_fieldname_map( + config, "taxonomy_term", config["vocab_id"], "labels" + ) else: - field_map = get_fieldname_map(config, 'node', config['content_type'], 'labels') + field_map = get_fieldname_map(config, "node", config["content_type"], "labels") for header_index in range(len(csv_headers)): if csv_headers[header_index] in field_map: @@ -1424,18 +1681,22 @@ def replace_field_labels_with_names(config, csv_headers): def check_input(config, args): """Validate the config file and input data. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - args: ArgumentParser - Command-line arguments from argparse.parse_args(). - Returns - ------- - None - Exits if an error is encountered. + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + args: ArgumentParser + Command-line arguments from argparse.parse_args(). + Returns + ------- + None + Exits if an error is encountered. """ - logging.info('Starting configuration check for "%s" task using config file %s.', config['task'], args.config) + logging.info( + 'Starting configuration check for "%s" task using config file %s.', + config["task"], + args.config, + ) ping_islandora(config, print_message=False) check_integration_module_version(config) @@ -1443,187 +1704,240 @@ def check_input(config, args): rows_with_missing_files = list() # @todo #606: break out node entity and reserved field, media entity and reserved field, and term entity and reserved fields? - node_base_fields = ['title', 'status', 'promote', 'sticky', 'uid', 'created', 'published'] + node_base_fields = [ + "title", + "status", + "promote", + "sticky", + "uid", + "created", + "published", + ] # Any new reserved columns introduced into the CSV need to be removed here. 'langcode' is a standard Drupal field # but it doesn't show up in any field configs. - reserved_fields = ['file', 'media_use_tid', 'checksum', 'node_id', 'url_alias', 'image_alt_text', 'parent_id', 'langcode', 'revision_log'] - entity_fields = get_entity_fields(config, 'node', config['content_type']) - if config['id_field'] not in entity_fields: - reserved_fields.append(config['id_field']) + reserved_fields = [ + "file", + "media_use_tid", + "checksum", + "node_id", + "url_alias", + "image_alt_text", + "parent_id", + "langcode", + "revision_log", + ] + entity_fields = get_entity_fields(config, "node", config["content_type"]) + if config["id_field"] not in entity_fields: + reserved_fields.append(config["id_field"]) # Check the config file. 
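    # Editor's note: the per-task blocks below all repeat the same pattern: confirm the
    # task name is recognized, then confirm that the options the task requires are
    # present in the config, exiting with a logged error otherwise. A condensed,
    # hypothetical sketch of that pattern (the dict and variable names here are
    # illustrative only, not part of Workbench):
    #
    #     required_by_task = {"create": ["task", "host", "username", "password"]}
    #     missing = [o for o in required_by_task.get(config["task"], []) if o not in config_keys]
    #     if missing:
    #         sys.exit("Please check your config file for required values: " + ", ".join(missing))
    #
    # The actual checks spell each task out explicitly; black is only reformatting them here.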
tasks = [ - 'create', - 'update', - 'delete', - 'add_media', - 'update_media', - 'delete_media', - 'delete_media_by_node', - 'create_from_files', - 'create_terms', - 'export_csv', - 'get_data_from_view', - 'get_media_report_from_view', - 'update_terms' + "create", + "update", + "delete", + "add_media", + "update_media", + "delete_media", + "delete_media_by_node", + "create_from_files", + "create_terms", + "export_csv", + "get_data_from_view", + "get_media_report_from_view", + "update_terms", ] - joiner = ', ' - if config['task'] not in tasks: - message = '"task" in your configuration file must be one of "create", "update", "delete", ' + \ - '"add_media", "update_media", "delete_media", "delete_media_by_node", "create_from_files", "create_terms", "export_csv", "get_data_from_view", or "update_terms".' + joiner = ", " + if config["task"] not in tasks: + message = ( + '"task" in your configuration file must be one of "create", "update", "delete", ' + + '"add_media", "update_media", "delete_media", "delete_media_by_node", "create_from_files", "create_terms", "export_csv", "get_data_from_view", or "update_terms".' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) config_keys = list(config.keys()) - config_keys.remove('check') + config_keys.remove("check") # Check for presence of required config keys, which varies by task. - if config['task'] == 'create': - if config['nodes_only'] is True: + if config["task"] == "create": + if config["nodes_only"] is True: message = '"nodes_only" option in effect. Media files will not be checked/validated.' print(message) logging.info(message) - create_required_options = [ - 'task', - 'host', - 'username', - 'password'] + create_required_options = ["task", "host", "username", "password"] for create_required_option in create_required_options: if create_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(create_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(create_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'update': - update_required_options = [ - 'task', - 'host', - 'username', - 'password'] + sys.exit("Error: " + message) + if config["task"] == "update": + update_required_options = ["task", "host", "username", "password"] for update_required_option in update_required_options: if update_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(update_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(update_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - update_mode_options = ['replace', 'append', 'delete'] - if config['update_mode'] not in update_mode_options: - message = 'Your "update_mode" config option must be one of the following: ' + joiner.join(update_mode_options) + '.' + sys.exit("Error: " + message) + update_mode_options = ["replace", "append", "delete"] + if config["update_mode"] not in update_mode_options: + message = ( + 'Your "update_mode" config option must be one of the following: ' + + joiner.join(update_mode_options) + + "." 
+ ) logging.error(message) - sys.exit('Error: ' + message) - - if config['task'] == 'delete': - delete_required_options = [ - 'task', - 'host', - 'username', - 'password'] + sys.exit("Error: " + message) + + if config["task"] == "delete": + delete_required_options = ["task", "host", "username", "password"] for delete_required_option in delete_required_options: if delete_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(delete_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(delete_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'add_media': - add_media_required_options = [ - 'task', - 'host', - 'username', - 'password'] + sys.exit("Error: " + message) + if config["task"] == "add_media": + add_media_required_options = ["task", "host", "username", "password"] for add_media_required_option in add_media_required_options: if add_media_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(add_media_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(add_media_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'update_media': + sys.exit("Error: " + message) + if config["task"] == "update_media": update_media_required_options = [ - 'task', - 'host', - 'username', - 'password', - 'input_csv', - 'media_type'] + "task", + "host", + "username", + "password", + "input_csv", + "media_type", + ] for update_media_required_option in update_media_required_options: if update_media_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(update_media_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(update_media_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - update_mode_options = ['replace', 'append', 'delete'] - if config['update_mode'] not in update_mode_options: - message = 'Your "update_mode" config option must be one of the following: ' + joiner.join(update_mode_options) + '.' + sys.exit("Error: " + message) + update_mode_options = ["replace", "append", "delete"] + if config["update_mode"] not in update_mode_options: + message = ( + 'Your "update_mode" config option must be one of the following: ' + + joiner.join(update_mode_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'delete_media': - delete_media_required_options = [ - 'task', - 'host', - 'username', - 'password'] + sys.exit("Error: " + message) + if config["task"] == "delete_media": + delete_media_required_options = ["task", "host", "username", "password"] for delete_media_required_option in delete_media_required_options: if delete_media_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(delete_media_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(delete_media_required_options) + + "." 
+ ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'delete_media_by_node': - delete_media_by_node_required_options = [ - 'task', - 'host', - 'username', - 'password'] - for delete_media_by_node_required_option in delete_media_by_node_required_options: + sys.exit("Error: " + message) + if config["task"] == "delete_media_by_node": + delete_media_by_node_required_options = ["task", "host", "username", "password"] + for ( + delete_media_by_node_required_option + ) in delete_media_by_node_required_options: if delete_media_by_node_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(delete_media_by_node_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(delete_media_by_node_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'export_csv': - export_csv_required_options = [ - 'task', - 'host', - 'username', - 'password'] + sys.exit("Error: " + message) + if config["task"] == "export_csv": + export_csv_required_options = ["task", "host", "username", "password"] for export_csv_required_option in export_csv_required_options: if export_csv_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(export_csv_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(export_csv_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['export_csv_term_mode'] == 'name': + sys.exit("Error: " + message) + if config["export_csv_term_mode"] == "name": message = 'The "export_csv_term_mode" configuration option is set to "name", which will slow down the export.' print(message) - if config['task'] == 'create_terms': + if config["task"] == "create_terms": create_terms_required_options = [ - 'task', - 'host', - 'username', - 'password', - 'vocab_id'] + "task", + "host", + "username", + "password", + "vocab_id", + ] for create_terms_required_option in create_terms_required_options: if create_terms_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(create_terms_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(create_terms_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'get_data_from_view' or config['task'] == 'get_media_report_from_view': + sys.exit("Error: " + message) + if ( + config["task"] == "get_data_from_view" + or config["task"] == "get_media_report_from_view" + ): get_data_from_view_required_options = [ - 'task', - 'host', - 'username', - 'password', - 'view_path'] + "task", + "host", + "username", + "password", + "view_path", + ] for get_data_from_view_required_option in get_data_from_view_required_options: if get_data_from_view_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(get_data_from_view_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(get_data_from_view_required_options) + + "." 
+ ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'update_terms': + sys.exit("Error: " + message) + if config["task"] == "update_terms": update_terms_required_options = [ - 'task', - 'host', - 'username', - 'password', - 'vocab_id'] + "task", + "host", + "username", + "password", + "vocab_id", + ] for update_terms_required_option in update_terms_required_options: if update_terms_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(update_terms_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(update_terms_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - message = 'OK, configuration file has all required values (did not check for optional values).' + message = "OK, configuration file has all required values (did not check for optional values)." print(message) logging.info(message) @@ -1631,13 +1945,24 @@ def check_input(config, args): # Perform checks on get_data_from_view tasks. Since this task doesn't use input_dir, input_csv, etc., # we exit immediately after doing these checks. - if config['task'] == 'get_data_from_view' or config['task'] == 'get_media_report_from_view': + if ( + config["task"] == "get_data_from_view" + or config["task"] == "get_media_report_from_view" + ): # First, ping the View. - view_parameters = '&'.join(config['view_parameters']) if 'view_parameters' in config else '' - view_url = config['host'] + '/' + config['view_path'].lstrip('/') + '?page=0&' + view_parameters + view_parameters = ( + "&".join(config["view_parameters"]) if "view_parameters" in config else "" + ) + view_url = ( + config["host"] + + "/" + + config["view_path"].lstrip("/") + + "?page=0&" + + view_parameters + ) view_path_status_code = ping_view_endpoint(config, view_url) - view_url_for_message = config['host'] + '/' + config['view_path'].lstrip('/') + view_url_for_message = config["host"] + "/" + config["view_path"].lstrip("/") if view_path_status_code != 200: message = f"Cannot access View at {view_url_for_message}." logging.error(message) @@ -1647,26 +1972,34 @@ def check_input(config, args): logging.info(message) print("OK, " + message) - if config['export_file_directory'] is not None: - if not os.path.exists(config['export_file_directory']): + if config["export_file_directory"] is not None: + if not os.path.exists(config["export_file_directory"]): try: - os.mkdir(config['export_file_directory']) - os.rmdir(config['export_file_directory']) + os.mkdir(config["export_file_directory"]) + os.rmdir(config["export_file_directory"]) except Exception as e: - message = 'Path in configuration option "export_file_directory" ("' + config['export_file_directory'] + '") is not writable.' - logging.error(message + ' ' + str(e)) - sys.exit('Error: ' + message + ' See log for more detail.') - - if config['export_file_media_use_term_id'] is False: + message = ( + 'Path in configuration option "export_file_directory" ("' + + config["export_file_directory"] + + '") is not writable.' + ) + logging.error(message + " " + str(e)) + sys.exit("Error: " + message + " See log for more detail.") + + if config["export_file_media_use_term_id"] is False: message = f'Unknown value for configuration setting "export_file_media_use_term_id": {config["export_file_media_use_term_id"]}.' 
logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check to make sure the output path for the CSV file is writable. - if config['export_csv_file_path'] is not None: - csv_file_path = config['export_csv_file_path'] + if config["export_csv_file_path"] is not None: + csv_file_path = config["export_csv_file_path"] else: - csv_file_path = os.path.join(config['input_dir'], os.path.basename(args.config).split('.')[0] + '.csv_file_with_data_from_view') + csv_file_path = os.path.join( + config["input_dir"], + os.path.basename(args.config).split(".")[0] + + ".csv_file_with_data_from_view", + ) csv_file_path_file = open(csv_file_path, "a") if csv_file_path_file.writable() is False: message = f'Path to CSV file "{csv_file_path}" is not writable.' @@ -1674,7 +2007,7 @@ def check_input(config, args): csv_file_path_file.close() sys.exit("Error: " + message) else: - message = f'CSV output file location at {csv_file_path} is writable.' + message = f"CSV output file location at {csv_file_path} is writable." logging.info(message) print("OK, " + message) csv_file_path_file.close() @@ -1684,40 +2017,74 @@ def check_input(config, args): # If nothing has failed by now, exit with a positive, upbeat message. print("Configuration and input data appear to be valid.") - if config['perform_soft_checks'] is True: - print('Warning: "perform_soft_checks" is enabled so you need to review your log for errors despite the "OK" reports above.') - logging.info('Configuration checked for "%s" task using config file "%s", no problems found.', config['task'], args.config) + if config["perform_soft_checks"] is True: + print( + 'Warning: "perform_soft_checks" is enabled so you need to review your log for errors despite the "OK" reports above.' + ) + logging.info( + 'Configuration checked for "%s" task using config file "%s", no problems found.', + config["task"], + args.config, + ) sys.exit() validate_input_dir(config) - check_csv_file_exists(config, 'node_fields') + check_csv_file_exists(config, "node_fields") # Check column headers in CSV file. Does not apply to add_media or update_media tasks (handled just below). 
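    # Editor's note: when the "csv_headers" setting is "labels", the block below swaps
    # human-readable CSV headers for Drupal machine names via the label-to-name map that
    # get_fieldname_map() builds (and caches in temp_dir). A hypothetical example of the
    # conversion, assuming the content type has a field labelled "Subject" whose machine
    # name is "field_subject":
    #
    #     replace_field_labels_with_names(config, ["title", "Subject"])
    #     # -> ["title", "field_subject"]
    #
    # Duplicate field labels make that map ambiguous, which is why get_fieldname_map()
    # exits with an error (or returns False when die is False) earlier in this file.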
csv_data = get_csv_data(config) - if config['csv_headers'] == 'labels' and config['task'] in ['create', 'update', 'create_terms', 'update_terms']: - if config['task'] == 'create_terms' or config['task'] == 'update_terms': - fieldname_map_cache_path = os.path.join(config['temp_dir'], f"taxonomy_term-{config['vocab_id']}-labels.fieldname_map") + if config["csv_headers"] == "labels" and config["task"] in [ + "create", + "update", + "create_terms", + "update_terms", + ]: + if config["task"] == "create_terms" or config["task"] == "update_terms": + fieldname_map_cache_path = os.path.join( + config["temp_dir"], + f"taxonomy_term-{config['vocab_id']}-labels.fieldname_map", + ) else: - fieldname_map_cache_path = os.path.join(config['temp_dir'], f"node-{config['content_type']}-labels.fieldname_map") + fieldname_map_cache_path = os.path.join( + config["temp_dir"], + f"node-{config['content_type']}-labels.fieldname_map", + ) if os.path.exists(fieldname_map_cache_path): os.remove(fieldname_map_cache_path) - csv_column_headers = replace_field_labels_with_names(config, csv_data.fieldnames) + csv_column_headers = replace_field_labels_with_names( + config, csv_data.fieldnames + ) else: csv_column_headers = csv_data.fieldnames - if config['task'] in ['add_media', 'update_media']: - field_definitions = get_field_definitions(config, 'media', config['media_type']) - base_media_fields = ['status', 'uid', 'langcode'] + if config["task"] in ["add_media", "update_media"]: + field_definitions = get_field_definitions(config, "media", config["media_type"]) + base_media_fields = ["status", "uid", "langcode"] drupal_fieldnames = [] for drupal_fieldname in field_definitions: drupal_fieldnames.append(drupal_fieldname) for csv_column_header in csv_column_headers: - if csv_column_header not in drupal_fieldnames and csv_column_header != 'media_id' and csv_column_header != 'file' and csv_column_header not in base_media_fields: - logging.error('CSV column header %s does not match any Drupal field names in the %s media type', csv_column_header, config['media_type']) - sys.exit('Error: CSV column header "' + csv_column_header + '" does not match any Drupal field names in the "' + config['media_type'] + '" media type.') - message = 'OK, CSV column headers match Drupal field names.' + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header != "media_id" + and csv_column_header != "file" + and csv_column_header not in base_media_fields + ): + logging.error( + "CSV column header %s does not match any Drupal field names in the %s media type", + csv_column_header, + config["media_type"], + ) + sys.exit( + 'Error: CSV column header "' + + csv_column_header + + '" does not match any Drupal field names in the "' + + config["media_type"] + + '" media type.' + ) + message = "OK, CSV column headers match Drupal field names." print(message) logging.info(message) @@ -1728,68 +2095,109 @@ def check_input(config, args): field_count = 0 for field in row: # 'stringtopopulateextrafields' is added by get_csv_data() if there are extra headers. - if row[field] == 'stringtopopulateextrafields': + if row[field] == "stringtopopulateextrafields": extra_headers = True else: field_count += 1 if extra_headers is True: - message = "Row " + str(row_count) + " (ID " + row[config['id_field']] + ") of the CSV file has fewer columns " + \ - "than there are headers (" + str(len(csv_column_headers)) + ")." 
+ message = ( + "Row " + + str(row_count) + + " (ID " + + row[config["id_field"]] + + ") of the CSV file has fewer columns " + + "than there are headers (" + + str(len(csv_column_headers)) + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Note: this message is also generated in get_csv_data() since CSV Writer thows an exception if the row has form fields than headers. if len(csv_column_headers) < field_count: - message = "Row " + str(row_count) + " (ID " + row[config['id_field']] + ") of the CSV file has more columns (" + \ - str(field_count) + ") than there are headers (" + str(len(csv_column_headers)) + ")." + message = ( + "Row " + + str(row_count) + + " (ID " + + row[config["id_field"]] + + ") of the CSV file has more columns (" + + str(field_count) + + ") than there are headers (" + + str(len(csv_column_headers)) + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if row_count == 0: - message = "Input CSV file " + config['input_csv'] + " has 0 rows." + message = "Input CSV file " + config["input_csv"] + " has 0 rows." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: - message = "OK, all " + str(row_count) + " rows in the CSV file have the same number of columns as there are headers (" + str(len(csv_column_headers)) + ")." + message = ( + "OK, all " + + str(row_count) + + " rows in the CSV file have the same number of columns as there are headers (" + + str(len(csv_column_headers)) + + ")." + ) print(message) logging.info(message) # Task-specific CSV checks. langcode_was_present = False - if config['task'] == 'create': - field_definitions = get_field_definitions(config, 'node') - if config['id_field'] not in csv_column_headers: + if config["task"] == "create": + field_definitions = get_field_definitions(config, "node") + if config["id_field"] not in csv_column_headers: message = 'For "create" tasks, your CSV file must have a column containing a unique identifier.' logging.error(message) - sys.exit('Error: ' + message) - if config['nodes_only'] is False and 'file' not in csv_column_headers and config['paged_content_from_directories'] is False: + sys.exit("Error: " + message) + if ( + config["nodes_only"] is False + and "file" not in csv_column_headers + and config["paged_content_from_directories"] is False + ): message = 'For "create" tasks, your CSV file must contain a "file" column.' logging.error(message) - sys.exit('Error: ' + message) - if 'title' not in csv_column_headers: + sys.exit("Error: " + message) + if "title" not in csv_column_headers: message = 'For "create" tasks, your CSV file must contain a "title" column.' logging.error(message) - sys.exit('Error: ' + message) - if 'output_csv' in config.keys(): - if os.path.exists(config['output_csv']): - message = 'Output CSV already exists at ' + config['output_csv'] + ', records will be appended to it.' + sys.exit("Error: " + message) + if "output_csv" in config.keys(): + if os.path.exists(config["output_csv"]): + message = ( + "Output CSV already exists at " + + config["output_csv"] + + ", records will be appended to it." 
+ ) print(message) logging.info(message) - if 'url_alias' in csv_column_headers: + if "url_alias" in csv_column_headers: validate_url_aliases_csv_data = get_csv_data(config) validate_url_aliases(config, validate_url_aliases_csv_data) # We populate the ISLANDORA_WORKBENCH_PRIMARY_TASK_EXECUTION_START_TIME environment variable here so secondary # tasks can access it during in validate_parent_ids_in_csv_id_to_node_id_map(). - workbench_execution_start_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now()) + workbench_execution_start_time = "{:%Y-%m-%d %H:%M:%S}".format( + datetime.datetime.now() + ) # Assumes that only primary tasks have something in their 'secondary_tasks' config setting. - if config['secondary_tasks'] is not None: - os.environ["ISLANDORA_WORKBENCH_PRIMARY_TASK_EXECUTION_START_TIME"] = workbench_execution_start_time - if 'parent_id' in csv_column_headers: + if config["secondary_tasks"] is not None: + os.environ["ISLANDORA_WORKBENCH_PRIMARY_TASK_EXECUTION_START_TIME"] = ( + workbench_execution_start_time + ) + if "parent_id" in csv_column_headers: validate_parent_ids_precede_children_csv_data = get_csv_data(config) - validate_parent_ids_precede_children(config, validate_parent_ids_precede_children_csv_data) + validate_parent_ids_precede_children( + config, validate_parent_ids_precede_children_csv_data + ) prepare_csv_id_to_node_id_map(config) - if config['query_csv_id_to_node_id_map_for_parents'] is True: - validate_parent_ids_in_csv_id_to_node_id_map_csv_data = get_csv_data(config) - validate_parent_ids_in_csv_id_to_node_id_map(config, validate_parent_ids_in_csv_id_to_node_id_map_csv_data) + if config["query_csv_id_to_node_id_map_for_parents"] is True: + validate_parent_ids_in_csv_id_to_node_id_map_csv_data = get_csv_data( + config + ) + validate_parent_ids_in_csv_id_to_node_id_map( + config, validate_parent_ids_in_csv_id_to_node_id_map_csv_data + ) else: message = "Only node IDs for parents created during this session will be used (not using the CSV ID to node ID map)." print(message) @@ -1797,34 +2205,48 @@ def check_input(config, args): # Specific to creating aggregated content such as collections, compound objects and paged content. Currently, if 'parent_id' is present # in the CSV file 'field_member_of' is mandatory. - if 'parent_id' in csv_column_headers: - if ('field_weight' not in csv_column_headers): + if "parent_id" in csv_column_headers: + if "field_weight" not in csv_column_headers: message = 'If ingesting paged content, or compound objects where order is required a "field_weight" column is required.' logging.info(message) - if ('field_member_of' not in csv_column_headers): + if "field_member_of" not in csv_column_headers: message = 'If your CSV file contains a "parent_id" column, it must also contain a "field_member_of" column (with empty values in child rows).' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) drupal_fieldnames = [] for drupal_fieldname in field_definitions: drupal_fieldnames.append(drupal_fieldname) if len(drupal_fieldnames) == 0: - message = 'Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled.' + message = "Workbench cannot retrieve field definitions from Drupal. Please confirm that the Field, Field Storage, and Entity Form Display REST resources are enabled." 
logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['list_missing_drupal_fields'] is True: + if config["list_missing_drupal_fields"] is True: missing_drupal_fields = [] for csv_column_header in csv_column_headers: - if csv_column_header not in drupal_fieldnames and csv_column_header not in node_base_fields: - if csv_column_header not in reserved_fields and csv_column_header not in get_additional_files_config(config).keys(): - if csv_column_header != config['id_field']: + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header not in node_base_fields + ): + if ( + csv_column_header not in reserved_fields + and csv_column_header + not in get_additional_files_config(config).keys() + ): + if csv_column_header != config["id_field"]: missing_drupal_fields.append(csv_column_header) if len(missing_drupal_fields) > 0: - missing_drupal_fields_message = ', '.join(missing_drupal_fields) - logging.error("The following header(s) require a matching Drupal field name: %s.", missing_drupal_fields_message) - sys.exit('Error: The following header(s) require a matching Drupal field name: ' + missing_drupal_fields_message + '.') + missing_drupal_fields_message = ", ".join(missing_drupal_fields) + logging.error( + "The following header(s) require a matching Drupal field name: %s.", + missing_drupal_fields_message, + ) + sys.exit( + "Error: The following header(s) require a matching Drupal field name: " + + missing_drupal_fields_message + + "." + ) # We .remove() CSV column headers for this check because they are not Drupal field names (including 'langcode'). for reserved_field in reserved_fields: @@ -1832,17 +2254,22 @@ def check_input(config, args): csv_column_headers.remove(reserved_field) # langcode is a standard Drupal field but it doesn't show up in any field configs. - if 'langcode' in csv_column_headers: - csv_column_headers.remove('langcode') + if "langcode" in csv_column_headers: + csv_column_headers.remove("langcode") # Set this so we can validate langcode below. langcode_was_present = True # We .remove() CSV column headers that use the 'media:video:field_foo' media track convention. media_track_headers = list() for column_header in csv_column_headers: - if column_header.startswith('media:'): - media_track_header_parts = column_header.split(':') - if media_track_header_parts[1] in config['media_track_file_fields'].keys() and media_track_header_parts[2] == config['media_track_file_fields'][media_track_header_parts[1]]: + if column_header.startswith("media:"): + media_track_header_parts = column_header.split(":") + if ( + media_track_header_parts[1] + in config["media_track_file_fields"].keys() + and media_track_header_parts[2] + == config["media_track_file_fields"][media_track_header_parts[1]] + ): media_track_headers.append(column_header) for media_track_header in media_track_headers: if media_track_header in csv_column_headers: @@ -1850,140 +2277,231 @@ def check_input(config, args): # We also validate the structure of the media track column headers. for media_track_header in media_track_headers: - media_track_header_parts = media_track_header.split(':') - if media_track_header_parts[0] != 'media' and len(media_track_header_parts) != 3: - message = f'"{media_track_header}" is not a valide media track CSV header.' 
+ media_track_header_parts = media_track_header.split(":") + if ( + media_track_header_parts[0] != "media" + and len(media_track_header_parts) != 3 + ): + message = ( + f'"{media_track_header}" is not a valide media track CSV header.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check for the View that is necessary for entity reference fields configured # as "Views: Filter by an entity reference View" (issue 452). for csv_column_header in csv_column_headers: - if csv_column_header in field_definitions and field_definitions[csv_column_header]['handler'] == 'views': - if config['require_entity_reference_views'] is True: - entity_reference_view_exists = ping_entity_reference_view_endpoint(config, csv_column_header, field_definitions[csv_column_header]['handler_settings']) + if ( + csv_column_header in field_definitions + and field_definitions[csv_column_header]["handler"] == "views" + ): + if config["require_entity_reference_views"] is True: + entity_reference_view_exists = ping_entity_reference_view_endpoint( + config, + csv_column_header, + field_definitions[csv_column_header]["handler_settings"], + ) if entity_reference_view_exists is False: - console_message = 'Workbench cannot access the View "' + field_definitions[csv_column_header]['handler_settings']['view']['view_name'] + \ - '" required to validate values for field "' + csv_column_header + '". See log for more detail.' - log_message = 'Workbench cannot access the path defined by the REST Export display "' + \ - field_definitions[csv_column_header]['handler_settings']['view']['display_name'] + \ - '" in the View "' + field_definitions[csv_column_header]['handler_settings']['view']['view_name'] + \ - '" required to validate values for field "' + csv_column_header + '". Please check your Drupal Views configuration.' + \ - ' See the "Entity Reference Views fields" section of ' + \ - 'https://mjordan.github.io/islandora_workbench_docs/fields/#entity-reference-views-fields for more info.' + console_message = ( + 'Workbench cannot access the View "' + + field_definitions[csv_column_header]["handler_settings"][ + "view" + ]["view_name"] + + '" required to validate values for field "' + + csv_column_header + + '". See log for more detail.' + ) + log_message = ( + 'Workbench cannot access the path defined by the REST Export display "' + + field_definitions[csv_column_header]["handler_settings"][ + "view" + ]["display_name"] + + '" in the View "' + + field_definitions[csv_column_header]["handler_settings"][ + "view" + ]["view_name"] + + '" required to validate values for field "' + + csv_column_header + + '". Please check your Drupal Views configuration.' + + ' See the "Entity Reference Views fields" section of ' + + "https://mjordan.github.io/islandora_workbench_docs/fields/#entity-reference-views-fields for more info." + ) logging.error(log_message) - sys.exit('Error: ' + console_message) + sys.exit("Error: " + console_message) else: message = f'Workbench will not validate values in your CSV file\'s "{csv_column_header}" column because your "require_entity_reference_views" configuration setting is "false".' 
- print('Warning: ' + message) - logging.warning(message + - ' See the "Entity Reference Views fields" section of ' + - 'https://mjordan.github.io/islandora_workbench_docs/fields/#entity-reference-views-fields for more info.') + print("Warning: " + message) + logging.warning( + message + + ' See the "Entity Reference Views fields" section of ' + + "https://mjordan.github.io/islandora_workbench_docs/fields/#entity-reference-views-fields for more info." + ) if len(get_additional_files_config(config)) > 0: - if csv_column_header not in drupal_fieldnames and csv_column_header not in node_base_fields and csv_column_header not in get_additional_files_config(config).keys(): - if csv_column_header in config['ignore_csv_columns']: + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header not in node_base_fields + and csv_column_header + not in get_additional_files_config(config).keys() + ): + if csv_column_header in config["ignore_csv_columns"]: continue additional_files_entries = get_additional_files_config(config) if csv_column_header in additional_files_entries.keys(): continue - logging.error('CSV column header %s does not match any Drupal, reserved, or "additional_files" field names.', csv_column_header) - sys.exit('Error: CSV column header "' + csv_column_header + '" does not match any Drupal, reserved, or "additional_files" field names.') + logging.error( + 'CSV column header %s does not match any Drupal, reserved, or "additional_files" field names.', + csv_column_header, + ) + sys.exit( + 'Error: CSV column header "' + + csv_column_header + + '" does not match any Drupal, reserved, or "additional_files" field names.' + ) else: - if csv_column_header not in drupal_fieldnames and csv_column_header not in node_base_fields and csv_column_header: - if csv_column_header in config['ignore_csv_columns']: + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header not in node_base_fields + and csv_column_header + ): + if csv_column_header in config["ignore_csv_columns"]: continue - logging.error("CSV column header %s does not match any Drupal or reserved field names.", csv_column_header) - sys.exit('Error: CSV column header "' + csv_column_header + '" does not match any Drupal or reserved field names.') - message = 'OK, CSV column headers match Drupal field names.' + logging.error( + "CSV column header %s does not match any Drupal or reserved field names.", + csv_column_header, + ) + sys.exit( + 'Error: CSV column header "' + + csv_column_header + + '" does not match any Drupal or reserved field names.' + ) + message = "OK, CSV column headers match Drupal field names." print(message) logging.info(message) # Check that Drupal fields that are required are in the 'create' task CSV file. - if config['task'] == 'create': - required_drupal_fields_node = get_required_bundle_fields(config, 'node', config['content_type']) + if config["task"] == "create": + required_drupal_fields_node = get_required_bundle_fields( + config, "node", config["content_type"] + ) for required_drupal_field in required_drupal_fields_node: if required_drupal_field not in csv_column_headers: - logging.error("Required Drupal field %s is not present in the CSV file.", required_drupal_field) - sys.exit('Error: Field "' + required_drupal_field + '" required for content type "' + config['content_type'] + '" is not present in the CSV file.') - message = 'OK, required Drupal fields are present in the CSV file.' 
+ logging.error( + "Required Drupal field %s is not present in the CSV file.", + required_drupal_field, + ) + sys.exit( + 'Error: Field "' + + required_drupal_field + + '" required for content type "' + + config["content_type"] + + '" is not present in the CSV file.' + ) + message = "OK, required Drupal fields are present in the CSV file." print(message) logging.info(message) validate_required_fields_have_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_required_fields_have_values(config, required_drupal_fields_node, validate_required_fields_have_values_csv_data) + validate_required_fields_have_values( + config, + required_drupal_fields_node, + validate_required_fields_have_values_csv_data, + ) # Validate dates in 'created' field, if present. # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - if 'created' in csv_column_headers: + if "created" in csv_column_headers: validate_node_created_csv_data = get_csv_data(config) validate_node_created_date(config, validate_node_created_csv_data) # Validate user IDs in 'uid' field, if present. # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - if 'uid' in csv_column_headers: + if "uid" in csv_column_headers: validate_node_uid_csv_data = get_csv_data(config) validate_node_uid(config, validate_node_uid_csv_data) - if config['task'] == 'update': - if 'node_id' not in csv_column_headers: - message = 'For "update" tasks, your CSV file must contain a "node_id" column.' + if config["task"] == "update": + if "node_id" not in csv_column_headers: + message = ( + 'For "update" tasks, your CSV file must contain a "node_id" column.' + ) logging.error(message) - sys.exit('Error: ' + message) - if 'url_alias' in csv_column_headers: + sys.exit("Error: " + message) + if "url_alias" in csv_column_headers: # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_url_aliases_csv_data = get_csv_data(config) validate_url_aliases(config, validate_url_aliases_csv_data) - field_definitions = get_field_definitions(config, 'node') + field_definitions = get_field_definitions(config, "node") drupal_fieldnames = [] for drupal_fieldname in field_definitions: drupal_fieldnames.append(drupal_fieldname) - if 'title' in csv_column_headers: - csv_column_headers.remove('title') - if 'url_alias' in csv_column_headers: - csv_column_headers.remove('url_alias') - if 'image_alt_text' in csv_column_headers: - csv_column_headers.remove('image_alt_text') - if 'media_use_tid' in csv_column_headers: - csv_column_headers.remove('media_use_tid') - if 'revision_log' in csv_column_headers: - csv_column_headers.remove('revision_log') - if 'file' in csv_column_headers: + if "title" in csv_column_headers: + csv_column_headers.remove("title") + if "url_alias" in csv_column_headers: + csv_column_headers.remove("url_alias") + if "image_alt_text" in csv_column_headers: + csv_column_headers.remove("image_alt_text") + if "media_use_tid" in csv_column_headers: + csv_column_headers.remove("media_use_tid") + if "revision_log" in csv_column_headers: + csv_column_headers.remove("revision_log") + if "file" in csv_column_headers: message = 'Error: CSV column header "file" is not allowed in update tasks.' 
logging.error(message) sys.exit(message) - if 'node_id' in csv_column_headers: - csv_column_headers.remove('node_id') + if "node_id" in csv_column_headers: + csv_column_headers.remove("node_id") # langcode is a standard Drupal field but it doesn't show up in any field configs. - if 'langcode' in csv_column_headers: - csv_column_headers.remove('langcode') + if "langcode" in csv_column_headers: + csv_column_headers.remove("langcode") # Set this so we can validate langcode below. langcode_was_present = True for csv_column_header in csv_column_headers: - if csv_column_header not in drupal_fieldnames and csv_column_header not in node_base_fields: - if csv_column_header in config['ignore_csv_columns']: + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header not in node_base_fields + ): + if csv_column_header in config["ignore_csv_columns"]: continue - logging.error('CSV column header %s does not match any Drupal field names in the %s content type.', csv_column_header, config['content_type']) - sys.exit('Error: CSV column header "' + csv_column_header + '" does not match any Drupal field names in the ' + config['content_type'] + ' content type.') - message = 'OK, CSV column headers match Drupal field names.' + logging.error( + "CSV column header %s does not match any Drupal field names in the %s content type.", + csv_column_header, + config["content_type"], + ) + sys.exit( + 'Error: CSV column header "' + + csv_column_header + + '" does not match any Drupal field names in the ' + + config["content_type"] + + " content type." + ) + message = "OK, CSV column headers match Drupal field names." print(message) logging.info(message) # If the task is update media, check if all media_id values are valid. - if config['task'] == 'update_media': + if config["task"] == "update_media": csv_data = get_csv_data(config) row_number = 1 for row in csv_data: media_id = extract_media_id(config, row) if media_id is None: - message = 'Error: Invalid media ID in row ' + str(row_number) + ' of the CSV file.' + message = ( + "Error: Invalid media ID in row " + + str(row_number) + + " of the CSV file." + ) logging.error(message) sys.exit(message) row_number += 1 - if config['task'] == 'add_media' or config['task'] == 'create' and config['nodes_only'] is False: + if ( + config["task"] == "add_media" + or config["task"] == "create" + and config["nodes_only"] is False + ): # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_media_use_tid(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -1991,91 +2509,157 @@ def check_input(config, args): # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_media_use_tids_in_csv(config, validate_media_use_tid_values_csv_data) - if config['fixity_algorithm'] is not None: - allowed_algorithms = ['md5', 'sha1', 'sha256'] - if config['fixity_algorithm'] not in allowed_algorithms: - message = "Configured fixity algorithm '" + config['fixity_algorithm'] + "' must be one of 'md5', 'sha1', or 'sha256'." + if config["fixity_algorithm"] is not None: + allowed_algorithms = ["md5", "sha1", "sha256"] + if config["fixity_algorithm"] not in allowed_algorithms: + message = ( + "Configured fixity algorithm '" + + config["fixity_algorithm"] + + "' must be one of 'md5', 'sha1', or 'sha256'." 
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['validate_fixity_during_check'] is True and config['fixity_algorithm'] is not None: + if ( + config["validate_fixity_during_check"] is True + and config["fixity_algorithm"] is not None + ): print("Performing local checksum validation. This might take some time.") - if 'file' in csv_column_headers and 'checksum' in csv_column_headers: + if "file" in csv_column_headers and "checksum" in csv_column_headers: # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_checksums_csv_data = get_csv_data(config) - if config['task'] == 'add_media': - row_id = 'node_id' + if config["task"] == "add_media": + row_id = "node_id" else: - row_id = config['id_field'] + row_id = config["id_field"] checksum_validation_all_ok = True - for checksum_validation_row_count, checksum_validation_row in enumerate(validate_checksums_csv_data, start=1): - file_path = checksum_validation_row['file'] - hash_from_local = get_file_hash_from_local(config, file_path, config['fixity_algorithm']) - if 'checksum' in checksum_validation_row: - if hash_from_local == checksum_validation_row['checksum'].strip(): - logging.info('Local %s checksum and value in the CSV "checksum" field for file "%s" (%s) match.', config['fixity_algorithm'], file_path, hash_from_local) + for checksum_validation_row_count, checksum_validation_row in enumerate( + validate_checksums_csv_data, start=1 + ): + file_path = checksum_validation_row["file"] + hash_from_local = get_file_hash_from_local( + config, file_path, config["fixity_algorithm"] + ) + if "checksum" in checksum_validation_row: + if ( + hash_from_local + == checksum_validation_row["checksum"].strip() + ): + logging.info( + 'Local %s checksum and value in the CSV "checksum" field for file "%s" (%s) match.', + config["fixity_algorithm"], + file_path, + hash_from_local, + ) else: checksum_validation_all_ok = False - logging.warning('Local %s checksum and value in the CSV "checksum" field for file "%s" (named in CSV row "%s") do not match (local: %s, CSV: %s).', - config['fixity_algorithm'], file_path, checksum_validation_row[row_id], hash_from_local, checksum_validation_row['checksum']) + logging.warning( + 'Local %s checksum and value in the CSV "checksum" field for file "%s" (named in CSV row "%s") do not match (local: %s, CSV: %s).', + config["fixity_algorithm"], + file_path, + checksum_validation_row[row_id], + hash_from_local, + checksum_validation_row["checksum"], + ) if checksum_validation_all_ok is True: - checksum_validation_message = "OK, checksum validation during complete. All checks pass." + checksum_validation_message = ( + "OK, checksum validation during complete. All checks pass." + ) logging.info(checksum_validation_message) print(checksum_validation_message + " See the log for more detail.") else: checksum_validation_message = "Not all checksum validation passed." logging.warning(checksum_validation_message) - print("Warning: " + checksum_validation_message + " See the log for more detail.") + print( + "Warning: " + + checksum_validation_message + + " See the log for more detail." + ) - if config['task'] == 'create_terms': + if config["task"] == "create_terms": # Check that all required fields are present in the CSV. 
- field_definitions = get_field_definitions(config, 'taxonomy_term', config['vocab_id']) + field_definitions = get_field_definitions( + config, "taxonomy_term", config["vocab_id"] + ) # Check here that all required fields are present in the CSV. - required_fields = get_required_bundle_fields(config, 'taxonomy_term', config['vocab_id']) - required_fields.insert(0, 'term_name') + required_fields = get_required_bundle_fields( + config, "taxonomy_term", config["vocab_id"] + ) + required_fields.insert(0, "term_name") required_fields_check_csv_data = get_csv_data(config) missing_fields = [] for required_field in required_fields: if required_field not in required_fields_check_csv_data.fieldnames: missing_fields.append(required_field) if len(missing_fields) > 0: - message = 'Required columns missing from input CSV file: ' + joiner.join(missing_fields) + '.' + message = ( + "Required columns missing from input CSV file: " + + joiner.join(missing_fields) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Validate length of 'term_name'. # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_term_name_csv_data = get_csv_data(config) for count, row in enumerate(validate_term_name_csv_data, start=1): - if 'term_name' in row and len(row['term_name']) > 255: - message = "The 'term_name' column in row for term '" + row['term_name'] + "' of your CSV file exceeds Drupal's maximum length of 255 characters." + if "term_name" in row and len(row["term_name"]) > 255: + message = ( + "The 'term_name' column in row for term '" + + row["term_name"] + + "' of your CSV file exceeds Drupal's maximum length of 255 characters." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['task'] == 'update_terms': - field_definitions = get_field_definitions(config, 'taxonomy_term', config['vocab_id']) - term_base_fields = ['status', 'langcode', 'term_name', 'parent', 'weight', 'description'] + if config["task"] == "update_terms": + field_definitions = get_field_definitions( + config, "taxonomy_term", config["vocab_id"] + ) + term_base_fields = [ + "status", + "langcode", + "term_name", + "parent", + "weight", + "description", + ] drupal_fieldnames = [] for drupal_fieldname in field_definitions: drupal_fieldnames.append(drupal_fieldname) - if 'term_name' in csv_column_headers: - csv_column_headers.remove('term_name') - if 'parent' in csv_column_headers: - csv_column_headers.remove('parent') - if 'weight' in csv_column_headers: - csv_column_headers.remove('weight') - if 'description' in csv_column_headers: - csv_column_headers.remove('description') - if 'term_id' in csv_column_headers: - csv_column_headers.remove('term_id') + if "term_name" in csv_column_headers: + csv_column_headers.remove("term_name") + if "parent" in csv_column_headers: + csv_column_headers.remove("parent") + if "weight" in csv_column_headers: + csv_column_headers.remove("weight") + if "description" in csv_column_headers: + csv_column_headers.remove("description") + if "term_id" in csv_column_headers: + csv_column_headers.remove("term_id") for csv_column_header in csv_column_headers: - if csv_column_header not in drupal_fieldnames and csv_column_header != 'term_id' and csv_column_header not in term_base_fields: - logging.error('CSV column header %s does not match any Drupal field names in the %s taxonomy term.', csv_column_header, config['vocab_id']) - sys.exit('Error: CSV column header "' + csv_column_header + '" 
does not match any Drupal field names in the ' + config['vocab_id'] + ' taxonomy term.') - message = 'OK, CSV column headers match Drupal field names.' + if ( + csv_column_header not in drupal_fieldnames + and csv_column_header != "term_id" + and csv_column_header not in term_base_fields + ): + logging.error( + "CSV column header %s does not match any Drupal field names in the %s taxonomy term.", + csv_column_header, + config["vocab_id"], + ) + sys.exit( + 'Error: CSV column header "' + + csv_column_header + + '" does not match any Drupal field names in the ' + + config["vocab_id"] + + " taxonomy term." + ) + message = "OK, CSV column headers match Drupal field names." print(message) logging.info(message) @@ -2083,17 +2667,25 @@ def check_input(config, args): # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). validate_term_name_csv_data = get_csv_data(config) for count, row in enumerate(validate_term_name_csv_data, start=1): - if 'term_name' in row and len(row['term_name']) > 255: - message = "The 'term_name' column in row for term '" + row['term_name'] + "' of your CSV file exceeds Drupal's maximum length of 255 characters." + if "term_name" in row and len(row["term_name"]) > 255: + message = ( + "The 'term_name' column in row for term '" + + row["term_name"] + + "' of your CSV file exceeds Drupal's maximum length of 255 characters." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['task'] == 'create_terms' or config['task'] == 'update_terms': + if config["task"] == "create_terms" or config["task"] == "update_terms": # Check that all required fields are present in the CSV. - field_definitions = get_field_definitions(config, 'taxonomy_term', config['vocab_id']) + field_definitions = get_field_definitions( + config, "taxonomy_term", config["vocab_id"] + ) validate_geolocation_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_geolocation_fields(config, field_definitions, validate_geolocation_values_csv_data) + validate_geolocation_fields( + config, field_definitions, validate_geolocation_values_csv_data + ) validate_link_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -2101,7 +2693,9 @@ def check_input(config, args): validate_authority_link_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_authority_link_fields(config, field_definitions, validate_authority_link_values_csv_data) + validate_authority_link_fields( + config, field_definitions, validate_authority_link_values_csv_data + ) validate_edtf_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -2109,29 +2703,43 @@ def check_input(config, args): validate_csv_field_cardinality_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_csv_field_cardinality(config, field_definitions, validate_csv_field_cardinality_csv_data) + validate_csv_field_cardinality( + config, field_definitions, validate_csv_field_cardinality_csv_data + ) validate_csv_field_length_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). 
- validate_csv_field_length(config, field_definitions, validate_csv_field_length_csv_data) + validate_csv_field_length( + config, field_definitions, validate_csv_field_length_csv_data + ) validate_taxonomy_field_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - warn_user_about_taxo_terms = validate_taxonomy_field_values(config, field_definitions, validate_taxonomy_field_csv_data) + warn_user_about_taxo_terms = validate_taxonomy_field_values( + config, field_definitions, validate_taxonomy_field_csv_data + ) if warn_user_about_taxo_terms is True: - print('Warning: Issues detected with validating taxonomy field values in the CSV file. See the log for more detail.') + print( + "Warning: Issues detected with validating taxonomy field values in the CSV file. See the log for more detail." + ) validate_typed_relation_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - warn_user_about_typed_relation_terms = validate_typed_relation_field_values(config, field_definitions, validate_typed_relation_csv_data) + warn_user_about_typed_relation_terms = validate_typed_relation_field_values( + config, field_definitions, validate_typed_relation_csv_data + ) if warn_user_about_typed_relation_terms is True: - print('Warning: Issues detected with validating typed relation field values in the CSV file. See the log for more detail.') + print( + "Warning: Issues detected with validating typed relation field values in the CSV file. See the log for more detail." + ) - if config['task'] == 'update' or config['task'] == 'create': - field_definitions = get_field_definitions(config, 'node') + if config["task"] == "update" or config["task"] == "create": + field_definitions = get_field_definitions(config, "node") validate_geolocation_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_geolocation_fields(config, field_definitions, validate_geolocation_values_csv_data) + validate_geolocation_fields( + config, field_definitions, validate_geolocation_values_csv_data + ) validate_link_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -2139,7 +2747,9 @@ def check_input(config, args): validate_authority_link_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_authority_link_fields(config, field_definitions, validate_authority_link_values_csv_data) + validate_authority_link_fields( + config, field_definitions, validate_authority_link_values_csv_data + ) validate_edtf_values_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -2147,27 +2757,41 @@ def check_input(config, args): validate_csv_field_cardinality_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_csv_field_cardinality(config, field_definitions, validate_csv_field_cardinality_csv_data) + validate_csv_field_cardinality( + config, field_definitions, validate_csv_field_cardinality_csv_data + ) validate_csv_field_length_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). 
- validate_csv_field_length(config, field_definitions, validate_csv_field_length_csv_data) + validate_csv_field_length( + config, field_definitions, validate_csv_field_length_csv_data + ) validate_text_list_fields_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - validate_text_list_fields(config, field_definitions, validate_text_list_fields_data) + validate_text_list_fields( + config, field_definitions, validate_text_list_fields_data + ) validate_taxonomy_field_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - warn_user_about_taxo_terms = validate_taxonomy_field_values(config, field_definitions, validate_taxonomy_field_csv_data) + warn_user_about_taxo_terms = validate_taxonomy_field_values( + config, field_definitions, validate_taxonomy_field_csv_data + ) if warn_user_about_taxo_terms is True: - print('Warning: Issues detected with validating taxonomy field values in the CSV file. See the log for more detail.') + print( + "Warning: Issues detected with validating taxonomy field values in the CSV file. See the log for more detail." + ) validate_typed_relation_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - warn_user_about_typed_relation_terms = validate_typed_relation_field_values(config, field_definitions, validate_typed_relation_csv_data) + warn_user_about_typed_relation_terms = validate_typed_relation_field_values( + config, field_definitions, validate_typed_relation_csv_data + ) if warn_user_about_typed_relation_terms is True: - print('Warning: Issues detected with validating typed relation field values in the CSV file. See the log for more detail.') + print( + "Warning: Issues detected with validating typed relation field values in the CSV file. See the log for more detail." + ) validate_media_track_csv_data = get_csv_data(config) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). @@ -2176,23 +2800,30 @@ def check_input(config, args): # Validate existence of nodes specified in 'field_member_of'. This could be generalized out to validate node IDs in other fields. # See https://github.com/mjordan/islandora_workbench/issues/90. # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - if config['validate_parent_node_exists'] is True: + if config["validate_parent_node_exists"] is True: validate_field_member_of_csv_data = get_csv_data(config) for count, row in enumerate(validate_field_member_of_csv_data, start=1): - if 'field_member_of' in csv_column_headers: - parent_nids = row['field_member_of'].split(config['subdelimiter']) + if "field_member_of" in csv_column_headers: + parent_nids = row["field_member_of"].split(config["subdelimiter"]) for parent_nid in parent_nids: if len(parent_nid) > 0: parent_node_exists = ping_node(config, parent_nid) if parent_node_exists is False: - message = "The 'field_member_of' field in row with ID '" + row[config['id_field']] + \ - "' of your CSV file contains a node ID (" + parent_nid + ") that " + \ - "doesn't exist or is not accessible. See the workbench log for more information." + message = ( + "The 'field_member_of' field in row with ID '" + + row[config["id_field"]] + + "' of your CSV file contains a node ID (" + + parent_nid + + ") that " + + "doesn't exist or is not accessible. See the workbench log for more information." 
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: - message = '"validate_parent_node_exists" is set to false. Node IDs in "field_member_of" that do not exist or are not accessible ' + \ - 'will result in 422 errors in "create" and "update" tasks.' + message = ( + '"validate_parent_node_exists" is set to false. Node IDs in "field_member_of" that do not exist or are not accessible ' + + 'will result in 422 errors in "create" and "update" tasks.' + ) logging.warning(message) # Validate 'langcode' values if that field exists in the CSV. @@ -2200,106 +2831,177 @@ def check_input(config, args): if langcode_was_present: validate_langcode_csv_data = get_csv_data(config) for count, row in enumerate(validate_langcode_csv_data, start=1): - langcode_valid = validate_language_code(row['langcode']) + langcode_valid = validate_language_code(row["langcode"]) if not langcode_valid: - message = "Row with ID " + row[config['id_field']] + " of your CSV file contains an invalid Drupal language code (" + row['langcode'] + ") in its 'langcode' column." + message = ( + "Row with ID " + + row[config["id_field"]] + + " of your CSV file contains an invalid Drupal language code (" + + row["langcode"] + + ") in its 'langcode' column." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['task'] == 'delete': - if 'node_id' not in csv_column_headers: - message = 'For "delete" tasks, your CSV file must contain a "node_id" column.' + if config["task"] == "delete": + if "node_id" not in csv_column_headers: + message = ( + 'For "delete" tasks, your CSV file must contain a "node_id" column.' + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'add_media': - if 'node_id' not in csv_column_headers: - message = 'For "add_media" tasks, your CSV file must contain a "node_id" column.' + sys.exit("Error: " + message) + if config["task"] == "add_media": + if "node_id" not in csv_column_headers: + message = ( + 'For "add_media" tasks, your CSV file must contain a "node_id" column.' + ) logging.error(message) - sys.exit('Error: ' + message) - if 'file' not in csv_column_headers: - message = 'For "add_media" tasks, your CSV file must contain a "file" column.' + sys.exit("Error: " + message) + if "file" not in csv_column_headers: + message = ( + 'For "add_media" tasks, your CSV file must contain a "file" column.' + ) logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'update_media': - if 'media_id' not in csv_column_headers: + sys.exit("Error: " + message) + if config["task"] == "update_media": + if "media_id" not in csv_column_headers: message = 'For "update_media" tasks, your CSV file must contain a "media_id" column.' logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'delete_media': - if 'media_id' not in csv_column_headers: + sys.exit("Error: " + message) + if config["task"] == "delete_media": + if "media_id" not in csv_column_headers: message = 'For "delete_media" tasks, your CSV file must contain a "media_id" column.' logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'delete_media_by_node': - if 'node_id' not in csv_column_headers: + sys.exit("Error: " + message) + if config["task"] == "delete_media_by_node": + if "node_id" not in csv_column_headers: message = 'For "delete_media_by_node" tasks, your CSV file must contain a "node_id" column.' 
logging.error(message) - sys.exit('Error: ' + message) - if config['task'] == 'update_terms': - if 'term_id' not in csv_column_headers: + sys.exit("Error: " + message) + if config["task"] == "update_terms": + if "term_id" not in csv_column_headers: message = 'For "update_terms" tasks, your CSV file must contain a "term_id" column.' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check for existence of files listed in the 'file' column. - if config['task'] == 'create' or config['task'] == 'add_media' or config['task'] == 'update_media' and 'file' in csv_column_headers: - if config['nodes_only'] is False and config['paged_content_from_directories'] is False: + if ( + config["task"] == "create" + or config["task"] == "add_media" + or config["task"] == "update_media" + and "file" in csv_column_headers + ): + if ( + config["nodes_only"] is False + and config["paged_content_from_directories"] is False + ): # Temporary fix for https://github.com/mjordan/islandora_workbench/issues/478. - if config['task'] == 'add_media': - config['id_field'] = 'node_id' - if config['task'] == 'update_media': - config['id_field'] = 'media_id' + if config["task"] == "add_media": + config["id_field"] = "node_id" + if config["task"] == "update_media": + config["id_field"] = "media_id" file_check_csv_data = get_csv_data(config) for count, file_check_row in enumerate(file_check_csv_data, start=1): - file_check_row['file'] = file_check_row['file'].strip() + file_check_row["file"] = file_check_row["file"].strip() # Check for and log empty 'file' values. - if len(file_check_row['file']) == 0: - message = 'CSV row with ID ' + file_check_row[config['id_field']] + ' contains an empty "file" value.' + if len(file_check_row["file"]) == 0: + message = ( + "CSV row with ID " + + file_check_row[config["id_field"]] + + ' contains an empty "file" value.' + ) logging.warning(message) # Check for files that cannot be found. - if not file_check_row['file'].startswith('http') and len(file_check_row['file'].strip()) > 0: - if os.path.isabs(file_check_row['file']): - file_path = file_check_row['file'] + if ( + not file_check_row["file"].startswith("http") + and len(file_check_row["file"].strip()) > 0 + ): + if os.path.isabs(file_check_row["file"]): + file_path = file_check_row["file"] else: - file_path = os.path.join(config['input_dir'], file_check_row['file']) + file_path = os.path.join( + config["input_dir"], file_check_row["file"] + ) if not os.path.exists(file_path) or not os.path.isfile(file_path): - message = 'File "' + file_path + '" identified in CSV "file" column for row with ID "' + file_check_row[config['id_field']] + '" not found.' - if config['allow_missing_files'] is False: + message = ( + 'File "' + + file_path + + '" identified in CSV "file" column for row with ID "' + + file_check_row[config["id_field"]] + + '" not found.' 
+ ) + if config["allow_missing_files"] is False: logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) else: - if file_check_row[config['id_field']] not in rows_with_missing_files and len(file_check_row['file'].strip()) > 0: - rows_with_missing_files.append(file_check_row[config['id_field']]) + if ( + file_check_row[config["id_field"]] + not in rows_with_missing_files + and len(file_check_row["file"].strip()) > 0 + ): + rows_with_missing_files.append( + file_check_row[config["id_field"]] + ) else: logging.error(message) - if file_check_row[config['id_field']] not in rows_with_missing_files and len(file_check_row['file'].strip()) > 0: - rows_with_missing_files.append(file_check_row[config['id_field']]) + if ( + file_check_row[config["id_field"]] + not in rows_with_missing_files + and len(file_check_row["file"].strip()) > 0 + ): + rows_with_missing_files.append( + file_check_row[config["id_field"]] + ) # Remote files. else: - if len(file_check_row['file'].strip()) > 0: - http_response_code = ping_remote_file(config, file_check_row['file']) - if http_response_code != 200 or ping_remote_file(config, file_check_row['file']) is False: - message = 'Remote file "' + file_check_row['file'] + '" identified in CSV "file" column for row with ID "' \ - + file_check_row[config['id_field']] + '" not found or not accessible (HTTP response code ' + str(http_response_code) + ').' - if config['allow_missing_files'] is False: + if len(file_check_row["file"].strip()) > 0: + http_response_code = ping_remote_file( + config, file_check_row["file"] + ) + if ( + http_response_code != 200 + or ping_remote_file(config, file_check_row["file"]) is False + ): + message = ( + 'Remote file "' + + file_check_row["file"] + + '" identified in CSV "file" column for row with ID "' + + file_check_row[config["id_field"]] + + '" not found or not accessible (HTTP response code ' + + str(http_response_code) + + ")." + ) + if config["allow_missing_files"] is False: logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) else: - if file_check_row[config['id_field']] not in rows_with_missing_files and len(file_check_row['file'].strip()) > 0: - rows_with_missing_files.append(file_check_row[config['id_field']]) + if ( + file_check_row[config["id_field"]] + not in rows_with_missing_files + and len(file_check_row["file"].strip()) > 0 + ): + rows_with_missing_files.append( + file_check_row[config["id_field"]] + ) else: logging.error(message) - if file_check_row[config['id_field']] not in rows_with_missing_files and len(file_check_row['file'].strip()) > 0: - rows_with_missing_files.append(file_check_row[config['id_field']]) + if ( + file_check_row[config["id_field"]] + not in rows_with_missing_files + and len(file_check_row["file"].strip()) > 0 + ): + rows_with_missing_files.append( + file_check_row[config["id_field"]] + ) # @todo for issue 268: All accumulator variables like 'rows_with_missing_files' should be checked at end of # check_input() (to work with perform_soft_checks: True) in addition to at place of check (to work wit perform_soft_checks: False). 
if len(rows_with_missing_files) > 0: - if config['allow_missing_files'] is True: + if config["allow_missing_files"] is True: message = '"allow_missing_files" configuration setting is set to "true", and CSV "file" column values containing missing files were detected.' print("Warning: " + message + " See the log for more information.") logging.warning(message + " Details are logged above.") @@ -2309,205 +3011,370 @@ def check_input(config, args): logging.info(message) # Verify that all media bundles/types exist. - if config['nodes_only'] is False: + if config["nodes_only"] is False: media_type_check_csv_data = get_csv_data(config) - for count, file_check_row in enumerate(media_type_check_csv_data, start=1): - filename_fields_to_check = ['file'] + for count, file_check_row in enumerate( + media_type_check_csv_data, start=1 + ): + filename_fields_to_check = ["file"] for filename_field in filename_fields_to_check: if len(file_check_row[filename_field]) != 0: - media_type = set_media_type(config, file_check_row[filename_field], filename_field, file_check_row) - media_bundle_response_code = ping_media_bundle(config, media_type) + media_type = set_media_type( + config, + file_check_row[filename_field], + filename_field, + file_check_row, + ) + media_bundle_response_code = ping_media_bundle( + config, media_type + ) if media_bundle_response_code == 404: - message = 'File "' + file_check_row[filename_field] + '" identified in CSV row ' + file_check_row[config['id_field']] + \ - ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.' + \ - ' Please make sure your media type configuration matches your Drupal configuration.' + message = ( + 'File "' + + file_check_row[filename_field] + + '" identified in CSV row ' + + file_check_row[config["id_field"]] + + " will create a media of type (" + + media_type + + "), but that media type is not configured in the destination Drupal." + + " Please make sure your media type configuration matches your Drupal configuration." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check that each file's extension is allowed for the current media type. 'file' is the only # CSV field to check here. Files added using the 'additional_files' setting are checked below. - if file_check_row['file'].startswith('http'): + if file_check_row["file"].startswith("http"): # First check to see if the file has an extension. 
- extension = os.path.splitext(file_check_row['file'])[1] + extension = os.path.splitext(file_check_row["file"])[1] if len(extension) > 0: - extension = extension.lstrip('.').lower() + extension = extension.lstrip(".").lower() else: - extension = get_remote_file_extension(config, file_check_row['file']) - extension = extension.lstrip('.') + extension = get_remote_file_extension( + config, file_check_row["file"] + ) + extension = extension.lstrip(".") else: - extension = os.path.splitext(file_check_row['file'])[1] - extension = extension.lstrip('.').lower() - media_type_file_field = config['media_type_file_fields'][media_type] - registered_extensions = get_registered_media_extensions(config, media_type, media_type_file_field) - if isinstance(extension, str) and isinstance(registered_extensions, dict) and extension not in registered_extensions[media_type_file_field]: - message = 'File "' + file_check_row[filename_field] + '" in CSV row "' + file_check_row[config['id_field']] + \ - '" has an extension (' + str(extension) + ') that is not allowed in the "' + media_type_file_field + '" field of the "' + media_type + '" media type.' + extension = os.path.splitext(file_check_row["file"])[1] + extension = extension.lstrip(".").lower() + media_type_file_field = config["media_type_file_fields"][ + media_type + ] + registered_extensions = get_registered_media_extensions( + config, media_type, media_type_file_field + ) + if ( + isinstance(extension, str) + and isinstance(registered_extensions, dict) + and extension + not in registered_extensions[media_type_file_field] + ): + message = ( + 'File "' + + file_check_row[filename_field] + + '" in CSV row "' + + file_check_row[config["id_field"]] + + '" has an extension (' + + str(extension) + + ') that is not allowed in the "' + + media_type_file_field + + '" field of the "' + + media_type + + '" media type.' + ) logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) # Check existence of fields identified in 'additional_files' config setting. - if (config['task'] == 'create' or config['task'] == 'add_media') and config['nodes_only'] is False and config['paged_content_from_directories'] is False: - if 'additional_files' in config and len(config['additional_files']) > 0: + if ( + (config["task"] == "create" or config["task"] == "add_media") + and config["nodes_only"] is False + and config["paged_content_from_directories"] is False + ): + if "additional_files" in config and len(config["additional_files"]) > 0: additional_files_entries = get_additional_files_config(config) additional_files_check_csv_data = get_csv_data(config) additional_files_fields = additional_files_entries.keys() - additional_files_fields_csv_headers = additional_files_check_csv_data.fieldnames - if config['nodes_only'] is False: + additional_files_fields_csv_headers = ( + additional_files_check_csv_data.fieldnames + ) + if config["nodes_only"] is False: for additional_file_field in additional_files_fields: if additional_file_field not in additional_files_fields_csv_headers: - message = 'CSV column "' + additional_file_field + '" registered in the "additional_files" configuration setting is missing from your CSV file.' + message = ( + 'CSV column "' + + additional_file_field + + '" registered in the "additional_files" configuration setting is missing from your CSV file.' 
+                        )
                         logging.error(message)
-                        sys.exit('Error: ' + message)
+                        sys.exit("Error: " + message)

             # Verify media use tids. @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268).
-            if config['nodes_only'] is False:
-                for additional_files_media_use_field, additional_files_media_use_tid in additional_files_entries.items():
-                    validate_media_use_tid_in_additional_files_setting(config, additional_files_media_use_tid, additional_files_media_use_field)
+            if config["nodes_only"] is False:
+                for (
+                    additional_files_media_use_field,
+                    additional_files_media_use_tid,
+                ) in additional_files_entries.items():
+                    validate_media_use_tid_in_additional_files_setting(
+                        config,
+                        additional_files_media_use_tid,
+                        additional_files_media_use_field,
+                    )

             # Check existence of files named in columns identified as 'additional_files' columns.
             missing_additional_files = False
-            for count, file_check_row in enumerate(additional_files_check_csv_data, start=1):
+            for count, file_check_row in enumerate(
+                additional_files_check_csv_data, start=1
+            ):
                 for additional_file_field in additional_files_fields:
-                    file_check_row[additional_file_field] = file_check_row[additional_file_field].strip()
+                    file_check_row[additional_file_field] = file_check_row[
+                        additional_file_field
+                    ].strip()
                     if len(file_check_row[additional_file_field]) == 0:
-                        message = 'CVS row with ID ' + file_check_row[config['id_field']] + ' contains an empty value in its "' + additional_file_field + '" column.'
+                        message = (
+                            "CSV row with ID "
+                            + file_check_row[config["id_field"]]
+                            + ' contains an empty value in its "'
+                            + additional_file_field
+                            + '" column.'
+                        )
                         logging.warning(message)
-                    if file_check_row[additional_file_field].startswith('http'):
-                        http_response_code = ping_remote_file(config, file_check_row[additional_file_field])
-                        if http_response_code != 200 or ping_remote_file(config, file_check_row[additional_file_field]) is False:
+                    if file_check_row[additional_file_field].startswith("http"):
+                        http_response_code = ping_remote_file(
+                            config, file_check_row[additional_file_field]
+                        )
+                        if (
+                            http_response_code != 200
+                            or ping_remote_file(
+                                config, file_check_row[additional_file_field]
+                            )
+                            is False
+                        ):
                             missing_additional_files = True
-                            message = 'Additional file "' + file_check_row[additional_file_field] + '" in CSV column "' + additional_file_field + '" in row with ID ' \
-                                + file_check_row[config['id_field']] + ' not found or not accessible (HTTP response code ' + str(http_response_code) + ').'
+                            message = (
+                                'Additional file "'
+                                + file_check_row[additional_file_field]
+                                + '" in CSV column "'
+                                + additional_file_field
+                                + '" in row with ID '
+                                + file_check_row[config["id_field"]]
+                                + " not found or not accessible (HTTP response code "
+                                + str(http_response_code)
+                                + ")."
+ ) + if config["allow_missing_files"] is False: logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) else: logging.error(message) continue else: if len(file_check_row[additional_file_field]) > 0: - if check_file_exists(config, file_check_row[additional_file_field]) is False: + if ( + check_file_exists( + config, file_check_row[additional_file_field] + ) + is False + ): missing_additional_files = True - message = 'Additional file "' + file_check_row[additional_file_field] + '" in CSV column "' + additional_file_field + '" in row with ID ' + \ - file_check_row[config['id_field']] + ' not found.' - if config['allow_missing_files'] is False: + message = ( + 'Additional file "' + + file_check_row[additional_file_field] + + '" in CSV column "' + + additional_file_field + + '" in row with ID ' + + file_check_row[config["id_field"]] + + " not found." + ) + if config["allow_missing_files"] is False: logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) else: logging.error(message) continue if missing_additional_files is True: - if config['allow_missing_files'] is True: + if config["allow_missing_files"] is True: message = '"allow_missing_files" configuration setting is set to "true", and "additional_files" CSV columns containing missing files were detected.' print("Warning: " + message + " See the log for more information.") logging.warning(message + " Details are logged above.") else: - if config['perform_soft_checks'] is False: + if config["perform_soft_checks"] is False: sys.exit(message) else: - message = 'OK, files named in "additional_files" CSV columns are all present.' + message = ( + 'OK, files named in "additional_files" CSV columns are all present.' + ) print(message) logging.info(message) # @todo: add the 'rows_with_missing_files' method of accumulating invalid values (issue 268). - if 'additional_files' in config and len(config['additional_files']) > 0 and config['nodes_only'] is False: + if ( + "additional_files" in config + and len(config["additional_files"]) > 0 + and config["nodes_only"] is False + ): additional_files_check_extensions_csv_data = get_csv_data(config) # Check media types for files registered in 'additional_files'. - for count, file_check_row in enumerate(additional_files_check_extensions_csv_data, start=1): + for count, file_check_row in enumerate( + additional_files_check_extensions_csv_data, start=1 + ): for additional_file_field in additional_files_fields: if len(file_check_row[additional_file_field].strip()) > 0: - media_type = set_media_type(config, file_check_row[additional_file_field], additional_file_field, file_check_row) - media_bundle_response_code = ping_media_bundle(config, media_type) + media_type = set_media_type( + config, + file_check_row[additional_file_field], + additional_file_field, + file_check_row, + ) + media_bundle_response_code = ping_media_bundle( + config, media_type + ) if media_bundle_response_code == 404: - message = 'File "' + file_check_row[additional_file_field] + '" identified in CSV row ' + file_check_row[config['id_field']] + \ - ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.' + \ - ' Please make sure your media type configuration matches your Drupal configuration.' 
+ message = ( + 'File "' + + file_check_row[additional_file_field] + + '" identified in CSV row ' + + file_check_row[config["id_field"]] + + " will create a media of type (" + + media_type + + "), but that media type is not configured in the destination Drupal." + + " Please make sure your media type configuration matches your Drupal configuration." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check that each file's extension is allowed for the current media type. - additional_filenames = file_check_row[additional_file_field].split(config['subdelimiter']) - media_type_file_field = config['media_type_file_fields'][media_type] + additional_filenames = file_check_row[ + additional_file_field + ].split(config["subdelimiter"]) + media_type_file_field = config["media_type_file_fields"][ + media_type + ] for additional_filename in additional_filenames: if check_file_exists(config, additional_filename): - if additional_filename.startswith('http'): + if additional_filename.startswith("http"): # First check to see if the file has an extension. extension = os.path.splitext(additional_filename)[1] if len(extension) > 0: - extension = extension.lstrip('.') - extension = extension.lstrip('.') + extension = extension.lstrip(".") + extension = extension.lstrip(".") else: - extension = get_remote_file_extension(config, additional_filename) - extension = extension.lstrip('.') + extension = get_remote_file_extension( + config, additional_filename + ) + extension = extension.lstrip(".") else: extension = os.path.splitext(additional_filename) - extension = extension[1].lstrip('.').lower() - - registered_extensions = get_registered_media_extensions(config, media_type, media_type_file_field) - if extension not in registered_extensions[media_type_file_field]: - message = 'File "' + additional_filename + '" in the "' + additional_file_field + '" field of row "' + file_check_row[config['id_field']] + \ - '" has an extension (' + str(extension) + ') that is not allowed in the "' + media_type_file_field + '" field of the "' + media_type + '" media type.' + extension = extension[1].lstrip(".").lower() + + registered_extensions = get_registered_media_extensions( + config, media_type, media_type_file_field + ) + if ( + extension + not in registered_extensions[media_type_file_field] + ): + message = ( + 'File "' + + additional_filename + + '" in the "' + + additional_file_field + + '" field of row "' + + file_check_row[config["id_field"]] + + '" has an extension (' + + str(extension) + + ') that is not allowed in the "' + + media_type_file_field + + '" field of the "' + + media_type + + '" media type.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # @todo Add warning to accommodate #639 - if config['task'] == 'create' and config['paged_content_from_directories'] is True: - if 'paged_content_page_model_tid' not in config: + if config["task"] == "create" and config["paged_content_from_directories"] is True: + if "paged_content_page_model_tid" not in config: message = 'If you are creating paged content, you must include "paged_content_page_model_tid" in your configuration.' - logging.error('Configuration requires "paged_content_page_model_tid" setting when creating paged content.') - sys.exit('Error: ' + message) + logging.error( + 'Configuration requires "paged_content_page_model_tid" setting when creating paged content.' 
+            )
+            sys.exit("Error: " + message)

-        if 'paged_content_additional_page_media' in config:
-            disable_action_message = 'Including the "paged_content_additional_page_media" setting in your configuration will create ' + \
-                'media that are normally generated by Islandora microservices. You should disable any actions your Drupal Contexts ' + \
-                '"Derivatives" configuration so that Islandora does not also generate duplicate media.'
+        if "paged_content_additional_page_media" in config:
+            disable_action_message = (
+                'Including the "paged_content_additional_page_media" setting in your configuration will create '
+                + "media that are normally generated by Islandora microservices. You should disable any actions your Drupal Contexts "
+                + '"Derivatives" configuration so that Islandora does not also generate duplicate media.'
+            )
             logging.warning(disable_action_message)
-            print('Warning: ' + disable_action_message)
+            print("Warning: " + disable_action_message)

-        if 'paged_content_image_file_extension' not in config or 'paged_content_additional_page_media' not in config:
-            message = 'If your configuration contains the "paged_content_additional_page_media" setting, it must also include both ' + \
-                'the "paged_content_image_file_extension" and "paged_content_additional_page_media" settings.s.'
+        if (
+            "paged_content_image_file_extension" not in config
+            or "paged_content_additional_page_media" not in config
+        ):
+            message = (
+                'If your configuration contains the "paged_content_additional_page_media" setting, it must also include both '
+                + 'the "paged_content_image_file_extension" and "paged_content_additional_page_media" settings.'
+            )
             logging.error(message)
-            sys.exit('Error: ' + message)
+            sys.exit("Error: " + message)

         paged_content_from_directories_csv_data = get_csv_data(config)
-        for count, file_check_row in enumerate(paged_content_from_directories_csv_data, start=1):
-            dir_path = os.path.join(config['input_dir'], file_check_row[config['id_field']])
+        for count, file_check_row in enumerate(
+            paged_content_from_directories_csv_data, start=1
+        ):
+            dir_path = os.path.join(
+                config["input_dir"], file_check_row[config["id_field"]]
+            )
             if not os.path.exists(dir_path) or os.path.isfile(dir_path):
-                message = 'Page directory ' + dir_path + ' for CSV record with ID "' + file_check_row[config['id_field']] + '"" not found.'
+                message = (
+                    "Page directory "
+                    + dir_path
+                    + ' for CSV record with ID "'
+                    + file_check_row[config["id_field"]]
+                    + '" not found.'
+                )
                 logging.error(message)
-                sys.exit('Error: ' + message)
+                sys.exit("Error: " + message)
             page_files = os.listdir(dir_path)
             if len(page_files) == 0:
-                message = 'Page directory ' + dir_path + ' is empty.'
+                message = "Page directory " + dir_path + " is empty."
                 print("Warning: " + message)
                 logging.warning(message)
             for page_file_name in page_files:
-                if config['paged_content_sequence_separator'] not in page_file_name:
-                    message = 'Page file ' + os.path.join(dir_path, page_file_name) + ' does not contain a sequence separator (' + config['paged_content_sequence_separator'] + ').'
+                if config["paged_content_sequence_separator"] not in page_file_name:
+                    message = (
+                        "Page file "
+                        + os.path.join(dir_path, page_file_name)
+                        + " does not contain a sequence separator ("
+                        + config["paged_content_sequence_separator"]
+                        + ")."
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - print('OK, page directories are all present.') + print("OK, page directories are all present.") # Check for bootstrap scripts, if any are configured. bootsrap_scripts_present = False - if 'bootstrap' in config and len(config['bootstrap']) > 0: + if "bootstrap" in config and len(config["bootstrap"]) > 0: bootsrap_scripts_present = True - for bootstrap_script in config['bootstrap']: + for bootstrap_script in config["bootstrap"]: if not os.path.exists(bootstrap_script): message = "Bootstrap script " + bootstrap_script + " not found." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if os.access(bootstrap_script, os.X_OK) is False: message = "Bootstrap script " + bootstrap_script + " is not executable." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if bootsrap_scripts_present is True: message = "OK, registered bootstrap scripts found and executable." logging.info(message) @@ -2515,17 +3382,17 @@ def check_input(config, args): # Check for shutdown scripts, if any are configured. shutdown_scripts_present = False - if 'shutdown' in config and len(config['shutdown']) > 0: + if "shutdown" in config and len(config["shutdown"]) > 0: shutdown_scripts_present = True - for shutdown_script in config['shutdown']: + for shutdown_script in config["shutdown"]: if not os.path.exists(shutdown_script): message = "shutdown script " + shutdown_script + " not found." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if os.access(shutdown_script, os.X_OK) is False: message = "Shutdown script " + shutdown_script + " is not executable." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if shutdown_scripts_present is True: message = "OK, registered shutdown scripts found and executable." logging.info(message) @@ -2533,92 +3400,127 @@ def check_input(config, args): # Check for preprocessor scripts, if any are configured. preprocessor_scripts_present = False - if 'preprocessors' in config and len(config['preprocessors']) > 0: + if "preprocessors" in config and len(config["preprocessors"]) > 0: preprocessor_scripts_present = True # for preprocessor_script in config['preprocessors']: - for field, script_path in config['preprocessors'].items(): + for field, script_path in config["preprocessors"].items(): if not os.path.exists(script_path): message = f'Preprocessor script "{script_path}" for field "{field}" not found.' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if os.access(script_path, os.X_OK) is False: message = f'Preprocessor script "{script_path}" for field "{field}" is not executable.' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if preprocessor_scripts_present is True: - message = f'OK, registered preprocessor scripts found and executable.' + message = f"OK, registered preprocessor scripts found and executable." logging.info(message) print(message) # Check for the existence and executableness of post-action scripts, if any are configured. 
- if config['task'] == 'create' or config['task'] == 'update' or config['task'] == 'add_media': - post_action_scripts_configs = ['node_post_create', 'node_post_update', 'media_post_create'] + if ( + config["task"] == "create" + or config["task"] == "update" + or config["task"] == "add_media" + ): + post_action_scripts_configs = [ + "node_post_create", + "node_post_update", + "media_post_create", + ] for post_action_script_config in post_action_scripts_configs: post_action_scripts_present = False - if post_action_script_config in config and len(config[post_action_script_config]) > 0: + if ( + post_action_script_config in config + and len(config[post_action_script_config]) > 0 + ): post_action_scripts_present = True for post_action_script in config[post_action_script_config]: if not os.path.exists(post_action_script): - message = "Post-action script " + post_action_script + " not found." + message = ( + "Post-action script " + post_action_script + " not found." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if os.access(post_action_script, os.X_OK) is False: - message = "Post-action script " + post_action_script + " is not executable." + message = ( + "Post-action script " + + post_action_script + + " is not executable." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if post_action_scripts_present is True: message = "OK, registered post-action scripts found and executable." logging.info(message) print(message) - if config['task'] == 'export_csv': - if 'node_id' not in csv_column_headers: - message = 'For "export_csv" tasks, your CSV file must contain a "node_id" column.' + if config["task"] == "export_csv": + if "node_id" not in csv_column_headers: + message = ( + 'For "export_csv" tasks, your CSV file must contain a "node_id" column.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - export_csv_term_mode_options = ['tid', 'name'] - if config['export_csv_term_mode'] not in export_csv_term_mode_options: + export_csv_term_mode_options = ["tid", "name"] + if config["export_csv_term_mode"] not in export_csv_term_mode_options: message = 'Configuration option "export_csv_term_mode_options" must be either "tid" or "name".' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['export_file_directory'] is not None: - if not os.path.exists(config['export_csv_file_path']): + if config["export_file_directory"] is not None: + if not os.path.exists(config["export_csv_file_path"]): try: - os.mkdir(config['export_file_directory']) - os.rmdir(config['export_file_directory']) + os.mkdir(config["export_file_directory"]) + os.rmdir(config["export_file_directory"]) except Exception as e: - message = 'Path in configuration option "export_file_directory" ("' + config['export_file_directory'] + '") is not writable.' - logging.error(message + ' ' + str(e)) - sys.exit('Error: ' + message + ' See log for more detail.') - - if config['export_file_media_use_term_id'] is False: + message = ( + 'Path in configuration option "export_file_directory" ("' + + config["export_file_directory"] + + '") is not writable.' + ) + logging.error(message + " " + str(e)) + sys.exit("Error: " + message + " See log for more detail.") + + if config["export_file_media_use_term_id"] is False: message = f'Unknown value for configuration setting "export_file_media_use_term_id": {config["export_file_media_use_term_id"]}.' 
logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if len(rows_with_missing_files) > 0 and config['allow_missing_files'] is False and config['perform_soft_checks'] is False: - logging.error('Missing or empty CSV "file" column values detected. See log entries above.') - sys.exit('Error: Missing or empty CSV "file" column values detected. See the log for more information.') + if ( + len(rows_with_missing_files) > 0 + and config["allow_missing_files"] is False + and config["perform_soft_checks"] is False + ): + logging.error( + 'Missing or empty CSV "file" column values detected. See log entries above.' + ) + sys.exit( + 'Error: Missing or empty CSV "file" column values detected. See the log for more information.' + ) - if len(rows_with_missing_files) > 0 and config['perform_soft_checks'] is True: + if len(rows_with_missing_files) > 0 and config["perform_soft_checks"] is True: message = '"perform_soft_checks" configuration setting is set to "true" and some values in the "file" column were not found.' logging.warning(message + " See log entries above.") print("Warning: " + message + " See the log for more information.") - if 'additional_files' in config and len(config['additional_files']) > 0 and config['nodes_only'] is False: + if ( + "additional_files" in config + and len(config["additional_files"]) > 0 + and config["nodes_only"] is False + ): if missing_additional_files is True: - if config['allow_missing_files'] is False: + if config["allow_missing_files"] is False: message = '"allow_missing_files" configuration setting is set to "false", and some files in fields configured as "additional_file" fields cannot be found.' logging.error(message + " See log entries above.") print(message + " See the log for more information.") - if config['perform_soft_checks'] is True: + if config["perform_soft_checks"] is True: message = 'The "perform_soft_checks" configuration setting is set to "true", so Workbench did not exit after finding the first missing file.' logging.warning(message) print(message + " See the log for more information.") else: - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: message = 'OK, files in fields configured as "additional_file" fields are all present.' logging.info(message) @@ -2626,27 +3528,47 @@ def check_input(config, args): # If nothing has failed by now, exit with a positive, upbeat message. print("Configuration and input data appear to be valid.") - logging.info('Configuration checked for "%s" task using config file "%s", no problems found.', config['task'], args.config) + logging.info( + 'Configuration checked for "%s" task using config file "%s", no problems found.', + config["task"], + args.config, + ) if args.contactsheet is True: - if os.path.isabs(config['contact_sheet_output_dir']): - contact_sheet_path = os.path.join(config['contact_sheet_output_dir'], 'contact_sheet.htm') + if os.path.isabs(config["contact_sheet_output_dir"]): + contact_sheet_path = os.path.join( + config["contact_sheet_output_dir"], "contact_sheet.htm" + ) else: - contact_sheet_path = os.path.join(os.getcwd(), config['contact_sheet_output_dir'], 'contact_sheet.htm') + contact_sheet_path = os.path.join( + os.getcwd(), config["contact_sheet_output_dir"], "contact_sheet.htm" + ) generate_contact_sheet_from_csv(config) message = f"Contact sheet is at {contact_sheet_path}." 
print(message) logging.info(message) - if config['secondary_tasks'] is None: + if config["secondary_tasks"] is None: sys.exit(0) else: - for secondary_config_file in json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]): + for secondary_config_file in json.loads( + os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"] + ): print("") - print('Running --check using secondary configuration file "' + secondary_config_file + '"') - if os.name == 'nt': + print( + 'Running --check using secondary configuration file "' + + secondary_config_file + + '"' + ) + if os.name == "nt": # Assumes python.exe is in the user's PATH. - cmd = ["python", "./workbench", "--config", secondary_config_file, "--check"] + cmd = [ + "python", + "./workbench", + "--config", + secondary_config_file, + "--check", + ] else: cmd = ["./workbench", "--config", secondary_config_file, "--check"] output = subprocess.run(cmd) @@ -2656,7 +3578,7 @@ def check_input(config, args): def get_registered_media_extensions(config, media_bundle, field_name_filter=None): """For the given media bundle, gets a list of file extensions registered in Drupal's - "Allowed file extensions" configuration for each field that has this setting. + "Allowed file extensions" configuration for each field that has this setting. """ """Parameters ---------- @@ -2676,117 +3598,222 @@ def get_registered_media_extensions(config, media_bundle, field_name_filter=None registered for those fields in Drupal. All extensions are lower case. """ registered_extensions = dict() - media_field_definitions = get_field_definitions(config, 'media', media_bundle) + media_field_definitions = get_field_definitions(config, "media", media_bundle) for field_name, field_def in media_field_definitions.items(): - if 'file_extensions' in field_def: - registered_extensions[field_name] = re.split(r'\s+', field_def['file_extensions']) + if "file_extensions" in field_def: + registered_extensions[field_name] = re.split( + r"\s+", field_def["file_extensions"] + ) for i in range(len(registered_extensions[field_name])): - registered_extensions[field_name][i] = registered_extensions[field_name][i].lower() + registered_extensions[field_name][i] = registered_extensions[ + field_name + ][i].lower() if field_name_filter is not None and field_name_filter in registered_extensions: return {field_name_filter: registered_extensions[field_name_filter]} - elif field_name_filter is not None and field_name_filter not in registered_extensions: + elif ( + field_name_filter is not None and field_name_filter not in registered_extensions + ): return False else: return registered_extensions def check_input_for_create_from_files(config, args): - """Validate the config file and input data if task is 'create_from_files'. - """ - if config['task'] != 'create_from_files': + """Validate the config file and input data if task is 'create_from_files'.""" + if config["task"] != "create_from_files": message = 'Your task must be "create_from_files".' 
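
# Illustrative sketch of consuming the return value of get_registered_media_extensions()
# shown above. "config" is the usual Workbench configuration dict; the "image" bundle,
# the "field_media_image" field name, and the module import path are typical examples,
# not values taken from this change.
from workbench_utils import get_registered_media_extensions


def report_image_extensions(config):
    extensions = get_registered_media_extensions(config, "image", "field_media_image")
    if extensions is False:
        return "field_media_image has no 'Allowed file extensions' setting."
    # e.g. {"field_media_image": ["png", "jpg", "jpeg"]}
    return ", ".join(extensions["field_media_image"])
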
logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - logging.info('Starting configuration check for "%s" task using config file %s.', config['task'], args.config) + logging.info( + 'Starting configuration check for "%s" task using config file %s.', + config["task"], + args.config, + ) ping_islandora(config, print_message=False) config_keys = list(config.keys()) unwanted_in_create_from_files = [ - 'check', - 'delimiter', - 'subdelimiter', - 'allow_missing_files', - 'paged_content_from_directories', - 'delete_media_with_nodes', - 'allow_adding_terms'] + "check", + "delimiter", + "subdelimiter", + "allow_missing_files", + "paged_content_from_directories", + "delete_media_with_nodes", + "allow_adding_terms", + ] for option in unwanted_in_create_from_files: if option in config_keys: config_keys.remove(option) - joiner = ', ' + joiner = ", " # Check for presence of required config keys. - create_required_options = [ - 'task', - 'host', - 'username', - 'password'] + create_required_options = ["task", "host", "username", "password"] for create_required_option in create_required_options: if create_required_option not in config_keys: - message = 'Please check your config file for required values: ' + joiner.join(create_required_options) + '.' + message = ( + "Please check your config file for required values: " + + joiner.join(create_required_options) + + "." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check existence of input directory. - if os.path.exists(config['input_dir']): - message = 'OK, input directory "' + config['input_dir'] + '" found.' + if os.path.exists(config["input_dir"]): + message = 'OK, input directory "' + config["input_dir"] + '" found.' print(message) logging.info(message) else: - message = 'Input directory "' + config['input_dir'] + '"" not found.' + message = 'Input directory "' + config["input_dir"] + '"" not found.' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Validate length of 'title'. - files = os.listdir(config['input_dir']) + files = os.listdir(config["input_dir"]) for file_name in files: filename_without_extension = os.path.splitext(file_name)[0] - if len(filename_without_extension) > int(config['max_node_title_length']): - message = 'The filename "' + filename_without_extension + \ - '" exceeds Drupal\'s maximum length of ' + config['max_node_title_length'] + ' characters and cannot be used for a node title.' + if len(filename_without_extension) > int(config["max_node_title_length"]): + message = ( + 'The filename "' + + filename_without_extension + + "\" exceeds Drupal's maximum length of " + + config["max_node_title_length"] + + " characters and cannot be used for a node title." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check that either 'model' or 'models' are present in the config file. - if ('model' not in config and 'models' not in config): + if "model" not in config and "models" not in config: message = 'You must include either the "model" or "models" option in your configuration.' logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # If nothing has failed by now, exit with a positive message. 
print("Configuration and input data appear to be valid.") logging.info( 'Configuration checked for "%s" task using config file %s, no problems found.', - config['task'], - args.config) + config["task"], + args.config, + ) sys.exit(0) def log_field_cardinality_violation(field_name, record_id, cardinality): """Writes an entry to the log during create/update tasks if any field values - are sliced off. Workbench does this if the number of values in a field - exceeds the field's cardinality. record_id could be a value from the - configured id_field or a node ID. + are sliced off. Workbench does this if the number of values in a field + exceeds the field's cardinality. record_id could be a value from the + configured id_field or a node ID. """ logging.warning( "Adding all values in CSV field %s for record %s would exceed maximum number of allowed values (%s). Skipping adding extra values.", field_name, record_id, - cardinality) + cardinality, + ) def validate_language_code(langcode): # Drupal's language codes. - codes = ['af', 'am', 'ar', 'ast', 'az', 'be', 'bg', 'bn', 'bo', 'bs', - 'ca', 'cs', 'cy', 'da', 'de', 'dz', 'el', 'en', 'en-x-simple', 'eo', - 'es', 'et', 'eu', 'fa', 'fi', 'fil', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', - 'gsw-berne', 'gu', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', - 'ja', 'jv', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'lo', 'lt', 'lv', - 'mg', 'mk', 'ml', 'mn', 'mr', 'ms', 'my', 'ne', 'nl', 'nb', 'nn', 'oc', - 'pa', 'pl', 'pt-pt', 'pt-br', 'ro', 'ru', 'sco', 'se', 'si', 'sk', 'sl', - 'sq', 'sr', 'sv', 'sw', 'ta', 'ta-lk', 'te', 'th', 'tr', 'tyv', 'ug', - 'uk', 'ur', 'vi', 'xx-lolspeak', 'zh-hans', 'zh-hant'] + codes = [ + "af", + "am", + "ar", + "ast", + "az", + "be", + "bg", + "bn", + "bo", + "bs", + "ca", + "cs", + "cy", + "da", + "de", + "dz", + "el", + "en", + "en-x-simple", + "eo", + "es", + "et", + "eu", + "fa", + "fi", + "fil", + "fo", + "fr", + "fy", + "ga", + "gd", + "gl", + "gsw-berne", + "gu", + "he", + "hi", + "hr", + "ht", + "hu", + "hy", + "id", + "is", + "it", + "ja", + "jv", + "ka", + "kk", + "km", + "kn", + "ko", + "ku", + "ky", + "lo", + "lt", + "lv", + "mg", + "mk", + "ml", + "mn", + "mr", + "ms", + "my", + "ne", + "nl", + "nb", + "nn", + "oc", + "pa", + "pl", + "pt-pt", + "pt-br", + "ro", + "ru", + "sco", + "se", + "si", + "sk", + "sl", + "sq", + "sr", + "sv", + "sw", + "ta", + "ta-lk", + "te", + "th", + "tr", + "tyv", + "ug", + "uk", + "ur", + "vi", + "xx-lolspeak", + "zh-hans", + "zh-hant", + ] if langcode in codes: return True else: @@ -2795,7 +3822,7 @@ def validate_language_code(langcode): def clean_csv_values(config, row): """Performs basic string cleanup on CSV values. Applies to entier value, - not each subdivided value. + not each subdivided value. """ """Parameters ---------- @@ -2809,36 +3836,36 @@ def clean_csv_values(config, row): The CSV DictReader object. """ for field in row: - if 'smart_quotes' not in config['clean_csv_values_skip']: + if "smart_quotes" not in config["clean_csv_values_skip"]: # Replace smart/curly quotes with straight ones. - row[field] = str(row[field]).replace('“', '"').replace('”', '"') + row[field] = str(row[field]).replace("“", '"').replace("”", '"') row[field] = str(row[field]).replace("‘", "'").replace("’", "'") - if 'inside_spaces' not in config['clean_csv_values_skip']: + if "inside_spaces" not in config["clean_csv_values_skip"]: # Remove multiple spaces within string. 
- row[field] = re.sub(' +', ' ', str(row[field])) + row[field] = re.sub(" +", " ", str(row[field])) # Any outside .strip()s should come after 'outside_spaces', so they are removed first. # Assumes that spaces/newlines are the most likely extraneous leading and trailing # characters in CSV values. - if 'outside_spaces' not in config['clean_csv_values_skip']: + if "outside_spaces" not in config["clean_csv_values_skip"]: # Strip leading and trailing whitespace, including newlines. row[field] = str(row[field]).strip() - if 'outside_subdelimiters' not in config['clean_csv_values_skip']: + if "outside_subdelimiters" not in config["clean_csv_values_skip"]: # Strip leading and trailing subdelimters. - row[field] = str(row[field]).strip(config['subdelimiter']) + row[field] = str(row[field]).strip(config["subdelimiter"]) return row def truncate_csv_value(field_name, record_id, field_config, value): """Drupal will not accept text field values that have a length that - exceeds the configured maximum length for that field. 'value' - here is a field subvalue. + exceeds the configured maximum length for that field. 'value' + here is a field subvalue. """ - if isinstance(value, str) and 'max_length' in field_config: - max_length = field_config['max_length'] + if isinstance(value, str) and "max_length" in field_config: + max_length = field_config["max_length"] if max_length is not None and len(value) > int(max_length): original_value = value value = value[:max_length] @@ -2847,13 +3874,14 @@ def truncate_csv_value(field_name, record_id, field_config, value): original_value, field_name, record_id, - max_length) + max_length, + ) return value def deduplicate_field_values(values): """Removes duplicate entries from 'values' while retaining - the order of the unique members. + the order of the unique members. """ """Parameters ---------- @@ -2874,63 +3902,65 @@ def deduplicate_field_values(values): def get_node_field_values(config, nid): - """Get a node's field data so we can use it during PATCH updates, which replace a field's values. - """ - node_url = config['host'] + '/node/' + nid + '?_format=json' - response = issue_request(config, 'GET', node_url) + """Get a node's field data so we can use it during PATCH updates, which replace a field's values.""" + node_url = config["host"] + "/node/" + nid + "?_format=json" + response = issue_request(config, "GET", node_url) node_fields = json.loads(response.text) return node_fields def get_media_field_values(config, media_id): - """Get a media's field data so we can use it during PATCH updates, which replace a field's values. - """ - if config['standalone_media_url'] is True: - media_url = config['host'] + '/media/' + media_id + '?_format=json' + """Get a media's field data so we can use it during PATCH updates, which replace a field's values.""" + if config["standalone_media_url"] is True: + media_url = config["host"] + "/media/" + media_id + "?_format=json" else: - media_url = config['host'] + '/media/' + media_id + '/edit?_format=json' + media_url = config["host"] + "/media/" + media_id + "/edit?_format=json" - get_media_response = issue_request(config, 'GET', media_url) + get_media_response = issue_request(config, "GET", media_url) media_fields = json.loads(get_media_response.text) return media_fields def get_target_ids(node_field_values): - """Get the target IDs of all entities in a field. 
- """ + """Get the target IDs of all entities in a field.""" target_ids = [] for target in node_field_values: - target_ids.append(target['target_id']) + target_ids.append(target["target_id"]) return target_ids def get_additional_files_config(config): """Converts values in 'additional_files' config setting to a simple - dictionary for easy access. + dictionary for easy access. """ additional_files_entries = dict() - if 'additional_files' in config and len(config['additional_files']) > 0: - for additional_files_entry in config['additional_files']: - for additional_file_field, additional_file_media_use_tid in additional_files_entry.items(): - additional_files_entries[additional_file_field] = additional_file_media_use_tid + if "additional_files" in config and len(config["additional_files"]) > 0: + for additional_files_entry in config["additional_files"]: + for ( + additional_file_field, + additional_file_media_use_tid, + ) in additional_files_entry.items(): + additional_files_entries[additional_file_field] = ( + additional_file_media_use_tid + ) return additional_files_entries def split_typed_relation_string(config, typed_relation_string, target_type): """Fields of type 'typed_relation' are represented in the CSV file - using a structured string, specifically namespace:property:id, - e.g., 'relators:pht:5'. 'id' is either a term ID or a node ID. This - function takes one of those strings (optionally with a multivalue - subdelimiter) and returns a list of dictionaries in the form they - take in existing node values. ID values can also be term names (strings) - and term URIs (also strings, but in the form 'http....'). - - Also, these values can (but don't need to) have an optional namespace - in the term ID segment, which is the vocabulary ID string. These - typed relation strings look like 'relators:pht:person:Jordan, Mark'. - However, since we split the typed relation strings only on the first - two :, the entire third segment is considered, for the purposes of - splitting the value, to be the term. + using a structured string, specifically namespace:property:id, + e.g., 'relators:pht:5'. 'id' is either a term ID or a node ID. This + function takes one of those strings (optionally with a multivalue + subdelimiter) and returns a list of dictionaries in the form they + take in existing node values. ID values can also be term names (strings) + and term URIs (also strings, but in the form 'http....'). + + Also, these values can (but don't need to) have an optional namespace + in the term ID segment, which is the vocabulary ID string. These + typed relation strings look like 'relators:pht:person:Jordan, Mark'. + However, since we split the typed relation strings only on the first + two :, the entire third segment is considered, for the purposes of + splitting the value, to be the term. 
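
    Illustrative example of the return structure, assuming "|" as the configured
    subdelimiter and "taxonomy_term" as the target type (both hypothetical here):

        >>> split_typed_relation_string({"subdelimiter": "|"}, "relators:pht:5|relators:art:person:Jordan, Mark", "taxonomy_term")
        [{'target_id': 5, 'rel_type': 'relators:pht', 'target_type': 'taxonomy_term'}, {'target_id': 'person:Jordan, Mark', 'rel_type': 'relators:art', 'target_type': 'taxonomy_term'}]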
""" typed_relation_string = typed_relation_string.strip() @@ -2938,17 +3968,18 @@ def split_typed_relation_string(config, typed_relation_string, target_type): if len(typed_relation_string) == 0: return return_list - temp_list = typed_relation_string.split(config['subdelimiter']) + temp_list = typed_relation_string.split(config["subdelimiter"]) for item in temp_list: - item_list = item.split(':', 2) + item_list = item.split(":", 2) if value_is_numeric(item_list[2]): target_id = int(item_list[2]) else: target_id = item_list[2] item_dict = { - 'target_id': target_id, - 'rel_type': item_list[0] + ':' + item_list[1], - 'target_type': target_type} + "target_id": target_id, + "rel_type": item_list[0] + ":" + item_list[1], + "target_type": target_type, + } return_list.append(item_dict) return return_list @@ -2956,11 +3987,11 @@ def split_typed_relation_string(config, typed_relation_string, target_type): def split_geolocation_string(config, geolocation_string): """Fields of type 'geolocation' are represented in the CSV file using a - structured string, specifically lat,lng, e.g. "49.16667, -123.93333" - or "+49.16667, -123.93333". This function takes one of those strings - (optionally with a multivalue subdelimiter) and returns a list of - dictionaries with 'lat' and 'lng' keys required by the 'geolocation' - field type. + structured string, specifically lat,lng, e.g. "49.16667, -123.93333" + or "+49.16667, -123.93333". This function takes one of those strings + (optionally with a multivalue subdelimiter) and returns a list of + dictionaries with 'lat' and 'lng' keys required by the 'geolocation' + field type. """ geolocation_string = geolocation_string.strip() @@ -2968,11 +3999,14 @@ def split_geolocation_string(config, geolocation_string): if len(geolocation_string) == 0: return return_list - temp_list = geolocation_string.split(config['subdelimiter']) + temp_list = geolocation_string.split(config["subdelimiter"]) for item in temp_list: - item_list = item.split(',') + item_list = item.split(",") # Remove any leading \ which might be in value if it comes from a spreadsheet. - item_dict = {'lat': item_list[0].lstrip('\\').strip(), 'lng': item_list[1].lstrip('\\').strip()} + item_dict = { + "lat": item_list[0].lstrip("\\").strip(), + "lng": item_list[1].lstrip("\\").strip(), + } return_list.append(item_dict) return return_list @@ -2980,10 +4014,10 @@ def split_geolocation_string(config, geolocation_string): def split_link_string(config, link_string): """Fields of type 'link' are represented in the CSV file using a structured string, - specifically uri%%title, e.g. "https://www.lib.sfu.ca%%SFU Library Website". - This function takes one of those strings (optionally with a multivalue subdelimiter) - and returns a list of dictionaries with 'uri' and 'title' keys required by the - 'link' field type. + specifically uri%%title, e.g. "https://www.lib.sfu.ca%%SFU Library Website". + This function takes one of those strings (optionally with a multivalue subdelimiter) + and returns a list of dictionaries with 'uri' and 'title' keys required by the + 'link' field type. 
""" link_string = link_string.strip() @@ -2991,15 +4025,15 @@ def split_link_string(config, link_string): if len(link_string) == 0: return return_list - temp_list = link_string.split(config['subdelimiter']) + temp_list = link_string.split(config["subdelimiter"]) for item in temp_list: - if '%%' in item: - item_list = item.split('%%', 1) - item_dict = {'uri': item_list[0].strip(), 'title': item_list[1].strip()} + if "%%" in item: + item_list = item.split("%%", 1) + item_dict = {"uri": item_list[0].strip(), "title": item_list[1].strip()} return_list.append(item_dict) else: # If there is no %% and title, use the URL as the title. - item_dict = {'uri': item.strip(), 'title': item.strip()} + item_dict = {"uri": item.strip(), "title": item.strip()} return_list.append(item_dict) return return_list @@ -3007,10 +4041,10 @@ def split_link_string(config, link_string): def split_authority_link_string(config, authority_link_string): """Fields of type 'authority_link' are represented in the CSV file using a structured string, - specifically source%%uri%%title, e.g. "viaf%%http://viaf.org/viaf/153525475%%Rush (Musical group)". - This function takes one of those strings (optionally with a multivalue subdelimiter) - and returns a list of dictionaries with 'source', 'uri' and 'title' keys required by the - 'authority_link' field type. + specifically source%%uri%%title, e.g. "viaf%%http://viaf.org/viaf/153525475%%Rush (Musical group)". + This function takes one of those strings (optionally with a multivalue subdelimiter) + and returns a list of dictionaries with 'source', 'uri' and 'title' keys required by the + 'authority_link' field type. """ authority_link_string = authority_link_string.strip() @@ -3018,16 +4052,24 @@ def split_authority_link_string(config, authority_link_string): if len(authority_link_string) == 0: return return_list - temp_list = authority_link_string.split(config['subdelimiter']) + temp_list = authority_link_string.split(config["subdelimiter"]) for item in temp_list: - if item.count('%%') == 2: - item_list = item.split('%%', 2) - item_dict = {'source': item_list[0].strip(), 'uri': item_list[1].strip(), 'title': item_list[2].strip()} + if item.count("%%") == 2: + item_list = item.split("%%", 2) + item_dict = { + "source": item_list[0].strip(), + "uri": item_list[1].strip(), + "title": item_list[2].strip(), + } return_list.append(item_dict) - if item.count('%%') == 1: + if item.count("%%") == 1: # There is no title. - item_list = item.split('%%', 1) - item_dict = {'source': item_list[0].strip(), 'uri': item_list[1].strip(), 'title': ''} + item_list = item.split("%%", 1) + item_dict = { + "source": item_list[0].strip(), + "uri": item_list[1].strip(), + "title": "", + } return_list.append(item_dict) return return_list @@ -3035,10 +4077,10 @@ def split_authority_link_string(config, authority_link_string): def split_media_track_string(config, media_track_string): """Fields of type 'media_track' are represented in the CSV file using a structured string, - specifically 'label:kind:srclang:path_to_vtt_file', e.g. "en:subtitles:en:path/to/the/vtt/file.vtt". - This function takes one of those strings (optionally with a multivalue subdelimiter) and returns - a list of dictionaries with 'label', 'kind', 'srclang', 'file_path' keys required by the - 'media_track' field type. + specifically 'label:kind:srclang:path_to_vtt_file', e.g. "en:subtitles:en:path/to/the/vtt/file.vtt". 
+ This function takes one of those strings (optionally with a multivalue subdelimiter) and returns + a list of dictionaries with 'label', 'kind', 'srclang', 'file_path' keys required by the + 'media_track' field type. """ media_track_string = media_track_string.strip() @@ -3046,176 +4088,300 @@ def split_media_track_string(config, media_track_string): if len(media_track_string) == 0: return return_list - temp_list = media_track_string.split(config['subdelimiter']) + temp_list = media_track_string.split(config["subdelimiter"]) for item in temp_list: - track_parts_list = item.split(':', 3) + track_parts_list = item.split(":", 3) item_dict = { - 'label': track_parts_list[0], - 'kind': track_parts_list[1], - 'srclang': track_parts_list[2], - 'file_path': track_parts_list[3]} + "label": track_parts_list[0], + "kind": track_parts_list[1], + "srclang": track_parts_list[2], + "file_path": track_parts_list[3], + } return_list.append(item_dict) return return_list -def validate_media_use_tid_in_additional_files_setting(config, media_use_tid_value, additional_field_name): +def validate_media_use_tid_in_additional_files_setting( + config, media_use_tid_value, additional_field_name +): """Validate whether the term ID registered in the "additional_files" config setting - is in the Islandora Media Use vocabulary. + is in the Islandora Media Use vocabulary. """ media_use_tids = [] - if (config['subdelimiter'] in str(media_use_tid_value)): - media_use_tids = str(media_use_tid_value).split(config['subdelimiter']) + if config["subdelimiter"] in str(media_use_tid_value): + media_use_tids = str(media_use_tid_value).split(config["subdelimiter"]) else: media_use_tids.append(media_use_tid_value) for media_use_tid in media_use_tids: - if not value_is_numeric(media_use_tid) and media_use_tid.strip().startswith('http'): + if not value_is_numeric(media_use_tid) and media_use_tid.strip().startswith( + "http" + ): media_use_tid = get_term_id_from_uri(config, media_use_tid.strip()) - if not value_is_numeric(media_use_tid) and not media_use_tid.strip().startswith('http'): - media_use_tid = find_term_in_vocab(config, 'islandora_media_use', media_use_tid.strip()) - - term_endpoint = config['host'] + '/taxonomy/term/' + str(media_use_tid).strip() + '?_format=json' - headers = {'Content-Type': 'application/json'} - response = issue_request(config, 'GET', term_endpoint, headers) + if not value_is_numeric(media_use_tid) and not media_use_tid.strip().startswith( + "http" + ): + media_use_tid = find_term_in_vocab( + config, "islandora_media_use", media_use_tid.strip() + ) + + term_endpoint = ( + config["host"] + + "/taxonomy/term/" + + str(media_use_tid).strip() + + "?_format=json" + ) + headers = {"Content-Type": "application/json"} + response = issue_request(config, "GET", term_endpoint, headers) if response.status_code == 404: - message = 'Term ID "' + str(media_use_tid) + '" registered in the "additional_files" config option ' + \ - 'for field "' + additional_field_name + '" is not a term ID (term doesn\'t exist).' + message = ( + 'Term ID "' + + str(media_use_tid) + + '" registered in the "additional_files" config option ' + + 'for field "' + + additional_field_name + + "\" is not a term ID (term doesn't exist)." 
+ ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if response.status_code == 200: response_body = json.loads(response.text) - if 'vid' in response_body: - if response_body['vid'][0]['target_id'] != 'islandora_media_use': - message = 'Term ID "' + str(media_use_tid) + '" registered in the "additional_files" config option ' + \ - 'for field "' + additional_field_name + '" is not in the Islandora Media Use vocabulary.' + if "vid" in response_body: + if response_body["vid"][0]["target_id"] != "islandora_media_use": + message = ( + 'Term ID "' + + str(media_use_tid) + + '" registered in the "additional_files" config option ' + + 'for field "' + + additional_field_name + + '" is not in the Islandora Media Use vocabulary.' + ) logging.error(message) - sys.exit('Error: ' + message) - if 'field_external_uri' in response_body: - if len(response_body['field_external_uri']) > 0 and response_body['field_external_uri'][0]['uri'] != 'http://pcdm.org/use#OriginalFile': - message = 'Warning: Term ID "' + str(media_use_tid) + '" registered in the "additional_files" config option ' + \ - 'for CSV field "' + additional_field_name + '" will assign an Islandora Media Use term that might ' + \ - "conflict with derivative media. You should temporarily disable the Context or Action that generates those derivatives." + sys.exit("Error: " + message) + if "field_external_uri" in response_body: + if ( + len(response_body["field_external_uri"]) > 0 + and response_body["field_external_uri"][0]["uri"] + != "http://pcdm.org/use#OriginalFile" + ): + message = ( + 'Warning: Term ID "' + + str(media_use_tid) + + '" registered in the "additional_files" config option ' + + 'for CSV field "' + + additional_field_name + + '" will assign an Islandora Media Use term that might ' + + "conflict with derivative media. You should temporarily disable the Context or Action that generates those derivatives." + ) else: # There is no field_external_uri so we can't identify the term. Provide a generic message. - message = 'Warning: Terms registered in the "additional_files" config option ' + \ - 'for CSV field "' + additional_field_name + '" may assign an Islandora Media Use term that will ' + \ - "conflict with derivative media. You should temporarily disable the Context or Action that generates those derivatives." + message = ( + 'Warning: Terms registered in the "additional_files" config option ' + + 'for CSV field "' + + additional_field_name + + '" may assign an Islandora Media Use term that will ' + + "conflict with derivative media. You should temporarily disable the Context or Action that generates those derivatives." + ) print(message) logging.warning(message) def validate_media_use_tid(config, media_use_tid_value_from_csv=None, csv_row_id=None): """Validate whether the term ID, term name, or terms URI provided in the - config value for media_use_tid is in the Islandora Media Use vocabulary. + config value for media_use_tid is in the Islandora Media Use vocabulary. 
""" if media_use_tid_value_from_csv is not None and csv_row_id is not None: if len(str(media_use_tid_value_from_csv)) > 0: media_use_tid_value = media_use_tid_value_from_csv message_wording = ' in the CSV "media_use_tid" column ' else: - media_use_tid_value = config['media_use_tid'] + media_use_tid_value = config["media_use_tid"] message_wording = ' in configuration option "media_use_tid" ' - media_use_terms = str(media_use_tid_value).split(config['subdelimiter']) + media_use_terms = str(media_use_tid_value).split(config["subdelimiter"]) for media_use_term in media_use_terms: - if value_is_numeric(media_use_term) is not True and media_use_term.strip().startswith('http'): + if value_is_numeric( + media_use_term + ) is not True and media_use_term.strip().startswith("http"): media_use_tid = get_term_id_from_uri(config, media_use_term.strip()) if csv_row_id is None: if media_use_tid is False: - message = 'URI "' + media_use_term + '" provided ' + message_wording + ' does not match any taxonomy terms.' + message = ( + 'URI "' + + media_use_term + + '" provided ' + + message_wording + + " does not match any taxonomy terms." + ) logging.error(message) - sys.exit('Error: ' + message) - if media_use_tid is not False and media_use_term.strip() != 'http://pcdm.org/use#OriginalFile': - message = 'Warning: URI "' + media_use_term + '" provided' + message_wording + \ - "will assign an Islandora Media Use term that might conflict with derivative media. " + \ - "You should temporarily disable the Context or Action that generates those derivatives." + sys.exit("Error: " + message) + if ( + media_use_tid is not False + and media_use_term.strip() != "http://pcdm.org/use#OriginalFile" + ): + message = ( + 'Warning: URI "' + + media_use_term + + '" provided' + + message_wording + + "will assign an Islandora Media Use term that might conflict with derivative media. " + + "You should temporarily disable the Context or Action that generates those derivatives." + ) print(message) logging.warning(message) else: if media_use_tid is False: - message = 'URI "' + media_use_term + '" provided in "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + ' does not match any taxonomy terms.' + message = ( + 'URI "' + + media_use_term + + '" provided in "media_use_tid" field in CSV row ' + + str(csv_row_id) + + " does not match any taxonomy terms." + ) logging.error(message) - sys.exit('Error: ' + message) - if media_use_tid is not False and media_use_term.strip() != 'http://pcdm.org/use#OriginalFile': - message = 'Warning: URI "' + media_use_term + '" provided in "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + "will assign an Islandora Media Use term that might conflict with " + \ - "derivative media. You should temporarily disable the Context or Action that generates those derivatives." + sys.exit("Error: " + message) + if ( + media_use_tid is not False + and media_use_term.strip() != "http://pcdm.org/use#OriginalFile" + ): + message = ( + 'Warning: URI "' + + media_use_term + + '" provided in "media_use_tid" field in CSV row ' + + str(csv_row_id) + + "will assign an Islandora Media Use term that might conflict with " + + "derivative media. You should temporarily disable the Context or Action that generates those derivatives." 
+ ) logging.warning(message) - elif value_is_numeric(media_use_term) is not True and media_use_term.strip().startswith('http') is not True: - media_use_tid = find_term_in_vocab(config, 'islandora_media_use', media_use_term.strip()) + elif ( + value_is_numeric(media_use_term) is not True + and media_use_term.strip().startswith("http") is not True + ): + media_use_tid = find_term_in_vocab( + config, "islandora_media_use", media_use_term.strip() + ) if csv_row_id is None: if media_use_tid is False: - message = 'Warning: Term name "' + media_use_term.strip() + '" provided in configuration option "media_use_tid" does not match any taxonomy terms.' + message = ( + 'Warning: Term name "' + + media_use_term.strip() + + '" provided in configuration option "media_use_tid" does not match any taxonomy terms.' + ) logging.warning(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: if media_use_tid is False: - message = 'Warning: Term name "' + media_use_term.strip() + '" provided in "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + ' does not match any taxonomy terms.' + message = ( + 'Warning: Term name "' + + media_use_term.strip() + + '" provided in "media_use_tid" field in CSV row ' + + str(csv_row_id) + + " does not match any taxonomy terms." + ) logging.warning(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: # Confirm the tid exists and is in the islandora_media_use vocabulary - term_endpoint = config['host'] + '/taxonomy/term/' + str(media_use_term.strip()) + '?_format=json' - headers = {'Content-Type': 'application/json'} - response = issue_request(config, 'GET', term_endpoint, headers) + term_endpoint = ( + config["host"] + + "/taxonomy/term/" + + str(media_use_term.strip()) + + "?_format=json" + ) + headers = {"Content-Type": "application/json"} + response = issue_request(config, "GET", term_endpoint, headers) if response.status_code == 404: if csv_row_id is None: - message = 'Term ID "' + str(media_use_term) + '" used in the "media_use_tid" configuration option is not a term ID (term doesn\'t exist).' + message = ( + 'Term ID "' + + str(media_use_term) + + '" used in the "media_use_tid" configuration option is not a term ID (term doesn\'t exist).' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: - message = 'Term ID "' + str(media_use_term) + '" used in the "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + ' is not a term ID (term doesn\'t exist).' + message = ( + 'Term ID "' + + str(media_use_term) + + '" used in the "media_use_tid" field in CSV row ' + + str(csv_row_id) + + " is not a term ID (term doesn't exist)." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if response.status_code == 200: response_body = json.loads(response.text) if csv_row_id is None: - if 'vid' in response_body: - if response_body['vid'][0]['target_id'] != 'islandora_media_use': - message = 'Term ID "' + \ - str(media_use_term) + '" provided in configuration option "media_use_tid" is not in the Islandora Media Use vocabulary.' + if "vid" in response_body: + if ( + response_body["vid"][0]["target_id"] + != "islandora_media_use" + ): + message = ( + 'Term ID "' + + str(media_use_term) + + '" provided in configuration option "media_use_tid" is not in the Islandora Media Use vocabulary.' 
+ ) logging.error(message) - sys.exit('Error: ' + message) - if 'field_external_uri' in response_body: - if response_body['field_external_uri'][0]['uri'] != 'http://pcdm.org/use#OriginalFile': - message = 'Warning: Term ID "' + media_use_term + '" provided in configuration option "media_use_tid" ' + \ - "will assign an Islandora Media Use term that might conflict with derivative media. " + \ - " You should temporarily disable the Context or Action that generates those derivatives." + sys.exit("Error: " + message) + if "field_external_uri" in response_body: + if ( + response_body["field_external_uri"][0]["uri"] + != "http://pcdm.org/use#OriginalFile" + ): + message = ( + 'Warning: Term ID "' + + media_use_term + + '" provided in configuration option "media_use_tid" ' + + "will assign an Islandora Media Use term that might conflict with derivative media. " + + " You should temporarily disable the Context or Action that generates those derivatives." + ) print(message) logging.warning(message) else: - if 'vid' in response_body: - if response_body['vid'][0]['target_id'] != 'islandora_media_use': - message = 'Term ID "' + \ - str(media_use_term) + '" provided in the "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + ' is not in the Islandora Media Use vocabulary.' + if "vid" in response_body: + if ( + response_body["vid"][0]["target_id"] + != "islandora_media_use" + ): + message = ( + 'Term ID "' + + str(media_use_term) + + '" provided in the "media_use_tid" field in CSV row ' + + str(csv_row_id) + + " is not in the Islandora Media Use vocabulary." + ) logging.error(message) - sys.exit('Error: ' + message) - if 'field_external_uri' in response_body: - if response_body['field_external_uri'][0]['uri'] != 'http://pcdm.org/use#OriginalFile': - message = 'Warning: Term ID "' + media_use_term + '" provided in "media_use_tid" field in CSV row ' + \ - str(csv_row_id) + " will assign an Islandora Media Use term that might conflict with " + \ - "derivative media. You should temporarily disable the Context or Action that generates those derivatives." + sys.exit("Error: " + message) + if "field_external_uri" in response_body: + if ( + response_body["field_external_uri"][0]["uri"] + != "http://pcdm.org/use#OriginalFile" + ): + message = ( + 'Warning: Term ID "' + + media_use_term + + '" provided in "media_use_tid" field in CSV row ' + + str(csv_row_id) + + " will assign an Islandora Media Use term that might conflict with " + + "derivative media. You should temporarily disable the Context or Action that generates those derivatives." + ) print(message) logging.warning(message) def validate_media_use_tids_in_csv(config, csv_data): - """Validate 'media_use_tid' values in CSV if they exist. 
- """ - if config['task'] == 'add_media': - csv_id_field = 'node_id' + """Validate 'media_use_tid' values in CSV if they exist.""" + if config["task"] == "add_media": + csv_id_field = "node_id" else: - csv_id_field = config['id_field'] + csv_id_field = config["id_field"] for count, row in enumerate(csv_data, start=1): - if 'media_use_tid' in row: - delimited_field_values = row['media_use_tid'].split(config['subdelimiter']) + if "media_use_tid" in row: + delimited_field_values = row["media_use_tid"].split(config["subdelimiter"]) for field_value in delimited_field_values: if len(field_value.strip()) > 0: validate_media_use_tid(config, field_value, row[csv_id_field]) @@ -3223,37 +4389,45 @@ def validate_media_use_tids_in_csv(config, csv_data): def preprocess_field_data(subdelimiter, field_value, path_to_script): """Executes a field preprocessor script and returns its output and exit status code. The script - is passed the field subdelimiter as defined in the config YAML and the field's value, and - prints a modified vesion of the value (result) back to this function. + is passed the field subdelimiter as defined in the config YAML and the field's value, and + prints a modified vesion of the value (result) back to this function. """ - cmd = subprocess.Popen([path_to_script, subdelimiter, field_value], stdout=subprocess.PIPE) + cmd = subprocess.Popen( + [path_to_script, subdelimiter, field_value], stdout=subprocess.PIPE + ) result, stderrdata = cmd.communicate() return result, cmd.returncode def execute_bootstrap_script(path_to_script, path_to_config_file): - """Executes a bootstrap script and returns its output and exit status code. - """ - cmd = subprocess.Popen([path_to_script, path_to_config_file], stdout=subprocess.PIPE) + """Executes a bootstrap script and returns its output and exit status code.""" + cmd = subprocess.Popen( + [path_to_script, path_to_config_file], stdout=subprocess.PIPE + ) result, stderrdata = cmd.communicate() return result, cmd.returncode def execute_shutdown_script(path_to_script, path_to_config_file): - """Executes a shutdown script and returns its output and exit status code. - """ - cmd = subprocess.Popen([path_to_script, path_to_config_file], stdout=subprocess.PIPE) + """Executes a shutdown script and returns its output and exit status code.""" + cmd = subprocess.Popen( + [path_to_script, path_to_config_file], stdout=subprocess.PIPE + ) result, stderrdata = cmd.communicate() return result, cmd.returncode -def execute_entity_post_task_script(path_to_script, path_to_config_file, http_response_code, entity_json=''): - """Executes a entity-level post-task script and returns its output and exit status code. - """ - cmd = subprocess.Popen([path_to_script, path_to_config_file, str(http_response_code), entity_json], stdout=subprocess.PIPE) +def execute_entity_post_task_script( + path_to_script, path_to_config_file, http_response_code, entity_json="" +): + """Executes a entity-level post-task script and returns its output and exit status code.""" + cmd = subprocess.Popen( + [path_to_script, path_to_config_file, str(http_response_code), entity_json], + stdout=subprocess.PIPE, + ) result, stderrdata = cmd.communicate() return result, cmd.returncode @@ -3309,41 +4483,46 @@ def execute_entity_post_task_script(path_to_script, path_to_config_file, http_re def create_file(config, filename, file_fieldname, node_csv_row, node_id): """Creates a file in Drupal, which is then referenced by the accompanying media. 
- Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - filename : string - The full path to the file (either from the 'file' CSV column or downloaded from somewhere). - file_fieldname: string - The name of the CSV column containing the filename. None if the file isn't - in a CSV field (e.g., when config['paged_content_from_directories'] is True). - node_csv_row: OrderedDict - E.g., OrderedDict([('file', 'IMG_5083.JPG'), ('id', '05'), ('title', 'Alcatraz Island'). - node_id: string - The nid of the parent media's parent node. - Returns - ------- - int|bool|None - The file ID (int) of the successfully created file; False if there is insufficient - information to create the file or file creation failed, or None if config['nodes_only']. - """ - if config['nodes_only'] is True: + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + filename : string + The full path to the file (either from the 'file' CSV column or downloaded from somewhere). + file_fieldname: string + The name of the CSV column containing the filename. None if the file isn't + in a CSV field (e.g., when config['paged_content_from_directories'] is True). + node_csv_row: OrderedDict + E.g., OrderedDict([('file', 'IMG_5083.JPG'), ('id', '05'), ('title', 'Alcatraz Island'). + node_id: string + The nid of the parent media's parent node. + Returns + ------- + int|bool|None + The file ID (int) of the successfully created file; False if there is insufficient + information to create the file or file creation failed, or None if config['nodes_only']. + """ + if config["nodes_only"] is True: return None - if config['task'] == 'add_media' or config['task'] == 'create': - if file_fieldname is not None and len(node_csv_row[file_fieldname].strip()) == 0: + if config["task"] == "add_media" or config["task"] == "create": + if ( + file_fieldname is not None + and len(node_csv_row[file_fieldname].strip()) == 0 + ): return None is_remote = False filename = filename.strip() - if filename.startswith('http'): + if filename.startswith("http"): remote_file_http_response_code = ping_remote_file(config, filename) if remote_file_http_response_code != 200: return False - file_path = download_remote_file(config, filename, file_fieldname, node_csv_row, node_id) + file_path = download_remote_file( + config, filename, file_fieldname, node_csv_row, node_id + ) if file_path is False: return False filename = file_path.split("/")[-1] @@ -3351,21 +4530,33 @@ def create_file(config, filename, file_fieldname, node_csv_row, node_id): elif os.path.isabs(filename): # Validate that the file exists if check_file_exists(config, filename) is False: - logging.error('File not created for CSV row "%s": file "%s" does not exist.', node_csv_row[config['id_field']], filename) + logging.error( + 'File not created for CSV row "%s": file "%s" does not exist.', + node_csv_row[config["id_field"]], + filename, + ) return False file_path = filename else: if check_file_exists(config, filename) is False: - logging.error('File not created for CSV row "%s": file "%s" does not exist.', node_csv_row[config['id_field']], filename) + logging.error( + 'File not created for CSV row "%s": file "%s" does not exist.', + node_csv_row[config["id_field"]], + filename, + ) return False - file_path = os.path.join(config['input_dir'], filename) + file_path = os.path.join(config["input_dir"], filename) media_type = set_media_type(config, file_path, file_fieldname, node_csv_row) - if 
media_type in config['media_type_file_fields']: - media_file_field = config['media_type_file_fields'][media_type] + if media_type in config["media_type_file_fields"]: + media_file_field = config["media_type_file_fields"][media_type] else: - logging.error('File not created for CSV row "%s": media type "%s" not recognized.', node_csv_row[config['id_field']], media_type) + logging.error( + 'File not created for CSV row "%s": media type "%s" not recognized.', + node_csv_row[config["id_field"]], + media_type, + ) return False # Requests/urllib3 requires filenames used in Content-Disposition headers to be encoded as latin-1. @@ -3379,42 +4570,89 @@ def create_file(config, filename, file_fieldname, node_csv_row, node_id): if ascii_only is False: original_filename = copy.copy(filename) filename = unidecode(filename) - logging.warning("Filename '" + original_filename + "' contains non-ASCII characters, normalized to '" + filename + "'.") + logging.warning( + "Filename '" + + original_filename + + "' contains non-ASCII characters, normalized to '" + + filename + + "'." + ) - file_endpoint_path = '/file/upload/media/' + media_type + '/' + media_file_field + '?_format=json' + file_endpoint_path = ( + "/file/upload/media/" + media_type + "/" + media_file_field + "?_format=json" + ) file_headers = { - 'Content-Type': 'application/octet-stream', - 'Content-Disposition': 'file; filename="' + filename + '"' + "Content-Type": "application/octet-stream", + "Content-Disposition": 'file; filename="' + filename + '"', } - binary_data = open(file_path, 'rb') + binary_data = open(file_path, "rb") try: - file_response = issue_request(config, 'POST', file_endpoint_path, file_headers, '', binary_data) + file_response = issue_request( + config, "POST", file_endpoint_path, file_headers, "", binary_data + ) if file_response.status_code == 201: file_json = json.loads(file_response.text) - file_id = file_json['fid'][0]['value'] + file_id = file_json["fid"][0]["value"] # For now, we can only validate checksums for files named in the 'file' CSV column. # See https://github.com/mjordan/islandora_workbench/issues/307. - if config['fixity_algorithm'] is not None and file_fieldname == 'file': - file_uuid = file_json['uuid'][0]['value'] - hash_from_drupal = get_file_hash_from_drupal(config, file_uuid, config['fixity_algorithm']) - hash_from_local = get_file_hash_from_local(config, file_path, config['fixity_algorithm']) + if config["fixity_algorithm"] is not None and file_fieldname == "file": + file_uuid = file_json["uuid"][0]["value"] + hash_from_drupal = get_file_hash_from_drupal( + config, file_uuid, config["fixity_algorithm"] + ) + hash_from_local = get_file_hash_from_local( + config, file_path, config["fixity_algorithm"] + ) if hash_from_drupal == hash_from_local: - logging.info('Local and Drupal %s checksums for file "%s" (%s) match.', config['fixity_algorithm'], file_path, hash_from_local) + logging.info( + 'Local and Drupal %s checksums for file "%s" (%s) match.', + config["fixity_algorithm"], + file_path, + hash_from_local, + ) else: - print("Warning: local and Drupal checksums for '" + file_path + "' do not match. 
See the log for more detail.") - logging.warning('Local and Drupal %s checksums for file "%s" (named in CSV row "%s") do not match (local: %s, Drupal: %s).', - config['fixity_algorithm'], file_path, node_csv_row[config['id_field']], hash_from_local, hash_from_drupal) - if 'checksum' in node_csv_row: - if hash_from_local == node_csv_row['checksum'].strip(): - logging.info('Local %s checksum and value in the CSV "checksum" field for file "%s" (%s) match.', config['fixity_algorithm'], file_path, hash_from_local) + print( + "Warning: local and Drupal checksums for '" + + file_path + + "' do not match. See the log for more detail." + ) + logging.warning( + 'Local and Drupal %s checksums for file "%s" (named in CSV row "%s") do not match (local: %s, Drupal: %s).', + config["fixity_algorithm"], + file_path, + node_csv_row[config["id_field"]], + hash_from_local, + hash_from_drupal, + ) + if "checksum" in node_csv_row: + if hash_from_local == node_csv_row["checksum"].strip(): + logging.info( + 'Local %s checksum and value in the CSV "checksum" field for file "%s" (%s) match.', + config["fixity_algorithm"], + file_path, + hash_from_local, + ) else: - print("Warning: local checksum and value in CSV for '" + file_path + "' do not match. See the log for more detail.") - logging.warning('Local %s checksum and value in the CSV "checksum" field for file "%s" (named in CSV row "%s") do not match (local: %s, CSV: %s).', - config['fixity_algorithm'], file_path, node_csv_row[config['id_field']], hash_from_local, node_csv_row['checksum']) - if is_remote and config['delete_tmp_upload'] is True: - containing_folder = os.path.join(config['temp_dir'], re.sub('[^A-Za-z0-9]+', '_', node_csv_row[config['id_field']])) + print( + "Warning: local checksum and value in CSV for '" + + file_path + + "' do not match. See the log for more detail." + ) + logging.warning( + 'Local %s checksum and value in the CSV "checksum" field for file "%s" (named in CSV row "%s") do not match (local: %s, CSV: %s).', + config["fixity_algorithm"], + file_path, + node_csv_row[config["id_field"]], + hash_from_local, + node_csv_row["checksum"], + ) + if is_remote and config["delete_tmp_upload"] is True: + containing_folder = os.path.join( + config["temp_dir"], + re.sub("[^A-Za-z0-9]+", "_", node_csv_row[config["id_field"]]), + ) try: # E.g., on Windows, "[WinError 32] The process cannot access the file because it is being used by another process" shutil.rmtree(containing_folder) @@ -3423,55 +4661,79 @@ def create_file(config, filename, file_fieldname, node_csv_row, node_id): return file_id else: - logging.error('File not created for "' + file_path + '", POST request to "%s" returned an HTTP status code of "%s" and a response body of %s.', - file_endpoint_path, file_response.status_code, file_response.content) + logging.error( + 'File not created for "' + + file_path + + '", POST request to "%s" returned an HTTP status code of "%s" and a response body of %s.', + file_endpoint_path, + file_response.status_code, + file_response.content, + ) return False except requests.exceptions.RequestException as e: logging.error(e) return False -def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_tid=None): +def create_media( + config, filename, file_fieldname, node_id, csv_row, media_use_tid=None +): """Creates a media in Drupal. - Parameters - ---------- - config : dict - The configuration settings defined by workbench_config.get_config(). - filename : string - The value of the CSV 'file' field for the current node. 
- file_fieldname: string - The name of the CSV column containing the filename. None if the file isn't - in a CSV field (e.g., when config['paged_content_from_directories'] is True). - node_id: string - The ID of the node to attach the media to. This is False if file creation failed. - csv_row: OrderedDict - E.g., OrderedDict([('file', 'IMG_5083.JPG'), ('id', '05'), ('title', 'Alcatraz Island'). - Could be either a CSV row describing nodes (e.g. during 'create' tasks) or describing - media (e.g. during 'add_media' tasks). - media_use_tid : int|str - A valid term ID (or a subdelimited list of IDs) from the Islandora Media Use vocabulary. - Returns - ------- - int|False - The HTTP status code from the attempt to create the media, False if - it doesn't have sufficient information to create the media, or None - if config['nodes_only'] is True. - """ - if config['nodes_only'] is True: + Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + filename : string + The value of the CSV 'file' field for the current node. + file_fieldname: string + The name of the CSV column containing the filename. None if the file isn't + in a CSV field (e.g., when config['paged_content_from_directories'] is True). + node_id: string + The ID of the node to attach the media to. This is False if file creation failed. + csv_row: OrderedDict + E.g., OrderedDict([('file', 'IMG_5083.JPG'), ('id', '05'), ('title', 'Alcatraz Island'). + Could be either a CSV row describing nodes (e.g. during 'create' tasks) or describing + media (e.g. during 'add_media' tasks). + media_use_tid : int|str + A valid term ID (or a subdelimited list of IDs) from the Islandora Media Use vocabulary. + Returns + ------- + int|False + The HTTP status code from the attempt to create the media, False if + it doesn't have sufficient information to create the media, or None + if config['nodes_only'] is True. + """ + if config["nodes_only"] is True: return None if len(filename.strip()) == 0: if file_fieldname is None: - message = 'Media not created because field "' + file_fieldname + '" in CSV row with ID "' + csv_row[config['id_field']] + '" is empty.' + message = ( + 'Media not created because field "' + + file_fieldname + + '" in CSV row with ID "' + + csv_row[config["id_field"]] + + '" is empty.' + ) logging.error(message) return False if check_file_exists(config, filename) is False: if file_fieldname is None: - message = 'Media not created because file "' + filename + '" could not be found.' + message = ( + 'Media not created because file "' + filename + '" could not be found.' + ) else: - message = 'Media not created because file "' + filename + '" identified in field "' + file_fieldname + '" in CSV row with ID "' + csv_row[config['id_field']] + '" could not be found.' + message = ( + 'Media not created because file "' + + filename + + '" identified in field "' + + file_fieldname + + '" in CSV row with ID "' + + csv_row[config["id_field"]] + + '" could not be found.' 
+ ) logging.error(message) return False @@ -3491,41 +4753,58 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t else: file_result = create_file(config, filename, file_fieldname, csv_row, node_id) - if filename.startswith('http'): + if filename.startswith("http"): if file_result > 0: - filename = get_preprocessed_file_path(config, file_fieldname, csv_row, node_id, False) + filename = get_preprocessed_file_path( + config, file_fieldname, csv_row, node_id, False + ) if isinstance(file_result, int): - if 'media_use_tid' in csv_row and len(csv_row['media_use_tid']) > 0: - media_use_tid_value = csv_row['media_use_tid'] + if "media_use_tid" in csv_row and len(csv_row["media_use_tid"]) > 0: + media_use_tid_value = csv_row["media_use_tid"] else: - media_use_tid_value = config['media_use_tid'] + media_use_tid_value = config["media_use_tid"] if media_use_tid is not None: media_use_tid_value = media_use_tid media_use_tids = [] - media_use_terms = str(media_use_tid_value).split(config['subdelimiter']) + media_use_terms = str(media_use_tid_value).split(config["subdelimiter"]) for media_use_term in media_use_terms: if value_is_numeric(media_use_term): media_use_tids.append(media_use_term) - if not value_is_numeric(media_use_term) and media_use_term.strip().startswith('http'): + if not value_is_numeric( + media_use_term + ) and media_use_term.strip().startswith("http"): media_use_tids.append(get_term_id_from_uri(config, media_use_term)) - if not value_is_numeric(media_use_term) and not media_use_term.strip().startswith('http'): - media_use_tids.append(find_term_in_vocab(config, 'islandora_media_use', media_use_term.strip())) + if not value_is_numeric( + media_use_term + ) and not media_use_term.strip().startswith("http"): + media_use_tids.append( + find_term_in_vocab( + config, "islandora_media_use", media_use_term.strip() + ) + ) media_bundle_response_code = ping_media_bundle(config, media_type) if media_bundle_response_code == 404: - message = 'File "' + filename + '" identified in CSV row ' + file_fieldname + \ - ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.' + message = ( + 'File "' + + filename + + '" identified in CSV row ' + + file_fieldname + + " will create a media of type (" + + media_type + + "), but that media type is not configured in the destination Drupal." + ) logging.error(message) return False - media_field = config['media_type_file_fields'][media_type] + media_field = config["media_type_file_fields"][media_type] if media_type in get_oembed_media_types(config): - if 'title' in csv_row: + if "title" in csv_row: # WIP on #572: 'title' applies to node CSVs, for media, it should be 'name'. - media_name = csv_row['title'] + media_name = csv_row["title"] else: media_name = get_node_title_from_nid(config, node_id) if not media_name: @@ -3535,98 +4814,87 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t else: media_name = os.path.basename(filename) - if config['use_node_title_for_media_title']: - if 'title' in csv_row: + if config["use_node_title_for_media_title"]: + if "title" in csv_row: # WIP on #572: 'title' applies to node CSVs, for media, it should be 'name'. - media_name = csv_row['title'] + media_name = csv_row["title"] else: media_name = get_node_title_from_nid(config, node_id) if not media_name: message = 'Cannot access node " + node_id + ", so cannot get its title for use in media title. Using filename instead.' 
logging.warning(message) media_name = os.path.basename(filename) - elif config['use_nid_in_media_title']: + elif config["use_nid_in_media_title"]: media_name = f"{node_id}-Original File" - elif config['field_for_media_title']: - if len(csv_row[config['field_for_media_title']]) > 0: - media_name = csv_row[config['field_for_media_title']][:255] + elif config["field_for_media_title"]: + if len(csv_row[config["field_for_media_title"]]) > 0: + media_name = csv_row[config["field_for_media_title"]][:255] else: media_name = os.path.basename(filename) # Create a media from an oEmbed URL. if media_type in get_oembed_media_types(config): media_json = { - "bundle": [{ - "target_id": media_type, - "target_type": "media_type", - }], - "name": [{ - "value": media_name - }], - media_field: [{ - "value": filename - }], - "field_media_of": [{ - "target_id": int(node_id), - "target_type": 'node' - }], - "field_media_use": [{ - "target_id": media_use_tids[0], - "target_type": 'taxonomy_term' - }] + "bundle": [ + { + "target_id": media_type, + "target_type": "media_type", + } + ], + "name": [{"value": media_name}], + media_field: [{"value": filename}], + "field_media_of": [{"target_id": int(node_id), "target_type": "node"}], + "field_media_use": [ + {"target_id": media_use_tids[0], "target_type": "taxonomy_term"} + ], } # Create a media from a local or remote file. else: media_json = { - "bundle": [{ - "target_id": media_type, - "target_type": "media_type", - }], - "name": [{ - "value": media_name - }], - media_field: [{ - "target_id": file_result, - "target_type": 'file' - }], - "field_media_of": [{ - "target_id": int(node_id), - "target_type": 'node' - }], - "field_media_use": [{ - "target_id": media_use_tids[0], - "target_type": 'taxonomy_term' - }] + "bundle": [ + { + "target_id": media_type, + "target_type": "media_type", + } + ], + "name": [{"value": media_name}], + media_field: [{"target_id": file_result, "target_type": "file"}], + "field_media_of": [{"target_id": int(node_id), "target_type": "node"}], + "field_media_use": [ + {"target_id": media_use_tids[0], "target_type": "taxonomy_term"} + ], } - if 'published' in csv_row and len(csv_row['published']) > 0: - media_json['status'] = {'value': csv_row['published']} + if "published" in csv_row and len(csv_row["published"]) > 0: + media_json["status"] = {"value": csv_row["published"]} # Populate some media type-specific fields on the media. @todo: We need a generalized way of # determining which media fields are required, e.g. checking the media type configuration. - if media_field == 'field_media_image': - if 'image_alt_text' in csv_row and len(csv_row['image_alt_text']) > 0: - alt_text = clean_image_alt_text(csv_row['image_alt_text']) - media_json[media_field][0]['alt'] = alt_text + if media_field == "field_media_image": + if "image_alt_text" in csv_row and len(csv_row["image_alt_text"]) > 0: + alt_text = clean_image_alt_text(csv_row["image_alt_text"]) + media_json[media_field][0]["alt"] = alt_text else: alt_text = clean_image_alt_text(media_name) - media_json[media_field][0]['alt'] = alt_text + media_json[media_field][0]["alt"] = alt_text # extracted_text media must have their field_edited_text field populated for full text indexing. 
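# A hedged example of the file-backed media payload assembled above, using the
# "image" bundle and placeholder entity IDs; the field names mirror the JSON
# Workbench builds ("field_media_of", "field_media_use", and the media file field).
example_media_json = {
    "bundle": [{"target_id": "image", "target_type": "media_type"}],
    "name": [{"value": "IMG_5083.JPG"}],
    "field_media_image": [{"target_id": 663, "target_type": "file"}],
    "field_media_of": [{"target_id": 42, "target_type": "node"}],
    "field_media_use": [{"target_id": 25, "target_type": "taxonomy_term"}],
}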
- if media_type == 'extracted_text': + if media_type == "extracted_text": if check_file_exists(config, filename): - media_json['field_edited_text'] = list() + media_json["field_edited_text"] = list() if os.path.isabs(filename) is False: - filename = os.path.join(config['input_dir'], filename) - extracted_text_file = open(filename, 'r', -1, 'utf-8') - media_json['field_edited_text'].append({'value': extracted_text_file.read()}) + filename = os.path.join(config["input_dir"], filename) + extracted_text_file = open(filename, "r", -1, "utf-8") + media_json["field_edited_text"].append( + {"value": extracted_text_file.read()} + ) else: logging.error("Extracted text file %s not found.", filename) # WIP on #572: if this is an `add_media` task, add fields in CSV to media_json, being careful to # not stomp on existing fields. Block below is copied from create() and needs to be modified to # suit creation of custom fields in add_media tasks. - ''' + """ if config['task'] == 'add_media': field_definitions = get_field_definitions(config, 'media') @@ -3706,22 +4974,32 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t else: simple_field = workbench_fields.SimpleField() node = simple_field.create(config, field_definitions, node, row, custom_field) - ''' + """ # Create media_track files here, since they should exist before we create the parent media. # @todo WIP on #572: if there are track file fields in the add_media CSV, create them here, as below for track file field in node CSV. - media_types_with_track_files = config['media_track_file_fields'].keys() + media_types_with_track_files = config["media_track_file_fields"].keys() valid_media_track_fields = list() if media_type in media_types_with_track_files: # Check for fields in node_csv_row that have names like 'media:video:field_track' and validate their contents. # Note: Does not validate the fields' configuration (--check does that). node_csv_field_names = list(csv_row.keys()) if len(node_csv_field_names): - media_track_fields = [x for x in node_csv_field_names if x.startswith('media:' + media_type)] + media_track_fields = [ + x + for x in node_csv_field_names + if x.startswith("media:" + media_type) + ] # Should be just one field per media type. - if len(media_track_fields) and media_type in config['media_track_file_fields']: + if ( + len(media_track_fields) + and media_type in config["media_track_file_fields"] + ): for media_track_field in media_track_fields: - if validate_media_track_value(csv_row[media_track_field]) is True: + if ( + validate_media_track_value(csv_row[media_track_field]) + is True + ): valid_media_track_fields.append(media_track_field) # Create the media track file(s) for each entry in valid_potential_media_track_fields (there could be multiple track entries). @@ -3729,60 +5007,111 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t media_track_field_data = [] # Should be just one field per media type. 
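# A small sketch of the column-name convention used for media track fields in
# this block: columns are named "media:<media_type>:<field_name>", for example
# "media:video:field_track", and the third segment is the Drupal field that the
# assembled track data is attached to.
def parse_media_track_column(column_name):
    """Split a fully qualified media track column name into its parts."""
    parts = column_name.split(":")
    if len(parts) == 3 and parts[0] == "media":
        return {"media_type": parts[1], "field_name": parts[2]}
    return None


# {'media_type': 'video', 'field_name': 'field_track'}
print(parse_media_track_column("media:video:field_track"))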
fully_qualified_media_track_field_name = valid_media_track_fields[0] - media_track_entries = split_media_track_string(config, csv_row[fully_qualified_media_track_field_name]) + media_track_entries = split_media_track_string( + config, csv_row[fully_qualified_media_track_field_name] + ) for media_track_entry in media_track_entries: - media_track_field_name_parts = fully_qualified_media_track_field_name.split(':') - create_track_file_result = create_file(config, media_track_entry['file_path'], fully_qualified_media_track_field_name, csv_row, node_id) - if create_track_file_result is not False and isinstance(create_track_file_result, int): + media_track_field_name_parts = ( + fully_qualified_media_track_field_name.split(":") + ) + create_track_file_result = create_file( + config, + media_track_entry["file_path"], + fully_qualified_media_track_field_name, + csv_row, + node_id, + ) + if create_track_file_result is not False and isinstance( + create_track_file_result, int + ): # /entity/file/663?_format=json will return JSON containing the file's 'uri'. - track_file_info_response = issue_request(config, 'GET', f"/entity/file/{create_track_file_result}?_format=json") + track_file_info_response = issue_request( + config, + "GET", + f"/entity/file/{create_track_file_result}?_format=json", + ) track_file_info = json.loads(track_file_info_response.text) - track_file_url = track_file_info['uri'][0]['url'] - logging.info(f"Media track file {config['host'].rstrip('/')}{track_file_url} created from {media_track_entry['file_path']}.") + track_file_url = track_file_info["uri"][0]["url"] + logging.info( + f"Media track file {config['host'].rstrip('/')}{track_file_url} created from {media_track_entry['file_path']}." + ) track_file_data = { - 'target_id': track_file_info['fid'][0]['value'], - 'kind': media_track_entry['kind'], - 'label': media_track_entry['label'], - 'srclang': media_track_entry['srclang'], - 'default': False, - 'url': track_file_url} + "target_id": track_file_info["fid"][0]["value"], + "kind": media_track_entry["kind"], + "label": media_track_entry["label"], + "srclang": media_track_entry["srclang"], + "default": False, + "url": track_file_url, + } media_track_field_data.append(track_file_data) else: # If there are any failures, proceed with creating the parent media. - logging.error(f"Media track using {media_track_entry['file_path']} not created; create_file returned {create_track_file_result}.") + logging.error( + f"Media track using {media_track_entry['file_path']} not created; create_file returned {create_track_file_result}." + ) # Set the "default" attribute of the first media track. 
if media_track_field_data: - media_track_field_data[0]['default'] = True - media_json[media_track_field_name_parts[2]] = media_track_field_data - - media_endpoint_path = '/entity/media?_format=json' if config['standalone_media_url'] else '/entity/media' - media_headers = { - 'Content-Type': 'application/json' - } + media_track_field_data[0]["default"] = True + media_json[media_track_field_name_parts[2]] = ( + media_track_field_data + ) + + media_endpoint_path = ( + "/entity/media?_format=json" + if config["standalone_media_url"] + else "/entity/media" + ) + media_headers = {"Content-Type": "application/json"} try: - media_response = issue_request(config, 'POST', media_endpoint_path, media_headers, media_json) + media_response = issue_request( + config, "POST", media_endpoint_path, media_headers, media_json + ) if media_response.status_code != 201: - logging.error('Media not created, POST request to "%s" returned an HTTP status code of "%s" and a response body of %s.', - media_endpoint_path, media_response.status_code, media_response.content) - logging.error('JSON request body used in previous POST to "%s" was %s.', media_endpoint_path, media_json) + logging.error( + 'Media not created, POST request to "%s" returned an HTTP status code of "%s" and a response body of %s.', + media_endpoint_path, + media_response.status_code, + media_response.content, + ) + logging.error( + 'JSON request body used in previous POST to "%s" was %s.', + media_endpoint_path, + media_json, + ) if len(media_use_tids) > 1: media_response_body = json.loads(media_response.text) - if 'mid' in media_response_body: - media_id = media_response_body['mid'][0]['value'] + if "mid" in media_response_body: + media_id = media_response_body["mid"][0]["value"] patch_media_use_terms(config, media_id, media_type, media_use_tids) else: - logging.error("Could not PATCH additional media use terms to media created from '%s' because media ID is not available.", filename) + logging.error( + "Could not PATCH additional media use terms to media created from '%s' because media ID is not available.", + filename, + ) # Execute media-specific post-create scripts, if any are configured. - if 'media_post_create' in config and len(config['media_post_create']) > 0: - for command in config['media_post_create']: - post_task_output, post_task_return_code = execute_entity_post_task_script(command, config['config_file_path'], media_response.status_code, media_response.text) + if "media_post_create" in config and len(config["media_post_create"]) > 0: + for command in config["media_post_create"]: + post_task_output, post_task_return_code = ( + execute_entity_post_task_script( + command, + config["config_file_path"], + media_response.status_code, + media_response.text, + ) + ) if post_task_return_code == 0: - logging.info("Post media create script " + command + " executed successfully.") + logging.info( + "Post media create script " + + command + + " executed successfully." + ) else: - logging.error("Post media create script " + command + " failed.") + logging.error( + "Post media create script " + command + " failed." + ) return media_response.status_code except requests.exceptions.RequestException as e: @@ -3797,53 +5126,47 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t def patch_media_fields(config, media_id, media_type, node_csv_row): - """Patch the media entity with base fields from the parent node. 
- """ - media_json = { - 'bundle': [ - {'target_id': media_type} - ] - } + """Patch the media entity with base fields from the parent node.""" + media_json = {"bundle": [{"target_id": media_type}]} for field_name, field_value in node_csv_row.items(): - if field_name == 'created' and len(field_value) > 0: - media_json['created'] = [{'value': field_value}] - if field_name == 'uid' and len(field_value) > 0: - media_json['uid'] = [{'target_id': field_value}] + if field_name == "created" and len(field_value) > 0: + media_json["created"] = [{"value": field_value}] + if field_name == "uid" and len(field_value) > 0: + media_json["uid"] = [{"target_id": field_value}] if len(media_json) > 1: - if config['standalone_media_url'] is True: - endpoint = config['host'] + '/media/' + str(media_id) + '?_format=json' + if config["standalone_media_url"] is True: + endpoint = config["host"] + "/media/" + str(media_id) + "?_format=json" else: - endpoint = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - headers = {'Content-Type': 'application/json'} - response = issue_request(config, 'PATCH', endpoint, headers, media_json) + endpoint = config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + headers = {"Content-Type": "application/json"} + response = issue_request(config, "PATCH", endpoint, headers, media_json) if response.status_code == 200: logging.info("Media %s fields updated to match parent node's.", endpoint) else: - logging.warning("Media %s fields not updated to match parent node's.", endpoint) + logging.warning( + "Media %s fields not updated to match parent node's.", endpoint + ) def patch_media_use_terms(config, media_id, media_type, media_use_tids): - """Patch the media entity's field_media_use. - """ - media_json = { - 'bundle': [ - {'target_id': media_type} - ] - } + """Patch the media entity's field_media_use.""" + media_json = {"bundle": [{"target_id": media_type}]} media_use_tids_json = [] for media_use_tid in media_use_tids: - media_use_tids_json.append({'target_id': media_use_tid, 'target_type': 'taxonomy_term'}) + media_use_tids_json.append( + {"target_id": media_use_tid, "target_type": "taxonomy_term"} + ) - media_json['field_media_use'] = media_use_tids_json - if config['standalone_media_url'] is True: - endpoint = config['host'] + '/media/' + str(media_id) + '?_format=json' + media_json["field_media_use"] = media_use_tids_json + if config["standalone_media_url"] is True: + endpoint = config["host"] + "/media/" + str(media_id) + "?_format=json" else: - endpoint = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - headers = {'Content-Type': 'application/json'} - response = issue_request(config, 'PATCH', endpoint, headers, media_json) + endpoint = config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + headers = {"Content-Type": "application/json"} + response = issue_request(config, "PATCH", endpoint, headers, media_json) if response.status_code == 200: logging.info("Media %s Islandora Media Use terms updated.", endpoint) else: @@ -3851,139 +5174,175 @@ def patch_media_use_terms(config, media_id, media_type, media_use_tids): def clean_image_alt_text(input_string): - ''' Strip out HTML markup to guard against CSRF in alt text. - ''' - cleaned_string = re.sub('<[^<]+?>', '', input_string) + """Strip out HTML markup to guard against CSRF in alt text.""" + cleaned_string = re.sub("<[^<]+?>", "", input_string) return cleaned_string def patch_image_alt_text(config, media_id, node_csv_row): """Patch the alt text value for an image media. 
Use the parent node's title - unless the CSV record contains an image_alt_text field with something in it. + unless the CSV record contains an image_alt_text field with something in it. """ - if config['standalone_media_url'] is True: - get_endpoint = config['host'] + '/media/' + str(media_id) + '?_format=json' + if config["standalone_media_url"] is True: + get_endpoint = config["host"] + "/media/" + str(media_id) + "?_format=json" else: - get_endpoint = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - get_headers = {'Content-Type': 'application/json'} - get_response = issue_request(config, 'GET', get_endpoint, get_headers) + get_endpoint = config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + get_headers = {"Content-Type": "application/json"} + get_response = issue_request(config, "GET", get_endpoint, get_headers) get_response_body = json.loads(get_response.text) - field_media_image_target_id = get_response_body['field_media_image'][0]['target_id'] + field_media_image_target_id = get_response_body["field_media_image"][0]["target_id"] for field_name, field_value in node_csv_row.items(): - if field_name == 'title': + if field_name == "title": alt_text = clean_image_alt_text(field_value) - if field_name == 'image_alt_text' and len(field_value) > 0: + if field_name == "image_alt_text" and len(field_value) > 0: alt_text = clean_image_alt_text(field_value) media_json = { - 'bundle': [ - {'target_id': 'image'} - ], - 'field_media_image': [ + "bundle": [{"target_id": "image"}], + "field_media_image": [ {"target_id": field_media_image_target_id, "alt": alt_text} ], } - if config['standalone_media_url'] is True: - patch_endpoint = config['host'] + '/media/' + str(media_id) + '?_format=json' + if config["standalone_media_url"] is True: + patch_endpoint = config["host"] + "/media/" + str(media_id) + "?_format=json" else: - patch_endpoint = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - patch_headers = {'Content-Type': 'application/json'} + patch_endpoint = ( + config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + ) + patch_headers = {"Content-Type": "application/json"} patch_response = issue_request( - config, - 'PATCH', - patch_endpoint, - patch_headers, - media_json) + config, "PATCH", patch_endpoint, patch_headers, media_json + ) if patch_response.status_code != 200: logging.warning("Alt text for image media %s not updated.", patch_endpoint) def remove_media_and_file(config, media_id): - """Delete a media and the file associated with it. - """ + """Delete a media and the file associated with it.""" # First get the media JSON. - if config['standalone_media_url'] is True: - get_media_url = config['host'] + '/media/' + str(media_id) + '?_format=json' + if config["standalone_media_url"] is True: + get_media_url = config["host"] + "/media/" + str(media_id) + "?_format=json" else: - get_media_url = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - get_media_response = issue_request(config, 'GET', get_media_url) + get_media_url = ( + config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + ) + get_media_response = issue_request(config, "GET", get_media_url) get_media_response_body = json.loads(get_media_response.text) # See https://github.com/mjordan/islandora_workbench/issues/446 for background. 
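# A minimal sketch of the endpoint selection used throughout this section: the
# media GET/PATCH/DELETE URL depends on whether Drupal's "Standalone media URL"
# option is enabled. The host below is a placeholder.
def media_endpoint(host, media_id, standalone_media_url):
    if standalone_media_url:
        return f"{host}/media/{media_id}?_format=json"
    return f"{host}/media/{media_id}/edit?_format=json"


# https://islandora.example.org/media/12?_format=json
print(media_endpoint("https://islandora.example.org", 12, True))
# https://islandora.example.org/media/12/edit?_format=json
print(media_endpoint("https://islandora.example.org", 12, False))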
- if 'message' in get_media_response_body and get_media_response_body['message'].startswith("No route found for"): + if "message" in get_media_response_body and get_media_response_body[ + "message" + ].startswith("No route found for"): message = f'Please visit {config["host"]}/admin/config/media/media-settings and uncheck the "Standalone media URL" option.' logging.error(message) sys.exit("Error: " + message) # See https://github.com/mjordan/islandora_workbench/issues/446 for background. if get_media_response.status_code == 403: - message = f'If the "Standalone media URL" option at {config["host"]}/admin/config/media/media-settings is unchecked, clear your Drupal cache and run Workbench again.' + \ - ' If that doesn\'t work, try adding "standalone_media_url: true" to your configuration file.' + message = ( + f'If the "Standalone media URL" option at {config["host"]}/admin/config/media/media-settings is unchecked, clear your Drupal cache and run Workbench again.' + + ' If that doesn\'t work, try adding "standalone_media_url: true" to your configuration file.' + ) logging.error(message) sys.exit("Error: " + message) for file_field_name in file_fields: if file_field_name in get_media_response_body: try: - file_id = get_media_response_body[file_field_name][0]['target_id'] + file_id = get_media_response_body[file_field_name][0]["target_id"] except Exception as e: - logging.error("Unable to get file ID for media %s (reason: %s); proceeding to delete media without file.", media_id, e) + logging.error( + "Unable to get file ID for media %s (reason: %s); proceeding to delete media without file.", + media_id, + e, + ) file_id = None break # Delete the file first. if file_id is not None: - file_endpoint = config['host'] + '/entity/file/' + str(file_id) + '?_format=json' - file_response = issue_request(config, 'DELETE', file_endpoint) + file_endpoint = ( + config["host"] + "/entity/file/" + str(file_id) + "?_format=json" + ) + file_response = issue_request(config, "DELETE", file_endpoint) if file_response.status_code == 204: logging.info("File %s (from media %s) deleted.", file_id, media_id) else: - logging.error("File %s (from media %s) not deleted (HTTP response code %s).", file_id, media_id, file_response.status_code) + logging.error( + "File %s (from media %s) not deleted (HTTP response code %s).", + file_id, + media_id, + file_response.status_code, + ) # Delete any audio/video media_track files. 
- media_bundle_name = get_media_response_body['bundle'][0]['target_id'] - if media_bundle_name in config['media_track_file_fields']: - track_file_field = config['media_track_file_fields'][media_bundle_name] + media_bundle_name = get_media_response_body["bundle"][0]["target_id"] + if media_bundle_name in config["media_track_file_fields"]: + track_file_field = config["media_track_file_fields"][media_bundle_name] if track_file_field in get_media_response_body[track_file_field]: for track_file in get_media_response_body[track_file_field]: - track_file_id = track_file['target_id'] - track_file_endpoint = config['host'] + '/entity/file/' + str(track_file_id) + '?_format=json' - track_file_response = issue_request(config, 'DELETE', track_file_endpoint) + track_file_id = track_file["target_id"] + track_file_endpoint = ( + config["host"] + + "/entity/file/" + + str(track_file_id) + + "?_format=json" + ) + track_file_response = issue_request( + config, "DELETE", track_file_endpoint + ) if track_file_response.status_code == 204: - logging.info("Media track file %s (from media %s) deleted.", track_file_id, media_id) + logging.info( + "Media track file %s (from media %s) deleted.", + track_file_id, + media_id, + ) else: - logging.error("Media track file %s (from media %s) not deleted (HTTP response code %s).", track_file_id, media_id, track_file_response.status_code) + logging.error( + "Media track file %s (from media %s) not deleted (HTTP response code %s).", + track_file_id, + media_id, + track_file_response.status_code, + ) # Then the media. if file_id is None or file_response.status_code == 204: - if config['standalone_media_url'] is True: - media_endpoint = config['host'] + '/media/' + str(media_id) + '?_format=json' + if config["standalone_media_url"] is True: + media_endpoint = ( + config["host"] + "/media/" + str(media_id) + "?_format=json" + ) else: - media_endpoint = config['host'] + '/media/' + str(media_id) + '/edit?_format=json' - media_response = issue_request(config, 'DELETE', media_endpoint) + media_endpoint = ( + config["host"] + "/media/" + str(media_id) + "/edit?_format=json" + ) + media_response = issue_request(config, "DELETE", media_endpoint) if media_response.status_code == 204: logging.info("Media %s deleted.", media_id) return media_response.status_code else: - logging.error("Media %s not deleted (HTTP response code %s).", media_id, media_response.status_code) + logging.error( + "Media %s not deleted (HTTP response code %s).", + media_id, + media_response.status_code, + ) return False return False -def get_csv_data(config, csv_file_target='node_fields', file_path=None): +def get_csv_data(config, csv_file_target="node_fields", file_path=None): """Read the input CSV data and prepare it for use in create, update, etc. tasks. - This function reads the source CSV file (or the CSV dump from Google Sheets or Excel), - applies some prepocessing to each CSV record (specifically, it adds any CSV field - templates that are registered in the config file, and it filters out any CSV - records or lines in the CSV file that begine with a #), and finally, writes out - a version of the CSV data to a file that appends .preprocessed to the input - CSV file name. It is this .preprocessed file that is used in create, update, etc. - tasks. 
+ This function reads the source CSV file (or the CSV dump from Google Sheets or Excel), + applies some prepocessing to each CSV record (specifically, it adds any CSV field + templates that are registered in the config file, and it filters out any CSV + records or lines in the CSV file that begine with a #), and finally, writes out + a version of the CSV data to a file that appends .preprocessed to the input + CSV file name. It is this .preprocessed file that is used in create, update, etc. + tasks. """ """Parameters ---------- @@ -3998,52 +5357,72 @@ def get_csv_data(config, csv_file_target='node_fields', file_path=None): preprocessed_csv_reader The CSV DictReader object. """ - if csv_file_target == 'node_fields': - file_path = config['input_csv'] + if csv_file_target == "node_fields": + file_path = config["input_csv"] if os.path.isabs(file_path): input_csv_path = file_path - elif file_path.startswith('http') is True: + elif file_path.startswith("http") is True: input_csv_path = get_extracted_csv_file_path(config) if os.path.exists(input_csv_path): os.remove(input_csv_path) get_csv_from_google_sheet(config) - elif file_path.endswith('.xlsx') is True: + elif file_path.endswith(".xlsx") is True: input_csv_path = get_extracted_csv_file_path(config) if os.path.exists(input_csv_path): os.remove(input_csv_path) get_csv_from_excel(config) else: - input_csv_path = os.path.join(config['input_dir'], file_path) + input_csv_path = os.path.join(config["input_dir"], file_path) if not os.path.exists(input_csv_path): - message = 'CSV file ' + input_csv_path + ' not found.' + message = "CSV file " + input_csv_path + " not found." logging.error(message) sys.exit("Error: " + message) try: # 'utf-8-sig' encoding skips Microsoft BOM (0xef, 0xbb, 0xbf) at the start of files, # e.g. exported from Excel and has no effect when reading standard UTF-8 encoded files. - csv_reader_file_handle = open(input_csv_path, 'r', encoding="utf-8-sig", newline='') - except (UnicodeDecodeError): - message = 'Error: CSV file ' + input_csv_path + ' must be encoded in ASCII or UTF-8.' + csv_reader_file_handle = open( + input_csv_path, "r", encoding="utf-8-sig", newline="" + ) + except UnicodeDecodeError: + message = ( + "Error: CSV file " + input_csv_path + " must be encoded in ASCII or UTF-8." + ) logging.error(message) sys.exit(message) - preprocessed_csv_path = os.path.join(config['temp_dir'], os.path.basename(input_csv_path)) + '.preprocessed' - csv_writer_file_handle = open(preprocessed_csv_path, 'w+', newline='', encoding='utf-8') + preprocessed_csv_path = ( + os.path.join(config["temp_dir"], os.path.basename(input_csv_path)) + + ".preprocessed" + ) + csv_writer_file_handle = open( + preprocessed_csv_path, "w+", newline="", encoding="utf-8" + ) # 'restval' is used to populate superfluous fields/labels. - csv_reader = csv.DictReader(csv_reader_file_handle, delimiter=config['delimiter'], restval='stringtopopulateextrafields') + csv_reader = csv.DictReader( + csv_reader_file_handle, + delimiter=config["delimiter"], + restval="stringtopopulateextrafields", + ) # Unfinished (e.g. still need to apply this to creating taxonomies) WIP on #559. 
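# A small sketch of the preprocessed-CSV path built above: the input CSV's base
# name is written into temp_dir with a ".preprocessed" suffix. Paths are
# illustrative only.
import os


def preprocessed_csv_path(temp_dir, input_csv_path):
    return os.path.join(temp_dir, os.path.basename(input_csv_path)) + ".preprocessed"


# On POSIX systems this prints /tmp/metadata.csv.preprocessed
print(preprocessed_csv_path("/tmp", "input/metadata.csv"))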
- if config['csv_headers'] == 'labels' and config['task'] in ['create', 'update', 'create_terms', 'update_terms']: - ''' + if config["csv_headers"] == "labels" and config["task"] in [ + "create", + "update", + "create_terms", + "update_terms", + ]: + """ if config['task'] == 'create_terms' or config['task'] == 'update_terms': field_map = get_fieldname_map(config, 'taxonomy_term', config['vocab_id'], 'labels') else: field_map = get_fieldname_map(config, 'node', config['content_type'], 'labels') - ''' - csv_reader_fieldnames = replace_field_labels_with_names(config, csv_reader.fieldnames) + """ + csv_reader_fieldnames = replace_field_labels_with_names( + config, csv_reader.fieldnames + ) else: csv_reader_fieldnames = csv_reader.fieldnames @@ -4055,114 +5434,163 @@ def get_csv_data(config, csv_file_target='node_fields', file_path=None): else: duplicates.append(item) if len(duplicates) > 0: - message = "Error: CSV has duplicate header names - " + ', '.join(duplicates) + message = "Error: CSV has duplicate header names - " + ", ".join(duplicates) logging.error(message) sys.exit(message) - csv_reader_fieldnames = [x for x in csv_reader_fieldnames if x not in config['ignore_csv_columns']] + csv_reader_fieldnames = [ + x for x in csv_reader_fieldnames if x not in config["ignore_csv_columns"] + ] # CSV field templates and CSV value templates currently apply only to node CSV files, not vocabulary CSV files. - tasks = ['create', 'update'] - if config['task'] in tasks and csv_file_target == 'node_fields': + tasks = ["create", "update"] + if config["task"] in tasks and csv_file_target == "node_fields": # If the config file contains CSV field templates, append them to the CSV data. # Make a copy of the column headers so we can skip adding templates to the new CSV # if they're present in the source CSV. We don't want fields in the source CSV to be # stomped on by templates. csv_reader_fieldnames_orig = copy.copy(csv_reader_fieldnames) - if 'csv_field_templates' in config: - for template in config['csv_field_templates']: + if "csv_field_templates" in config: + for template in config["csv_field_templates"]: for field_name, field_value in template.items(): if field_name not in csv_reader_fieldnames_orig: csv_reader_fieldnames.append(field_name) - csv_writer = csv.DictWriter(csv_writer_file_handle, fieldnames=csv_reader_fieldnames, delimiter=config['delimiter']) + csv_writer = csv.DictWriter( + csv_writer_file_handle, + fieldnames=csv_reader_fieldnames, + delimiter=config["delimiter"], + ) csv_writer.writeheader() row_num = 0 unique_identifiers = [] # We subtract 1 from config['csv_start_row'] so user's expectation of the actual # start row match up with Python's 0-based counting. - if config['csv_start_row'] > 0: - csv_start_row = config['csv_start_row'] - 1 + if config["csv_start_row"] > 0: + csv_start_row = config["csv_start_row"] - 1 else: - csv_start_row = config['csv_start_row'] - for row in itertools.islice(csv_reader, csv_start_row, config['csv_stop_row']): + csv_start_row = config["csv_start_row"] + for row in itertools.islice(csv_reader, csv_start_row, config["csv_stop_row"]): row_num += 1 # Remove columns specified in config['ignore_csv_columns']. 
- if len(config['ignore_csv_columns']) > 0: - for column_to_ignore in config['ignore_csv_columns']: + if len(config["ignore_csv_columns"]) > 0: + for column_to_ignore in config["ignore_csv_columns"]: if column_to_ignore in row: del row[column_to_ignore] - if 'csv_field_templates' in config: - for template in config['csv_field_templates']: + if "csv_field_templates" in config: + for template in config["csv_field_templates"]: for field_name, field_value in template.items(): if field_name not in csv_reader_fieldnames_orig: row[field_name] = field_value # Skip CSV records whose first column begin with #. - if not list(row.values())[0].startswith('#'): + if not list(row.values())[0].startswith("#"): try: - unique_identifiers.append(row[config['id_field']]) + unique_identifiers.append(row[config["id_field"]]) - if 'csv_value_templates' in config and len(config['csv_value_templates']) > 0: + if ( + "csv_value_templates" in config + and len(config["csv_value_templates"]) > 0 + ): row = apply_csv_value_templates(config, row) row = clean_csv_values(config, row) csv_writer.writerow(row) - except (ValueError): + except ValueError: # Note: this message is also generated in check_input(). - message = "Row " + str(row_num) + " (ID " + row[config['id_field']] + ') of the CSV file "' + input_csv_path + '" ' + \ - "has more columns (" + str(len(row)) + ") than there are headers (" + \ - str(len(csv_reader.fieldnames)) + ').' + message = ( + "Row " + + str(row_num) + + " (ID " + + row[config["id_field"]] + + ') of the CSV file "' + + input_csv_path + + '" ' + + "has more columns (" + + str(len(row)) + + ") than there are headers (" + + str(len(csv_reader.fieldnames)) + + ")." + ) logging.error(message) - print('Error: ' + message) + print("Error: " + message) sys.exit(message) - repeats = set(([x for x in unique_identifiers if unique_identifiers.count(x) > 1])) + repeats = set( + ([x for x in unique_identifiers if unique_identifiers.count(x) > 1]) + ) if len(repeats) > 0: - message = "Duplicate identifiers in column " + config['id_field'] + " found: " + ','.join(repeats) + "." + message = ( + "Duplicate identifiers in column " + + config["id_field"] + + " found: " + + ",".join(repeats) + + "." + ) logging.error(message) sys.exit("Error: " + message) else: - csv_writer = csv.DictWriter(csv_writer_file_handle, fieldnames=csv_reader_fieldnames, delimiter=config['delimiter']) + csv_writer = csv.DictWriter( + csv_writer_file_handle, + fieldnames=csv_reader_fieldnames, + delimiter=config["delimiter"], + ) csv_writer.writeheader() row_num = 0 # We subtract 1 from config['csv_start_row'] so user's expectation of the actual # start row match up with Python's 0-based counting. - if config['csv_start_row'] > 0: - csv_start_row = config['csv_start_row'] - 1 + if config["csv_start_row"] > 0: + csv_start_row = config["csv_start_row"] - 1 else: - csv_start_row = config['csv_start_row'] - for row in itertools.islice(csv_reader, csv_start_row, config['csv_stop_row']): + csv_start_row = config["csv_start_row"] + for row in itertools.islice(csv_reader, csv_start_row, config["csv_stop_row"]): row_num += 1 # Remove columns specified in config['ignore_csv_columns']. - if len(config['ignore_csv_columns']) > 0: - for column_to_ignore in config['ignore_csv_columns']: + if len(config["ignore_csv_columns"]) > 0: + for column_to_ignore in config["ignore_csv_columns"]: if column_to_ignore in row: del row[column_to_ignore] # Skip CSV records whose first column begin with #. 
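# A minimal sketch, using only the csv module, of two checks applied above and
# below: rows whose first column starts with "#" are skipped, and duplicate
# values in the identifier column are reported. Column names are placeholders.
import csv


def read_rows(csv_path, id_field="id", delimiter=","):
    rows, identifiers = [], []
    with open(csv_path, newline="", encoding="utf-8-sig") as handle:
        for row in csv.DictReader(handle, delimiter=delimiter):
            if list(row.values())[0].startswith("#"):
                continue
            identifiers.append(row[id_field])
            rows.append(row)
    duplicates = sorted({i for i in identifiers if identifiers.count(i) > 1})
    if duplicates:
        raise ValueError("Duplicate identifiers found: " + ", ".join(duplicates))
    return rows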
- if not list(row.values())[0].startswith('#'): + if not list(row.values())[0].startswith("#"): try: row = clean_csv_values(config, row) csv_writer.writerow(row) - except (ValueError): + except ValueError: # Note: this message is also generated in check_input(). - message = "Row " + str(row_num) + " (ID " + row[config['id_field']] + ') of the CSV file "' + input_csv_path + '" ' + \ - "has more columns (" + str(len(row)) + ") than there are headers (" + \ - str(len(csv_reader.fieldnames)) + ').' + message = ( + "Row " + + str(row_num) + + " (ID " + + row[config["id_field"]] + + ') of the CSV file "' + + input_csv_path + + '" ' + + "has more columns (" + + str(len(row)) + + ") than there are headers (" + + str(len(csv_reader.fieldnames)) + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) csv_writer_file_handle.close() - preprocessed_csv_reader_file_handle = open(preprocessed_csv_path, 'r', encoding='utf-8') - preprocessed_csv_reader = csv.DictReader(preprocessed_csv_reader_file_handle, delimiter=config['delimiter'], restval='stringtopopulateextrafields') + preprocessed_csv_reader_file_handle = open( + preprocessed_csv_path, "r", encoding="utf-8" + ) + preprocessed_csv_reader = csv.DictReader( + preprocessed_csv_reader_file_handle, + delimiter=config["delimiter"], + restval="stringtopopulateextrafields", + ) return preprocessed_csv_reader def find_term_in_vocab(config, vocab_id, term_name_to_find): """Query the Term from term name View using the vocab_id to see if term_name_to_find is - is found in that vocabulary. If so, returns the term ID; if not returns False. If - more than one term found, returns the term ID of the first one. Also populates global - lists of terms (checked_terms and newly_created_terms) to reduce queries to Drupal. + is found in that vocabulary. If so, returns the term ID; if not returns False. If + more than one term found, returns the term ID of the first one. Also populates global + lists of terms (checked_terms and newly_created_terms) to reduce queries to Drupal. """ """Parameters ---------- @@ -4180,23 +5608,25 @@ def find_term_in_vocab(config, vocab_id, term_name_to_find): The term ID, existing or newly created. Returns False if term name is not found (or config['validate_terms_exist'] is False). """ - if 'check' in config.keys() and config['check'] is True: - if config['validate_terms_exist'] is False: + if "check" in config.keys() and config["check"] is True: + if config["validate_terms_exist"] is False: return False # Attempt to detect term names (including typed relation taxonomy terms) that are namespaced. Some term names may # contain a colon (which is used in the incoming CSV to seprate the vocab ID from the term name). If there is # a ':', maybe it's part of the term name and it's not namespaced. To find out, split term_name_to_find # and compare the first segment with the vocab_id. - if ':' in term_name_to_find: + if ":" in term_name_to_find: original_term_name_to_find = copy.copy(term_name_to_find) - [tentative_vocab_id, tentative_term_name] = term_name_to_find.split(':', maxsplit=1) + [tentative_vocab_id, tentative_term_name] = term_name_to_find.split( + ":", maxsplit=1 + ) if tentative_vocab_id.strip() == vocab_id.strip(): term_name_to_find = tentative_term_name else: term_name_to_find = original_term_name_to_find - ''' + """ # Namespaced terms (inc. typed relation terms): if a vocabulary namespace is present, we need to split it out # from the term name. 
This only applies in --check since namespaced terms are parsed in prepare_term_id(). # Assumptions: the term namespace always directly precedes the term name, and the term name may @@ -4207,154 +5637,223 @@ def find_term_in_vocab(config, vocab_id, term_name_to_find): # Assumption is that the term name is the last part, and the namespace is the second-last. term_name_to_find = namespaced_term_parts[-1] vocab_id = namespaced_term_parts[-2] - ''' + """ term_name_for_check_matching = term_name_to_find.lower().strip() for checked_term in checked_terms: - if checked_term['vocab_id'] == vocab_id and checked_term['name_for_matching'] == term_name_for_check_matching: - if value_is_numeric(checked_term['tid']): - return checked_term['tid'] + if ( + checked_term["vocab_id"] == vocab_id + and checked_term["name_for_matching"] == term_name_for_check_matching + ): + if value_is_numeric(checked_term["tid"]): + return checked_term["tid"] else: return False for newly_created_term in newly_created_terms: - if newly_created_term['vocab_id'] == vocab_id and newly_created_term['name_for_matching'] == term_name_to_find.lower().strip(): - return newly_created_term['tid'] - - url = config['host'] + '/term_from_term_name?vocab=' + vocab_id.strip() + '&name=' + urllib.parse.quote_plus(term_name_to_find.strip()) + '&_format=json' - response = issue_request(config, 'GET', url) + if ( + newly_created_term["vocab_id"] == vocab_id + and newly_created_term["name_for_matching"] + == term_name_to_find.lower().strip() + ): + return newly_created_term["tid"] + + url = ( + config["host"] + + "/term_from_term_name?vocab=" + + vocab_id.strip() + + "&name=" + + urllib.parse.quote_plus(term_name_to_find.strip()) + + "&_format=json" + ) + response = issue_request(config, "GET", url) if response.status_code == 200: term_data = json.loads(response.text) # Term name is not found. if len(term_data) == 0: - if 'check' in config.keys() and config['check'] is True: - checked_term_to_add = {'tid': None, 'vocab_id': vocab_id, 'name': term_name_to_find, 'name_for_matching': term_name_for_check_matching} + if "check" in config.keys() and config["check"] is True: + checked_term_to_add = { + "tid": None, + "vocab_id": vocab_id, + "name": term_name_to_find, + "name_for_matching": term_name_for_check_matching, + } if checked_term_to_add not in checked_terms: checked_terms.append(checked_term_to_add) return False elif len(term_data) > 1: - print("Warning: See log for important message about duplicate terms within the same vocabulary.") + print( + "Warning: See log for important message about duplicate terms within the same vocabulary." + ) logging.warning( 'Query for term "%s" found %s terms with that name in the %s vocabulary. Workbench is choosing the first term ID (%s)).', term_name_to_find, len(term_data), vocab_id, - term_data[0]['tid'][0]['value']) - if 'check' in config.keys() and config['check'] is True: - checked_term_to_add = {'tid': term_data[0]['tid'][0]['value'], 'vocab_id': vocab_id, 'name': term_name_to_find, 'name_for_matching': term_name_for_check_matching} + term_data[0]["tid"][0]["value"], + ) + if "check" in config.keys() and config["check"] is True: + checked_term_to_add = { + "tid": term_data[0]["tid"][0]["value"], + "vocab_id": vocab_id, + "name": term_name_to_find, + "name_for_matching": term_name_for_check_matching, + } if checked_term_to_add not in checked_terms: checked_terms.append(checked_term_to_add) - return term_data[0]['tid'][0]['value'] + return term_data[0]["tid"][0]["value"] # Term name is found. 
else: - if 'check' in config.keys() and config['check'] is True: - checked_term_to_add = {'tid': term_data[0]['tid'][0]['value'], 'vocab_id': vocab_id, 'name': term_name_to_find, 'name_for_matching': term_name_for_check_matching} + if "check" in config.keys() and config["check"] is True: + checked_term_to_add = { + "tid": term_data[0]["tid"][0]["value"], + "vocab_id": vocab_id, + "name": term_name_to_find, + "name_for_matching": term_name_for_check_matching, + } if checked_term_to_add not in checked_terms: checked_terms.append(checked_term_to_add) - return term_data[0]['tid'][0]['value'] + return term_data[0]["tid"][0]["value"] else: - logging.warning('Query for term "%s" in vocabulary "%s" returned a %s status code', term_name_to_find, vocab_id, response.status_code) + logging.warning( + 'Query for term "%s" in vocabulary "%s" returned a %s status code', + term_name_to_find, + vocab_id, + response.status_code, + ) return False def get_term_vocab(config, term_id): """Get the term's parent vocabulary ID and return it. If the term doesn't - exist, return False. + exist, return False. """ - url = config['host'] + '/taxonomy/term/' + str(term_id).strip() + '?_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + "/taxonomy/term/" + str(term_id).strip() + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code == 200: term_data = json.loads(response.text) - return term_data['vid'][0]['target_id'] + return term_data["vid"][0]["target_id"] else: - logging.warning('Query for term ID "%s" returned a %s status code', term_id, response.status_code) + logging.warning( + 'Query for term ID "%s" returned a %s status code', + term_id, + response.status_code, + ) return False def get_term_name(config, term_id): - """Get the term's name and return it. If the term doesn't exist, return False. - """ - url = config['host'] + '/taxonomy/term/' + str(term_id).strip() + '?_format=json' - response = issue_request(config, 'GET', url) + """Get the term's name and return it. If the term doesn't exist, return False.""" + url = config["host"] + "/taxonomy/term/" + str(term_id).strip() + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code == 200: term_data = json.loads(response.text) - return term_data['name'][0]['value'] + return term_data["name"][0]["value"] else: - logging.warning('Query for term ID "%s" returned a %s status code', term_id, response.status_code) + logging.warning( + 'Query for term ID "%s" returned a %s status code', + term_id, + response.status_code, + ) return False def get_term_uri(config, term_id): """Get the term's URI and return it. If the term or URI doesn't exist, return False. - If the term has no URI, return None. + If the term has no URI, return None. 
""" - url = config['host'] + '/taxonomy/term/' + str(term_id).strip() + '?_format=json' - response = issue_request(config, 'GET', url) + url = config["host"] + "/taxonomy/term/" + str(term_id).strip() + "?_format=json" + response = issue_request(config, "GET", url) if response.status_code == 200: term_data = json.loads(response.text) - if 'field_external_uri' in term_data: - uri = term_data['field_external_uri'][0]['uri'] + if "field_external_uri" in term_data: + uri = term_data["field_external_uri"][0]["uri"] return uri - elif 'field_authority_link' in term_data: - uri = term_data['field_authority_link'][0]['uri'] + elif "field_authority_link" in term_data: + uri = term_data["field_authority_link"][0]["uri"] return uri else: - logging.warning('Query for term ID "%s" does not have either a field_authority_link or field_exteral_uri field.', term_id) + logging.warning( + 'Query for term ID "%s" does not have either a field_authority_link or field_exteral_uri field.', + term_id, + ) return None else: - logging.warning('Query for term ID "%s" returned a %s status code', term_id, response.status_code) + logging.warning( + 'Query for term ID "%s" returned a %s status code', + term_id, + response.status_code, + ) return False def get_term_id_from_uri(config, uri): """For a given URI, query the Term from URI View created by the Islandora - Workbench Integration module. Because we don't know which field each - taxonomy uses to store URIs (it's either field_external_uri or field_authority_link), - we need to check both options in the "Term from URI" View. + Workbench Integration module. Because we don't know which field each + taxonomy uses to store URIs (it's either field_external_uri or field_authority_link), + we need to check both options in the "Term from URI" View. """ # Some vocabularies use this View. terms_with_uri = [] - term_from_uri_url = config['host'] + '/term_from_uri?_format=json&uri=' + uri.replace('#', '%23') - term_from_uri_response = issue_request(config, 'GET', term_from_uri_url) + term_from_uri_url = ( + config["host"] + "/term_from_uri?_format=json&uri=" + uri.replace("#", "%23") + ) + term_from_uri_response = issue_request(config, "GET", term_from_uri_url) if term_from_uri_response.status_code == 200: term_from_uri_response_body_json = term_from_uri_response.text term_from_uri_response_body = json.loads(term_from_uri_response_body_json) if len(term_from_uri_response_body) == 1: - tid = term_from_uri_response_body[0]['tid'][0]['value'] + tid = term_from_uri_response_body[0]["tid"][0]["value"] return tid if len(term_from_uri_response_body) > 1: for term in term_from_uri_response_body: - terms_with_uri.append({term['tid'][0]['value']: term['vid'][0]['target_id']}) - tid = term_from_uri_response_body[0]['tid'][0]['value'] + terms_with_uri.append( + {term["tid"][0]["value"]: term["vid"][0]["target_id"]} + ) + tid = term_from_uri_response_body[0]["tid"][0]["value"] print("Warning: See log for important message about use of term URIs.") logging.warning( - 'Term URI "%s" is used for more than one term (with these term ID/vocabulary ID combinations: ' + - str(terms_with_uri) + - '). Workbench is choosing the first term ID (%s)).', + 'Term URI "%s" is used for more than one term (with these term ID/vocabulary ID combinations: ' + + str(terms_with_uri) + + "). Workbench is choosing the first term ID (%s)).", uri, - tid) + tid, + ) return tid # And some vocabuluaries use this View. 
- term_from_authority_link_url = config['host'] + '/term_from_authority_link?_format=json&authority_link=' + uri.replace('#', '%23') - term_from_authority_link_response = issue_request(config, 'GET', term_from_authority_link_url) + term_from_authority_link_url = ( + config["host"] + + "/term_from_authority_link?_format=json&authority_link=" + + uri.replace("#", "%23") + ) + term_from_authority_link_response = issue_request( + config, "GET", term_from_authority_link_url + ) if term_from_authority_link_response.status_code == 200: - term_from_authority_link_response_body_json = term_from_authority_link_response.text - term_from_authority_link_response_body = json.loads(term_from_authority_link_response_body_json) + term_from_authority_link_response_body_json = ( + term_from_authority_link_response.text + ) + term_from_authority_link_response_body = json.loads( + term_from_authority_link_response_body_json + ) if len(term_from_authority_link_response_body) == 1: - tid = term_from_authority_link_response_body[0]['tid'][0]['value'] + tid = term_from_authority_link_response_body[0]["tid"][0]["value"] return tid elif len(term_from_authority_link_response_body) > 1: for term in term_from_authority_link_response_body: - terms_with_uri.append({term['tid'][0]['value']: term['vid'][0]['target_id']}) - tid = term_from_authority_link_response_body[0]['tid'][0]['value'] + terms_with_uri.append( + {term["tid"][0]["value"]: term["vid"][0]["target_id"]} + ) + tid = term_from_authority_link_response_body[0]["tid"][0]["value"] print("Warning: See log for important message about use of term URIs.") logging.warning( - 'Term URI "%s" is used for more than one term (with these term ID/vocabulary ID combinations: ' + - str(terms_with_uri) + - '). Workbench is choosing the first term ID (%s)).', + 'Term URI "%s" is used for more than one term (with these term ID/vocabulary ID combinations: ' + + str(terms_with_uri) + + "). Workbench is choosing the first term ID (%s)).", uri, - tid) + tid, + ) return tid else: # URI does not match any term. @@ -4364,7 +5863,9 @@ def get_term_id_from_uri(config, uri): return False -def get_all_representations_of_term(config, vocab_id=None, name=None, term_id=None, uri=None): +def get_all_representations_of_term( + config, vocab_id=None, name=None, term_id=None, uri=None +): """Parameters ---------- config : dict @@ -4395,12 +5896,12 @@ def get_all_representations_of_term(config, vocab_id=None, name=None, term_id=No term_id = get_term_id_from_uri(config, uri) name = get_term_name(config, term_id) - return {'term_id': term_id, 'name': name, 'uri': uri} + return {"term_id": term_id, "name": name, "uri": uri} def create_term(config, vocab_id, term_name, term_csv_row=None): """Adds a term to the target vocabulary. Returns the new term's ID - if successful (or if the term already exists) or False if not. + if successful (or if the term already exists) or False if not. """ """Parameters ---------- @@ -4421,19 +5922,38 @@ def create_term(config, vocab_id, term_name, term_csv_row=None): # Check to see if term exists; if so, return its ID, if not, proceed to create it. 
tid = find_term_in_vocab(config, vocab_id, term_name) if value_is_numeric(tid): - if (config['task'] == 'create' or config['task'] == 'update') and config['log_term_creation'] is True: - logging.info('Term "%s" (term ID %s) already exists in vocabulary "%s".', term_name, tid, vocab_id) - if config['task'] == 'create_terms': - logging.info('Term "%s" (term ID %s) already exists in vocabulary "%s".', term_name, tid, vocab_id) + if (config["task"] == "create" or config["task"] == "update") and config[ + "log_term_creation" + ] is True: + logging.info( + 'Term "%s" (term ID %s) already exists in vocabulary "%s".', + term_name, + tid, + vocab_id, + ) + if config["task"] == "create_terms": + logging.info( + 'Term "%s" (term ID %s) already exists in vocabulary "%s".', + term_name, + tid, + vocab_id, + ) return tid - if config['allow_adding_terms'] is False: - logging.warning('To create new taxonomy terms, you must add "allow_adding_terms: true" to your configuration file.') + if config["allow_adding_terms"] is False: + logging.warning( + 'To create new taxonomy terms, you must add "allow_adding_terms: true" to your configuration file.' + ) return False if len(term_name) > 255: truncated_term_name = term_name[:255] - message = 'Term "' + term_name + '"' + "provided in the CSV data exceeds Drupal's maximum length of 255 characters." + message = ( + 'Term "' + + term_name + + '"' + + "provided in the CSV data exceeds Drupal's maximum length of 255 characters." + ) message_2 = ' It has been trucated to "' + truncated_term_name + '".' logging.info(message + message_2) term_name = truncated_term_name @@ -4441,49 +5961,61 @@ def create_term(config, vocab_id, term_name, term_csv_row=None): term_field_data = get_term_field_data(config, vocab_id, term_name, term_csv_row) if term_field_data is False: # @todo: Failure details should be logged in get_term_field_data(). - logging.warning('Unable to create term "' + term_name + '" because Workbench could not get term field data.') + logging.warning( + 'Unable to create term "' + + term_name + + '" because Workbench could not get term field data.' + ) return False # Common values for all terms, simple and complex. 
term = { - "vid": [ - { - "target_id": str(vocab_id), - "target_type": "taxonomy_vocabulary" - } - ], - "name": [ - { - "value": term_name - } - ] + "vid": [{"target_id": str(vocab_id), "target_type": "taxonomy_vocabulary"}], + "name": [{"value": term_name}], } term.update(term_field_data) - term_endpoint = config['host'] + '/taxonomy/term?_format=json' - headers = {'Content-Type': 'application/json'} - response = issue_request(config, 'POST', term_endpoint, headers, term, None) + term_endpoint = config["host"] + "/taxonomy/term?_format=json" + headers = {"Content-Type": "application/json"} + response = issue_request(config, "POST", term_endpoint, headers, term, None) if response.status_code == 201: term_response_body = json.loads(response.text) - tid = term_response_body['tid'][0]['value'] - if (config['task'] == 'create' or config['task'] == 'update') and config['log_term_creation'] is True: - logging.info('Term %s ("%s") added to vocabulary "%s".', tid, term_name, vocab_id) - if config['task'] == 'create_terms': - logging.info('Term %s ("%s") added to vocabulary "%s".', tid, term_name, vocab_id) + tid = term_response_body["tid"][0]["value"] + if (config["task"] == "create" or config["task"] == "update") and config[ + "log_term_creation" + ] is True: + logging.info( + 'Term %s ("%s") added to vocabulary "%s".', tid, term_name, vocab_id + ) + if config["task"] == "create_terms": + logging.info( + 'Term %s ("%s") added to vocabulary "%s".', tid, term_name, vocab_id + ) newly_created_term_name_for_matching = term_name.lower().strip() - newly_created_terms.append({'tid': tid, 'vocab_id': vocab_id, 'name': term_name, 'name_for_matching': newly_created_term_name_for_matching}) + newly_created_terms.append( + { + "tid": tid, + "vocab_id": vocab_id, + "name": term_name, + "name_for_matching": newly_created_term_name_for_matching, + } + ) return tid else: - logging.warning("Term '%s' not created, HTTP response code was %s.", term_name, response.status_code) + logging.warning( + "Term '%s' not created, HTTP response code was %s.", + term_name, + response.status_code, + ) return False def get_term_field_data(config, vocab_id, term_name, term_csv_row): """Assemble the dict that will be added to the 'term' dict in create_term(). status, description, - weight, parent, default_langcode, path fields are added here, even for simple term_name-only - terms. Check the vocabulary CSV file to see if there is a corresponding row. If the vocabulary - has any required fields, and any of them are absent, return False. + weight, parent, default_langcode, path fields are added here, even for simple term_name-only + terms. Check the vocabulary CSV file to see if there is a corresponding row. If the vocabulary + has any required fields, and any of them are absent, return False. """ """Parameters ---------- @@ -4503,40 +6035,12 @@ def get_term_field_data(config, vocab_id, term_name, term_csv_row): """ # 'vid' and 'name' are added in create_term(). 
term_field_data = { - "status": [ - { - "value": True - } - ], - "description": [ - { - "value": "", - "format": None - } - ], - "weight": [ - { - "value": 0 - } - ], - "parent": [ - { - "target_type": "taxonomy_term", - "target_id": None - } - ], - "default_langcode": [ - { - "value": True - } - ], - "path": [ - { - "alias": None, - "pid": None, - "langcode": "en" - } - ] + "status": [{"value": True}], + "description": [{"value": "", "format": None}], + "weight": [{"value": 0}], + "parent": [{"target_type": "taxonomy_term", "target_id": None}], + "default_langcode": [{"value": True}], + "path": [{"alias": None, "pid": None, "langcode": "en"}], } # We're creating a simple term, with only a term name. @@ -4548,132 +6052,184 @@ def get_term_field_data(config, vocab_id, term_name, term_csv_row): # rest of the imports causes a circular import exception, so we do it here. import workbench_fields - vocab_field_definitions = get_field_definitions(config, 'taxonomy_term', vocab_id.strip()) + vocab_field_definitions = get_field_definitions( + config, "taxonomy_term", vocab_id.strip() + ) # Build the JSON from the CSV row and create the term. vocab_csv_column_headers = term_csv_row.keys() for field_name in vocab_csv_column_headers: # term_name is the "id" field in the vocabulary CSV and not a field in the term JSON, so skip it. - if field_name == 'term_name': + if field_name == "term_name": continue # 'parent' field is present and not empty, so we need to look up the parent term. All terms # that are parents will have already been created back in workbench.create_terms() as long as # they preceded the children. If they come after the children in the CSV, we create the child # term anyway but log that the parent could not be found. - if 'parent' in term_csv_row and len(term_csv_row['parent'].strip()) != 0: - parent_tid = find_term_in_vocab(config, vocab_id, term_csv_row['parent']) + if "parent" in term_csv_row and len(term_csv_row["parent"].strip()) != 0: + parent_tid = find_term_in_vocab( + config, vocab_id, term_csv_row["parent"] + ) if value_is_numeric(parent_tid): - term_field_data['parent'][0]['target_id'] = str(parent_tid) + term_field_data["parent"][0]["target_id"] = str(parent_tid) else: # Create the term, but log that its parent could not be found. - message = 'Term "' + term_csv_row['term_name'] + '" added to vocabulary "' + vocab_id + '", but without its parent "' + \ - term_csv_row['parent'] + '", which isn\'t present in that vocabulary (possibly hasn\'t been create yet?).' + message = ( + 'Term "' + + term_csv_row["term_name"] + + '" added to vocabulary "' + + vocab_id + + '", but without its parent "' + + term_csv_row["parent"] + + "\", which isn't present in that vocabulary (possibly hasn't been create yet?)." + ) logging.warning(message) # 'parent' is not a field added to the term JSON in the field handlers, so skip it. - if field_name == 'parent': + if field_name == "parent": continue # Set 'description' and 'weight' JSON values if there are corresponding columns in the CSV. - if 'weight' in term_csv_row and len(term_csv_row['weight'].strip()) != 0: - if value_is_numeric(term_csv_row['weight']): - term_field_data['weight'][0]['value'] = str(term_csv_row['weight']) + if "weight" in term_csv_row and len(term_csv_row["weight"].strip()) != 0: + if value_is_numeric(term_csv_row["weight"]): + term_field_data["weight"][0]["value"] = str(term_csv_row["weight"]) else: # Create the term, but log that its weight could not be populated. 
- message = 'Term "' + term_csv_row['term_name'] + '" added to vocabulary "' + vocab_id + '", but without its weight "' + \ - term_csv_row['weight'] + '", which must be an integer.' + message = ( + 'Term "' + + term_csv_row["term_name"] + + '" added to vocabulary "' + + vocab_id + + '", but without its weight "' + + term_csv_row["weight"] + + '", which must be an integer.' + ) logging.warning(message) # 'weight' is not a field added to the term JSON in the field handlers, so skip it. - if field_name == 'weight': + if field_name == "weight": continue - if 'description' in term_csv_row and len(term_csv_row['description'].strip()) != 0: - term_field_data['description'][0]['value'] = term_csv_row['description'] + if ( + "description" in term_csv_row + and len(term_csv_row["description"].strip()) != 0 + ): + term_field_data["description"][0]["value"] = term_csv_row["description"] # 'description' is not a field added to the term JSON in the field handlers, so skip it. - if field_name == 'description': + if field_name == "description": continue # Assemble additional Drupal field structures for entity reference fields from CSV data. # Entity reference fields (taxonomy_term and node) - if vocab_field_definitions[field_name]['field_type'] == 'entity_reference': + if vocab_field_definitions[field_name]["field_type"] == "entity_reference": entity_reference_field = workbench_fields.EntityReferenceField() - term_field_data = entity_reference_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = entity_reference_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) # Typed relation fields. - elif vocab_field_definitions[field_name]['field_type'] == 'typed_relation': + elif vocab_field_definitions[field_name]["field_type"] == "typed_relation": typed_relation_field = workbench_fields.TypedRelationField() - term_field_data = typed_relation_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = typed_relation_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) # Geolocation fields. - elif vocab_field_definitions[field_name]['field_type'] == 'geolocation': + elif vocab_field_definitions[field_name]["field_type"] == "geolocation": geolocation_field = workbench_fields.GeolocationField() - term_field_data = geolocation_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = geolocation_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) # Link fields. - elif vocab_field_definitions[field_name]['field_type'] == 'link': + elif vocab_field_definitions[field_name]["field_type"] == "link": link_field = workbench_fields.LinkField() - term_field_data = link_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = link_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) # Authority Link fields. 
- elif vocab_field_definitions[field_name]['field_type'] == 'authority_link': + elif vocab_field_definitions[field_name]["field_type"] == "authority_link": authority_link_field = workbench_fields.AuthorityLinkField() - term_field_data = authority_link_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = authority_link_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) # For non-entity reference and non-typed relation fields (text, integer, boolean etc.). else: simple_field = workbench_fields.SimpleField() - term_field_data = simple_field.create(config, vocab_field_definitions, term_field_data, term_csv_row, field_name) + term_field_data = simple_field.create( + config, + vocab_field_definitions, + term_field_data, + term_csv_row, + field_name, + ) return term_field_data def get_term_uuid(config, term_id): - """Given a term ID, get the term's UUID. - """ - term_url = config['host'] + '/taxonomy/term/' + str(term_id) + '?_format=json' - response = issue_request(config, 'GET', term_url) + """Given a term ID, get the term's UUID.""" + term_url = config["host"] + "/taxonomy/term/" + str(term_id) + "?_format=json" + response = issue_request(config, "GET", term_url) term = json.loads(response.text) - uuid = term['uuid'][0]['value'] + uuid = term["uuid"][0]["value"] return uuid def create_url_alias(config, node_id, url_alias): - json = {'path': [ - {'value': '/node/' + str(node_id)} - ], - 'alias': [ - {'value': url_alias} - ] + json = { + "path": [{"value": "/node/" + str(node_id)}], + "alias": [{"value": url_alias}], } - headers = {'Content-Type': 'application/json'} + headers = {"Content-Type": "application/json"} response = issue_request( config, - 'POST', - config['host'] + - '/entity/path_alias?_format=json', + "POST", + config["host"] + "/entity/path_alias?_format=json", headers, json, - None) + None, + ) if response.status_code != 201: logging.error( "URL alias '%s' not created for node %s, HTTP response code was %s (it might already exist).", url_alias, - config['host'] + - '/node/' + - str(node_id), - response.status_code) + config["host"] + "/node/" + str(node_id), + response.status_code, + ) def prepare_term_id(config, vocab_ids, field_name, term): """Checks to see if 'term' is numeric (i.e., a term ID) and if it is, returns it as - is. If it's not (i.e., it's a string term name) it looks for the term name in the - referenced vocabulary and returns its term ID if the term exists, and if it doesn't - exist, creates the term and returns the new term ID. + is. If it's not (i.e., it's a string term name) it looks for the term name in the + referenced vocabulary and returns its term ID if the term exists, and if it doesn't + exist, creates the term and returns the new term ID. """ """Parameters ---------- @@ -4701,7 +6257,7 @@ def prepare_term_id(config, vocab_ids, field_name, term): return None # Special case: if the term starts with 'http', assume it's a Linked Data URI # and get its term ID from the URI. - elif term.startswith('http'): + elif term.startswith("http"): # Note: get_term_id_from_uri() will return False if the URI doesn't match a term. tid_from_uri = get_term_id_from_uri(config, term) if value_is_numeric(tid_from_uri): @@ -4711,9 +6267,9 @@ def prepare_term_id(config, vocab_ids, field_name, term): # A namespace is not needed but it might be present. 
If there is, # since this vocabulary is the only one linked to its field, # we remove it before sending it to create_term(). - namespaced = re.search(':', term) + namespaced = re.search(":", term) if namespaced: - [vocab_id, term_name] = term.split(':', maxsplit=1) + [vocab_id, term_name] = term.split(":", maxsplit=1) if vocab_id == vocab_ids[0]: tid = create_term(config, vocab_id.strip(), term_name.strip()) return tid @@ -4734,8 +6290,8 @@ def prepare_term_id(config, vocab_ids, field_name, term): # to namespace term names if they are used in multi-taxonomy fields. # # Split the namespace/vocab ID from the term name on ':'. - if ':' in term: - [tentative_vocab_id, term_name] = term.split(':', maxsplit=1) + if ":" in term: + [tentative_vocab_id, term_name] = term.split(":", maxsplit=1) for vocab_id in vocab_ids: if tentative_vocab_id == vocab_id: tid = create_term(config, vocab_id.strip(), term_name.strip()) @@ -4746,9 +6302,12 @@ def prepare_term_id(config, vocab_ids, field_name, term): return tid message = f"Because The field '{field_name}' allows more than one vocabulary the term '{term}' must be namespaced." - message = message + "See documentation at https://mjordan.github.io/islandora_workbench_docs/fields/#using-term-names-in-multi-vocabulary-fields" + message = ( + message + + "See documentation at https://mjordan.github.io/islandora_workbench_docs/fields/#using-term-names-in-multi-vocabulary-fields" + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Explicitly return None if hasn't returned from one of the conditions above, e.g. if # the term name contains a colon and it wasn't namespaced with a valid vocabulary ID. @@ -4756,18 +6315,16 @@ def prepare_term_id(config, vocab_ids, field_name, term): def get_field_vocabularies(config, field_definitions, field_name): - """Gets IDs of vocabularies linked from the current field (could be more than one). - """ - if 'vocabularies' in field_definitions[field_name]: - vocabularies = field_definitions[field_name]['vocabularies'] + """Gets IDs of vocabularies linked from the current field (could be more than one).""" + if "vocabularies" in field_definitions[field_name]: + vocabularies = field_definitions[field_name]["vocabularies"] return vocabularies else: return False def value_is_numeric(value): - """Tests to see if value is numeric. - """ + """Tests to see if value is numeric.""" var = str(value) var = var.strip() if var.isnumeric(): @@ -4778,8 +6335,8 @@ def value_is_numeric(value): def compare_strings(known, unknown): """Normalizes the unknown string and the known one, and compares - them. If they match, returns True, if not, False. We could - use FuzzyWuzzy or something but this is probably sufficient. + them. If they match, returns True, if not, False. We could + use FuzzyWuzzy or something but this is probably sufficient. """ # Strips leading and trailing whitespace. known = known.strip() @@ -4789,8 +6346,8 @@ def compare_strings(known, unknown): unknown = unknown.lower() # Remove all punctuation. for p in string.punctuation: - known = known.replace(p, ' ') - unknown = unknown.replace(p, ' ') + known = known.replace(p, " ") + unknown = unknown.replace(p, " ") # Replaces whitespace with a single space. known = " ".join(known.split()) unknown = " ".join(unknown.split()) @@ -4803,9 +6360,9 @@ def compare_strings(known, unknown): def get_csv_record_hash(row): """Concatenate values in the CSV record and get an MD5 hash on the - resulting string. + resulting string. 
""" - serialized_row = '' + serialized_row = "" for field in row: if isinstance(row[field], str) or isinstance(row[field], int): if isinstance(row[field], int): @@ -4814,26 +6371,29 @@ def get_csv_record_hash(row): row_value = " ".join(row_value.split()) serialized_row = serialized_row + row_value + " " - serialized_row = bytes(serialized_row.strip().lower(), 'utf-8') + serialized_row = bytes(serialized_row.strip().lower(), "utf-8") hash_object = hashlib.md5(serialized_row) return hash_object.hexdigest() def validate_input_dir(config): # Check existence input directory. - if os.path.isabs(config['input_dir']): - input_dir_path = config['input_dir'] + if os.path.isabs(config["input_dir"]): + input_dir_path = config["input_dir"] else: - input_dir_path = os.path.abspath(config['input_dir']) + input_dir_path = os.path.abspath(config["input_dir"]) if not os.path.exists(input_dir_path): - message = 'Input directory specified in the "input_dir" configuration setting ("' + config['input_dir'] + '") not found.' + message = ( + 'Input directory specified in the "input_dir" configuration setting ("' + + config["input_dir"] + + '") not found.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def validate_required_fields_have_values(config, required_drupal_fields, csv_data): - """Loop through all fields in CSV to ensure that required field have a value in the CSV. - """ + """Loop through all fields in CSV to ensure that required field have a value in the CSV.""" rows_with_missing_required_values = [] for row in csv_data: for required_field in required_drupal_fields: @@ -4843,19 +6403,22 @@ def validate_required_fields_have_values(config, required_drupal_fields, csv_dat logging.error(message) if len(rows_with_missing_required_values) > 0: - sys.exit('Error: ' + "Some required Drupal fields in your CSV file are empty. See log for more information.") + sys.exit( + "Error: " + + "Some required Drupal fields in your CSV file are empty. See log for more information." + ) def validate_csv_field_cardinality(config, field_definitions, csv_data): """Compare values in the CSV data with the fields' cardinality. Log CSV - fields that have more values than allowed, and warn user if - these fields exist in their CSV data. + fields that have more values than allowed, and warn user if + these fields exist in their CSV data. """ field_cardinalities = dict() csv_headers = csv_data.fieldnames for csv_header in csv_headers: if csv_header in field_definitions.keys(): - cardinality = field_definitions[csv_header]['cardinality'] + cardinality = field_definitions[csv_header]["cardinality"] # We don't care about cardinality of -1 (unlimited). if int(cardinality) > 0: field_cardinalities[csv_header] = cardinality @@ -4864,67 +6427,125 @@ def validate_csv_field_cardinality(config, field_definitions, csv_data): for field_name in field_cardinalities.keys(): if field_name in row: # Don't check for the subdelimiter in title. 
- if field_name == 'title': + if field_name == "title": continue - delimited_field_values = row[field_name].split(config['subdelimiter']) - if field_cardinalities[field_name] == 1 and len(delimited_field_values) > 1: - if config['task'] == 'create': - message = 'CSV field "' + field_name + '" in record with ID ' + row[config['id_field']] + ' contains more values than the number ' - if config['task'] == 'update': - message = 'CSV field "' + field_name + '" in record with node ID ' + row['node_id'] + ' contains more values than the number ' - message_2 = 'allowed for that field (' + str(field_cardinalities[field_name]) + '). Workbench will add only the first value.' - print('Warning: ' + message + message_2) + delimited_field_values = row[field_name].split(config["subdelimiter"]) + if ( + field_cardinalities[field_name] == 1 + and len(delimited_field_values) > 1 + ): + if config["task"] == "create": + message = ( + 'CSV field "' + + field_name + + '" in record with ID ' + + row[config["id_field"]] + + " contains more values than the number " + ) + if config["task"] == "update": + message = ( + 'CSV field "' + + field_name + + '" in record with node ID ' + + row["node_id"] + + " contains more values than the number " + ) + message_2 = ( + "allowed for that field (" + + str(field_cardinalities[field_name]) + + "). Workbench will add only the first value." + ) + print("Warning: " + message + message_2) logging.warning(message + message_2) - if int(field_cardinalities[field_name]) > 1 and len(delimited_field_values) > field_cardinalities[field_name]: - if config['task'] == 'create': - message = 'CSV field "' + field_name + '" in record with ID ' + row[config['id_field']] + ' contains more values than the number ' - if config['task'] == 'update': - message = 'CSV field "' + field_name + '" in record with node ID ' + row['node_id'] + ' contains more values than the number ' - message_2 = 'allowed for that field (' + str(field_cardinalities[field_name]) + '). Workbench will add only the first ' + str( - field_cardinalities[field_name]) + ' values.' - print('Warning: ' + message + message_2) + if ( + int(field_cardinalities[field_name]) > 1 + and len(delimited_field_values) > field_cardinalities[field_name] + ): + if config["task"] == "create": + message = ( + 'CSV field "' + + field_name + + '" in record with ID ' + + row[config["id_field"]] + + " contains more values than the number " + ) + if config["task"] == "update": + message = ( + 'CSV field "' + + field_name + + '" in record with node ID ' + + row["node_id"] + + " contains more values than the number " + ) + message_2 = ( + "allowed for that field (" + + str(field_cardinalities[field_name]) + + "). Workbench will add only the first " + + str(field_cardinalities[field_name]) + + " values." + ) + print("Warning: " + message + message_2) logging.warning(message + message_2) def validate_text_list_fields(config, field_definitions, csv_data): """For fields that are of "list_string" field type, check that values - in CSV are in the field's "allowed_values" config setting. + in CSV are in the field's "allowed_values" config setting. 
""" list_field_allowed_values = dict() csv_headers = csv_data.fieldnames for csv_header in csv_headers: if csv_header in field_definitions.keys(): - if 'allowed_values' in field_definitions[csv_header]: - if field_definitions[csv_header]['allowed_values'] is not None: - list_field_allowed_values[csv_header] = field_definitions[csv_header]['allowed_values'] + if "allowed_values" in field_definitions[csv_header]: + if field_definitions[csv_header]["allowed_values"] is not None: + list_field_allowed_values[csv_header] = field_definitions[ + csv_header + ]["allowed_values"] for count, row in enumerate(csv_data, start=1): for field_name in list_field_allowed_values.keys(): if field_name in row and len(row[field_name]) > 0: - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split(config["subdelimiter"]) for field_value in delimited_field_values: - if field_name in list_field_allowed_values and field_value not in list_field_allowed_values[field_name]: - if config['task'] == 'create': - message = 'CSV field "' + field_name + '" in record with ID ' + \ - row[config['id_field']] + ' contains a value ("' + field_value + '") that is not in the fields\'s allowed values.' - if config['task'] == 'update': - message = 'CSV field "' + field_name + '" in record with node ID ' + \ - row[config['id_field']] + ' contains a value ("' + field_value + '") that is not in the fields\'s allowed values.' - print('Warning: ' + message) + if ( + field_name in list_field_allowed_values + and field_value not in list_field_allowed_values[field_name] + ): + if config["task"] == "create": + message = ( + 'CSV field "' + + field_name + + '" in record with ID ' + + row[config["id_field"]] + + ' contains a value ("' + + field_value + + "\") that is not in the fields's allowed values." + ) + if config["task"] == "update": + message = ( + 'CSV field "' + + field_name + + '" in record with node ID ' + + row[config["id_field"]] + + ' contains a value ("' + + field_value + + "\") that is not in the fields's allowed values." + ) + print("Warning: " + message) logging.warning(message) def validate_csv_field_length(config, field_definitions, csv_data): """Compare values in the CSV data with the fields' max_length. Log CSV - fields that exceed their max_length, and warn user if - these fields exist in their CSV data. + fields that exceed their max_length, and warn user if + these fields exist in their CSV data. """ field_max_lengths = dict() csv_headers = csv_data.fieldnames for csv_header in csv_headers: if csv_header in field_definitions.keys(): - if 'max_length' in field_definitions[csv_header]: - max_length = field_definitions[csv_header]['max_length'] + if "max_length" in field_definitions[csv_header]: + max_length = field_definitions[csv_header]["max_length"] # We don't care about max_length of None (i.e., it's not applicable or unlimited). 
if max_length is not None: field_max_lengths[csv_header] = max_length @@ -4932,38 +6553,66 @@ def validate_csv_field_length(config, field_definitions, csv_data): for count, row in enumerate(csv_data, start=1): for field_name in field_max_lengths.keys(): if field_name in row: - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split(config["subdelimiter"]) for field_value in delimited_field_values: field_value_length = len(field_value) - if field_name in field_max_lengths and len(field_value) > int(field_max_lengths[field_name]): - if config['task'] == 'create': - message = 'CSV field "' + field_name + '" in record with ID ' + \ - row[config['id_field']] + ' contains a value that is longer (' + str(len(field_value)) + ' characters)' - if config['task'] == 'update': - message = 'CSV field "' + field_name + '" in record with node ID ' + \ - row['node_id'] + ' contains a value that is longer (' + str(len(field_value)) + ' characters)' - message_2 = ' than allowed for that field (' + \ - str(field_max_lengths[field_name]) + ' characters). Workbench will truncate this value prior to populating Drupal.' - print('Warning: ' + message + message_2) + if field_name in field_max_lengths and len(field_value) > int( + field_max_lengths[field_name] + ): + if config["task"] == "create": + message = ( + 'CSV field "' + + field_name + + '" in record with ID ' + + row[config["id_field"]] + + " contains a value that is longer (" + + str(len(field_value)) + + " characters)" + ) + if config["task"] == "update": + message = ( + 'CSV field "' + + field_name + + '" in record with node ID ' + + row["node_id"] + + " contains a value that is longer (" + + str(len(field_value)) + + " characters)" + ) + message_2 = ( + " than allowed for that field (" + + str(field_max_lengths[field_name]) + + " characters). Workbench will truncate this value prior to populating Drupal." + ) + print("Warning: " + message + message_2) logging.warning(message + message_2) def validate_geolocation_fields(config, field_definitions, csv_data): - """Validate lat,long values in fields that are of type 'geolocation'. - """ + """Validate lat,long values in fields that are of type 'geolocation'.""" geolocation_fields_present = False for count, row in enumerate(csv_data, start=1): for field_name in field_definitions.keys(): - if field_definitions[field_name]['field_type'] == 'geolocation': + if field_definitions[field_name]["field_type"] == "geolocation": if field_name in row: geolocation_fields_present = True - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) for field_value in delimited_field_values: if len(field_value.strip()): if not validate_latlong_value(field_value.strip()): - message = 'Value in field "' + field_name + '" in row with ID ' + row[config['id_field']] + ' (' + field_value + ') is not a valid lat,long pair.' + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " (" + + field_value + + ") is not a valid lat,long pair." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if geolocation_fields_present is True: message = "OK, geolocation field values in the CSV file validate." 
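The geolocation check reformatted above reduces to splitting a multi-valued CSV cell on the configured subdelimiter and matching each part against a lat,long regular expression. A minimal sketch of that logic, assuming a hypothetical cell value and "|" as the subdelimiter (the pattern is the one validate_latlong_value() matches against):

import re

# Pattern for "latitude,longitude" pairs, as used by validate_latlong_value().
LATLONG = re.compile(
    r"^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$"
)

# Hypothetical multi-valued cell; "|" stands in for config["subdelimiter"].
cell = "49.16667,-123.93333|+49.25,-124.8"

for value in cell.split("|"):
    # Strip the leading backslash some spreadsheets add, as the real check does.
    value = value.strip().lstrip("\\")
    print(value, "is valid" if LATLONG.match(value) else "is not a valid lat,long pair")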
@@ -4972,21 +6621,30 @@ def validate_geolocation_fields(config, field_definitions, csv_data): def validate_link_fields(config, field_definitions, csv_data): - """Validate values in fields that are of type 'link'. - """ + """Validate values in fields that are of type 'link'.""" link_fields_present = False for count, row in enumerate(csv_data, start=1): for field_name in field_definitions.keys(): - if field_definitions[field_name]['field_type'] == 'link': + if field_definitions[field_name]["field_type"] == "link": if field_name in row: link_fields_present = True - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) for field_value in delimited_field_values: if len(field_value.strip()): if not validate_link_value(field_value.strip()): - message = 'Value in field "' + field_name + '" in row with ID ' + row[config['id_field']] + ' (' + field_value + ') is not a valid link field value.' + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " (" + + field_value + + ") is not a valid link field value." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if link_fields_present is True: message = "OK, link field values in the CSV file validate." @@ -4995,25 +6653,36 @@ def validate_link_fields(config, field_definitions, csv_data): def validate_authority_link_fields(config, field_definitions, csv_data): - """Validate values in fields that are of type 'authority_link'. - """ - if config['task'] == 'create_terms': - config['id_field'] = 'term_name' + """Validate values in fields that are of type 'authority_link'.""" + if config["task"] == "create_terms": + config["id_field"] = "term_name" authority_link_fields_present = False for count, row in enumerate(csv_data, start=1): for field_name in field_definitions.keys(): - if field_definitions[field_name]['field_type'] == 'authority_link': + if field_definitions[field_name]["field_type"] == "authority_link": if field_name in row: authority_link_fields_present = True - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) for field_value in delimited_field_values: if len(field_value.strip()): - if not validate_authority_link_value(field_value.strip(), field_definitions[field_name]['authority_sources']): - message = 'Value in field "' + field_name + '" in row with ID "' + \ - row[config['id_field']] + '" (' + field_value + ') is not a valid authority link field value.' + if not validate_authority_link_value( + field_value.strip(), + field_definitions[field_name]["authority_sources"], + ): + message = ( + 'Value in field "' + + field_name + + '" in row with ID "' + + row[config["id_field"]] + + '" (' + + field_value + + ") is not a valid authority link field value." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if authority_link_fields_present is True: message = "OK, authority link field values in the CSV file validate." @@ -5022,8 +6691,7 @@ def validate_authority_link_fields(config, field_definitions, csv_data): def validate_media_track_fields(config, csv_data): - """Validate values in fields that are of type 'media_track'. - """ + """Validate values in fields that are of type 'media_track'.""" media_track_fields_present = False # Must accommodate multiple media track fields in the same CSV (e.g. 
audio and video media in the # same CSV, each with its own track column). Therefore, we'll need to get the field definitions @@ -5031,63 +6699,125 @@ def validate_media_track_fields(config, csv_data): media_track_field_definitions = dict() csv_column_headers = copy.copy(csv_data.fieldnames) for column_header in csv_column_headers: - if column_header.startswith('media:'): + if column_header.startswith("media:"): # Assumes well-formed column headers. - media_bundle_name_parts = column_header.split(':') + media_bundle_name_parts = column_header.split(":") media_bundle_name = media_bundle_name_parts[1] - if media_bundle_name not in config['media_track_file_fields']: - message = 'Media type "' + media_bundle_name + '" in the CSV column header "' + column_header + \ - '" is not registered in the "media_track_file_fields" configuration setting.' + if media_bundle_name not in config["media_track_file_fields"]: + message = ( + 'Media type "' + + media_bundle_name + + '" in the CSV column header "' + + column_header + + '" is not registered in the "media_track_file_fields" configuration setting.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - media_track_field_definitions[media_bundle_name] = get_field_definitions(config, 'media', media_bundle_name) + media_track_field_definitions[media_bundle_name] = get_field_definitions( + config, "media", media_bundle_name + ) for count, row in enumerate(csv_data, start=1): - for field_name in media_track_field_definitions[media_bundle_name].keys(): - if media_track_field_definitions[media_bundle_name][field_name]['field_type'] == 'media_track': - fully_qualified_field_name = f"media:{media_bundle_name}:{field_name}" - if fully_qualified_field_name in row and row[fully_qualified_field_name]: + for field_name in media_track_field_definitions[ + media_bundle_name + ].keys(): + if ( + media_track_field_definitions[media_bundle_name][field_name][ + "field_type" + ] + == "media_track" + ): + fully_qualified_field_name = ( + f"media:{media_bundle_name}:{field_name}" + ) + if ( + fully_qualified_field_name in row + and row[fully_qualified_field_name] + ): media_track_fields_present = True - delimited_field_values = row[fully_qualified_field_name].split(config['subdelimiter']) + delimited_field_values = row[ + fully_qualified_field_name + ].split(config["subdelimiter"]) for field_value in delimited_field_values: if len(field_value.strip()): if validate_media_track_value(field_value) is False: - message = 'Value in field "' + fully_qualified_field_name + '" in row with ID "' + \ - row[config['id_field']] + '" (' + field_value + ') has a media type is not a valid media track field value.' + message = ( + 'Value in field "' + + fully_qualified_field_name + + '" in row with ID "' + + row[config["id_field"]] + + '" (' + + field_value + + ") has a media type is not a valid media track field value." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Confirm that the media bundle name in the column header matches the media type # of the file in the 'file' column. 
- file_media_type = set_media_type(config, row['file'], 'file', row) + file_media_type = set_media_type( + config, row["file"], "file", row + ) if file_media_type != media_bundle_name: - message = 'File named in the "file" field in row with ID "' + \ - row[config['id_field']] + '" (' + row['file'] + ') has a media type ' + \ - '(' + file_media_type + ') that differs from the media type indicated in the column header "' + \ - fully_qualified_field_name + '" (' + media_bundle_name + ').' + message = ( + 'File named in the "file" field in row with ID "' + + row[config["id_field"]] + + '" (' + + row["file"] + + ") has a media type " + + "(" + + file_media_type + + ') that differs from the media type indicated in the column header "' + + fully_qualified_field_name + + '" (' + + media_bundle_name + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Confirm that config['media_use_tid'] and row-level media_use_term is for Service File (http://pcdm.org/use#ServiceFile). - service_file_exists = service_file_present(config, row['media_use_tid']) + service_file_exists = service_file_present( + config, row["media_use_tid"] + ) if service_file_exists is False: message = f"{row['media_use_tid']} cannot be used as a \"media_use_tid\" value in your CSV when creating media tracks." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - if config['nodes_only'] is False: + if config["nodes_only"] is False: if len(field_value.strip()): - media_track_field_value_parts = field_value.split(':') - media_track_file_path_in_csv = media_track_field_value_parts[3] - if os.path.isabs(media_track_file_path_in_csv): - media_track_file_path = media_track_file_path_in_csv + media_track_field_value_parts = ( + field_value.split(":") + ) + media_track_file_path_in_csv = ( + media_track_field_value_parts[3] + ) + if os.path.isabs( + media_track_file_path_in_csv + ): + media_track_file_path = ( + media_track_file_path_in_csv + ) else: - media_track_file_path = os.path.join(config['input_dir'], media_track_file_path_in_csv) - if not os.path.exists(media_track_file_path) or not os.path.isfile(media_track_file_path): - message = 'Media track file "' + media_track_file_path_in_csv + '" in row with ID "' + \ - row[config['id_field']] + '" not found.' + media_track_file_path = os.path.join( + config["input_dir"], + media_track_file_path_in_csv, + ) + if not os.path.exists( + media_track_file_path + ) or not os.path.isfile( + media_track_file_path + ): + message = ( + 'Media track file "' + + media_track_file_path_in_csv + + '" in row with ID "' + + row[config["id_field"]] + + '" not found.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) if media_track_fields_present is True: message = "OK, media track field values in the CSV file validate." @@ -5096,8 +6826,7 @@ def validate_media_track_fields(config, csv_data): def validate_media_track_value(media_track_value): - """Validates that the string in "media_track_value" has valid values in its subparts. - """ + """Validates that the string in "media_track_value" has valid values in its subparts.""" """Parameters ---------- media_track_value : string @@ -5107,12 +6836,18 @@ def validate_media_track_value(media_track_value): boolean True if it does, False if not. 
""" - valid_kinds = ['subtitles', 'descriptions', 'metadata', 'captions', 'chapters'] - parts = media_track_value.split(':', 3) + valid_kinds = ["subtitles", "descriptions", "metadata", "captions", "chapters"] + parts = media_track_value.split(":", 3) # First part, the label, needs to have a length; second part needs to be one of the # values in 'valid_kinds'; third part needs to be a valid Drupal language code; the fourth # part needs to end in '.vtt'. - if len(parts) == 4 and len(parts[0]) > 0 and validate_language_code(parts[2]) and parts[1] in valid_kinds and parts[3].lower().endswith('.vtt'): + if ( + len(parts) == 4 + and len(parts[0]) > 0 + and validate_language_code(parts[2]) + and parts[1] in valid_kinds + and parts[3].lower().endswith(".vtt") + ): return True else: return False @@ -5120,8 +6855,11 @@ def validate_media_track_value(media_track_value): def validate_latlong_value(latlong): # Remove leading \ that may be present if input CSV is from a spreadsheet. - latlong = latlong.lstrip('\\') - if re.match(r"^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$", latlong): + latlong = latlong.lstrip("\\") + if re.match( + r"^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$", + latlong, + ): return True else: return False @@ -5129,7 +6867,7 @@ def validate_latlong_value(latlong): def validate_link_value(link_value): """Validates that the value in 'link_value' starts with either 'http://' or 'https://' - and optionally contains the url/label delimiter '%%'. + and optionally contains the url/label delimiter '%%'. """ """Parameters ---------- @@ -5140,7 +6878,7 @@ def validate_link_value(link_value): boolean True if it does, False if not. """ - parts = link_value.split('%%', 1) + parts = link_value.split("%%", 1) if re.match(r"^https?://", parts[0]): return True else: @@ -5149,7 +6887,7 @@ def validate_link_value(link_value): def validate_authority_link_value(authority_link_value, authority_sources): """Validates that the value in 'authority_link_value' has a 'source' and that the URI - component starts with either 'http://' or 'https://'. + component starts with either 'http://' or 'https://'. """ """Parameters ---------- @@ -5162,7 +6900,7 @@ def validate_authority_link_value(authority_link_value, authority_sources): boolean True if it does, False if not. """ - parts = authority_link_value.split('%%', 2) + parts = authority_link_value.split("%%", 2) if parts[0] not in authority_sources: return False if re.match(r"^https?://", parts[1]): @@ -5173,42 +6911,60 @@ def validate_authority_link_value(authority_link_value, authority_sources): def validate_term_name_length(term_name, row_number, column_name): """Checks that the length of a term name does not exceed - Drupal's 255 character length. + Drupal's 255 character length. """ term_name = term_name.strip() if len(term_name) > 255: - message = 'CSV field "' + column_name + '" in record ' + row_number + \ - " contains a taxonomy term that exceeds Drupal's limit of 255 characters (length of term is " + str(len(term_name)) + ' characters).' + message = ( + 'CSV field "' + + column_name + + '" in record ' + + row_number + + " contains a taxonomy term that exceeds Drupal's limit of 255 characters (length of term is " + + str(len(term_name)) + + " characters)." + ) message_2 = ' Term provided in CSV is "' + term_name + '".' message_3 = " Please reduce the term's length to less than 256 characters." 
         logging.error(message + message_2 + message_3)
-        sys.exit('Error: ' + message + ' See the Workbench log for more information.')
+        sys.exit("Error: " + message + " See the Workbench log for more information.")


 def validate_node_created_date(config, csv_data):
     """Checks that date_string is in the format used by Drupal's 'created' node property,
-       e.g., 2020-11-15T23:49:22+00:00. Also check to see if the date is in the future.
+    e.g., 2020-11-15T23:49:22+00:00. Also check to see if the date is in the future.
     """
     for count, row in enumerate(csv_data, start=1):
         for field_name, field_value in row.items():
-            if field_name == 'created' and len(field_value) > 0:
+            if field_name == "created" and len(field_value) > 0:
                 # matches = re.match(r'^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d[+-]\d\d:\d\d$', field_value)
                 if not validate_node_created_date_string(field_value):
-                    message = 'CSV field "created" in record with ID ' + \
-                        row[config['id_field']] + ' contains a date "' + field_value + '" that is not formatted properly.'
+                    message = (
+                        'CSV field "created" in record with ID '
+                        + row[config["id_field"]]
+                        + ' contains a date "'
+                        + field_value
+                        + '" that is not formatted properly.'
+                    )
                     logging.error(message)
-                    sys.exit('Error: ' + message)
+                    sys.exit("Error: " + message)

                 now = datetime.datetime.now()
                 # Remove the GMT differential at the end of the time string.
-                date_string_trimmed = re.sub(
-                    r'[+-]\d\d:\d\d$', '', field_value)
-                created_date = datetime.datetime.strptime(date_string_trimmed, '%Y-%m-%dT%H:%M:%S')
+                date_string_trimmed = re.sub(r"[+-]\d\d:\d\d$", "", field_value)
+                created_date = datetime.datetime.strptime(
+                    date_string_trimmed, "%Y-%m-%dT%H:%M:%S"
+                )
                 if created_date > now:
-                    message = 'CSV field "created" in record with ID ' + \
-                        row[config['id_field']] + ' contains a date "' + field_value + '" that is in the future.'
+                    message = (
+                        'CSV field "created" in record with ID '
+                        + row[config["id_field"]]
+                        + ' contains a date "'
+                        + field_value
+                        + '" that is in the future.'
+                    )
                     logging.error(message)
-                    sys.exit('Error: ' + message)
+                    sys.exit("Error: " + message)

     message = 'OK, dates in the "created" CSV field are all formatted correctly and none are in the future.'
     print(message)
@@ -5223,23 +6979,32 @@ def validate_node_created_date_string(created_date_string):


 def validate_edtf_fields(config, field_definitions, csv_data):
-    """Validate values in fields that are of type 'edtf'.
-    """
+    """Validate values in fields that are of type 'edtf'."""
     edtf_fields_present = False
     for count, row in enumerate(csv_data, start=1):
         for field_name in field_definitions.keys():
-            if field_definitions[field_name]['field_type'] == 'edtf':
+            if field_definitions[field_name]["field_type"] == "edtf":
                 if field_name in row:
                     edtf_fields_present = True
-                    delimited_field_values = row[field_name].split(config['subdelimiter'])
+                    delimited_field_values = row[field_name].split(
+                        config["subdelimiter"]
+                    )
                     for field_value in delimited_field_values:
                         if len(field_value.strip()):
                             valid = validate_edtf_date(field_value)
                             if valid is False:
-                                message = 'Value in field "' + field_name + '" in row with ID ' + row[config['id_field']] + ' ("' + field_value + '") is not a valid EDTF date/time.'
+                                message = (
+                                    'Value in field "'
+                                    + field_name
+                                    + '" in row with ID '
+                                    + row[config["id_field"]]
+                                    + ' ("'
+                                    + field_value
+                                    + '") is not a valid EDTF date/time.'
+ ) logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) if edtf_fields_present is True: message = "OK, EDTF field values in the CSV file validate." @@ -5250,40 +7015,40 @@ def validate_edtf_fields(config, field_definitions, csv_data): def validate_edtf_date(date): date = date.strip() # nnnX? - if re.match(r'^[1-2]\d\dX\?', date): + if re.match(r"^[1-2]\d\dX\?", date): return True # nnXX? - elif re.match(r'^[1-2]\dXX\?', date): + elif re.match(r"^[1-2]\dXX\?", date): return True # nXXX? - elif re.match(r'^[1-2]XXX\?', date): + elif re.match(r"^[1-2]XXX\?", date): return True # nXXX~ - elif re.match(r'^[1-2]XXX\~', date): + elif re.match(r"^[1-2]XXX\~", date): return True # nnXX~ - elif re.match(r'^[1-2]\dXX\~', date): + elif re.match(r"^[1-2]\dXX\~", date): return True # nnnX~ - elif re.match(r'^[1-2]\d\dX\~', date): + elif re.match(r"^[1-2]\d\dX\~", date): return True # nXXX% - elif re.match(r'^[1-2]XXX\%', date): + elif re.match(r"^[1-2]XXX\%", date): return True # nnXX% - elif re.match(r'^[1-2]\dXX\%', date): + elif re.match(r"^[1-2]\dXX\%", date): return True # nnnX% - elif re.match(r'^[1-2]\d\dX\%', date): + elif re.match(r"^[1-2]\d\dX\%", date): return True # XXXX? - elif re.match(r'^XXXX\?', date): + elif re.match(r"^XXXX\?", date): return True # XXXX~ - elif re.match(r'^XXXX\~', date): + elif re.match(r"^XXXX\~", date): return True # XXXX% - elif re.match(r'^XXXX\%', date): + elif re.match(r"^XXXX\%", date): return True elif edtf_validate.valid_edtf.is_valid(date): return True @@ -5292,46 +7057,60 @@ def validate_edtf_date(date): def validate_url_aliases(config, csv_data): - """Checks that URL aliases don't already exist. - """ + """Checks that URL aliases don't already exist.""" for count, row in enumerate(csv_data, start=1): for field_name, field_value in row.items(): - if field_name == 'url_alias' and len(field_value) > 0: - if field_value.strip()[0] != '/': - message = 'CSV field "url_alias" in record with ID ' + \ - row[config['id_field']] + ' contains an alias "' + field_value + '" that is missing its leading /.' + if field_name == "url_alias" and len(field_value) > 0: + if field_value.strip()[0] != "/": + message = ( + 'CSV field "url_alias" in record with ID ' + + row[config["id_field"]] + + ' contains an alias "' + + field_value + + '" that is missing its leading /.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) alias_ping = ping_url_alias(config, field_value) # @todo: Add 301 and 302 as acceptable status codes? if alias_ping == 200: - message = 'CSV field "url_alias" in record with ID ' + \ - row[config['id_field']] + ' contains an alias "' + field_value + '" that already exists.' + message = ( + 'CSV field "url_alias" in record with ID ' + + row[config["id_field"]] + + ' contains an alias "' + + field_value + + '" that already exists.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) - message = 'OK, URL aliases do not already exist.' + message = "OK, URL aliases do not already exist." print(message) logging.info(message) def validate_node_uid(config, csv_data): """Checks that the user identified in the 'uid' field exists in Drupal. Note that - this does not validate any permissions the user may have. + this does not validate any permissions the user may have. 
""" for count, row in enumerate(csv_data, start=1): for field_name, field_value in row.items(): - if field_name == 'uid' and len(field_value) > 0: + if field_name == "uid" and len(field_value) > 0: # Request to /user/x?_format=json goes here; 200 means the user # exists, 404 means they do no. - uid_url = config['host'] + '/user/' + str(field_value) + '?_format=json' - uid_response = issue_request(config, 'GET', uid_url) + uid_url = config["host"] + "/user/" + str(field_value) + "?_format=json" + uid_response = issue_request(config, "GET", uid_url) if uid_response.status_code == 404: - message = 'CSV field "uid" in record with ID ' + \ - row[config['id_field']] + ' contains a user ID "' + field_value + '" that does not exist in the target Drupal.' + message = ( + 'CSV field "uid" in record with ID ' + + row[config["id_field"]] + + ' contains a user ID "' + + field_value + + '" that does not exist in the target Drupal.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) message = 'OK, user IDs in the "uid" CSV field all exist.' print(message) @@ -5340,17 +7119,20 @@ def validate_node_uid(config, csv_data): def validate_parent_ids_precede_children(config, csv_data): """In the page/child-level metadata method of creating compound content, - CSV rows for parent items must come before their children in the CSV file. - This function checks for that. Note that this check only applies to one - level of parent/child hierarchy (i.e., parents and their immediate children). + CSV rows for parent items must come before their children in the CSV file. + This function checks for that. Note that this check only applies to one + level of parent/child hierarchy (i.e., parents and their immediate children). """ positions = dict() - id_field = config['id_field'] + id_field = config["id_field"] row_num = 0 - if 'parent_id' in csv_data.fieldnames: + if "parent_id" in csv_data.fieldnames: for row in csv_data: row_num += 1 - positions[row[id_field]] = {'position': row_num, 'parent_id': row['parent_id']} + positions[row[id_field]] = { + "position": row_num, + "parent_id": row["parent_id"], + } else: return False @@ -5359,46 +7141,55 @@ def validate_parent_ids_precede_children(config, csv_data): # of the row identified in its "parent_id" value. If it is lower, error out. for row in positions.items(): # Only child items have a value in their "parent_id" field. - if row[1]['parent_id'] == '': + if row[1]["parent_id"] == "": continue - parent_id = row[1]['parent_id'] + parent_id = row[1]["parent_id"] if parent_id in positions: - if row[1]['position'] < positions[parent_id]['position']: + if row[1]["position"] < positions[parent_id]["position"]: message = f"Child item with CSV ID \"{row[0]}\" must come after its parent (CSV ID \"{row[1]['parent_id']}\") in the CSV file." logging.error(message) - if config['perform_soft_checks'] is False: - sys.exit('Error: ' + message) + if config["perform_soft_checks"] is False: + sys.exit("Error: " + message) def validate_parent_ids_in_csv_id_to_node_id_map(config, csv_data): """Query the CSV ID to node ID map to check for non-unique parent IDs. - If they exist, report out but do not exit. + If they exist, report out but do not exit. """ - if config['query_csv_id_to_node_id_map_for_parents'] is True: + if config["query_csv_id_to_node_id_map_for_parents"] is True: message = "Validating parent IDs in the CSV ID to node ID map, please wait." print(message) else: return # First, confirm the databae exists; if not, tell the user and exit. 
- if config['csv_id_to_node_id_map_path'] is not False: - if not os.path.exists(config['csv_id_to_node_id_map_path']): + if config["csv_id_to_node_id_map_path"] is not False: + if not os.path.exists(config["csv_id_to_node_id_map_path"]): message = f"Can't find CSV ID to node ID database path at {config['csv_id_to_node_id_map_path']}." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # If database exists, query it. - if config['query_csv_id_to_node_id_map_for_parents'] is True and config['csv_id_to_node_id_map_path'] is not False: - id_field = config['id_field'] + if ( + config["query_csv_id_to_node_id_map_for_parents"] is True + and config["csv_id_to_node_id_map_path"] is not False + ): + id_field = config["id_field"] parents_from_id_map = [] for row in csv_data: - if config['ignore_duplicate_parent_ids'] is True: + if config["ignore_duplicate_parent_ids"] is True: query = "select * from csv_id_to_node_id_map where parent_csv_id = ? order by timestamp desc limit 1" else: query = "select * from csv_id_to_node_id_map where parent_csv_id = ?" - parent_in_id_map_result = sqlite_manager(config, operation='select', query=query, values=(row[id_field],), db_file_path=config['csv_id_to_node_id_map_path']) + parent_in_id_map_result = sqlite_manager( + config, + operation="select", + query=query, + values=(row[id_field],), + db_file_path=config["csv_id_to_node_id_map_path"], + ) for parent_in_id_map_row in parent_in_id_map_result: - parents_from_id_map.append(parent_in_id_map_row['node_id'].strip()) + parents_from_id_map.append(parent_in_id_map_row["node_id"].strip()) if len(parents_from_id_map) > 1: message = f'Query of ID map for parent ID "{row["parent_id"]}" returned multiple node IDs: ({", ".join(parents_from_id_map)}).' logging.warning(message) @@ -5407,10 +7198,10 @@ def validate_parent_ids_in_csv_id_to_node_id_map(config, csv_data): def validate_taxonomy_field_values(config, field_definitions, csv_data): """Loop through all fields in field_definitions, and if a field - is a taxonomy reference field, validate all values in the CSV - data in that field against term IDs in the taxonomies referenced - by the field. Does not validate Typed Relation fields - (see validate_typed_relation_field_values()). + is a taxonomy reference field, validate all values in the CSV + data in that field against term IDs in the taxonomies referenced + by the field. Does not validate Typed Relation fields + (see validate_typed_relation_field_values()). """ # Define a list to store names of CSV fields that reference vocabularies. fields_with_vocabularies = list() @@ -5418,10 +7209,12 @@ def validate_taxonomy_field_values(config, field_definitions, csv_data): # Get all the term IDs for vocabularies referenced in all fields in the CSV. for column_name in csv_data.fieldnames: if column_name in field_definitions: - if field_definitions[column_name]['field_type'] == 'typed_relation': + if field_definitions[column_name]["field_type"] == "typed_relation": continue - if 'vocabularies' in field_definitions[column_name]: - vocabularies = get_field_vocabularies(config, field_definitions, column_name) + if "vocabularies" in field_definitions[column_name]: + vocabularies = get_field_vocabularies( + config, field_definitions, column_name + ) # If there are no vocabularies linked to the current field, 'vocabularies' # will be False and will throw a TypeError. 
                 try:
@@ -5429,9 +7222,13 @@ def validate_taxonomy_field_values(config, field_definitions, csv_data):
                     num_vocabs = len(vocabularies)
                     if num_vocabs > 0:
                         fields_with_vocabularies.append(column_name)
                 except BaseException:
-                    message = 'Workbench cannot get vocabularies linked to field "' + column_name + '". Please confirm that field has at least one vocabulary.'
+                    message = (
+                        'Workbench cannot get vocabularies linked to field "'
+                        + column_name
+                        + '". Please confirm that field has at least one vocabulary.'
+                    )
                     logging.error(message)
-                    sys.exit('Error: ' + message)
+                    sys.exit("Error: " + message)

     # If none of the CSV fields are taxonomy reference fields, return.
     if len(fields_with_vocabularies) == 0:
@@ -5442,35 +7239,53 @@ def validate_taxonomy_field_values(config, field_definitions, csv_data):
     for count, row in enumerate(csv_data, start=1):
         for column_name in fields_with_vocabularies:
             if len(row[column_name]):
-                new_term_names_in_csv = validate_taxonomy_reference_value(config, field_definitions, column_name, row[column_name], count)
+                new_term_names_in_csv = validate_taxonomy_reference_value(
+                    config, field_definitions, column_name, row[column_name], count
+                )
                 new_term_names_in_csv_results.append(new_term_names_in_csv)

-    if True in new_term_names_in_csv_results and config['allow_adding_terms'] is True:
-        if config['validate_terms_exist'] is True:
-            message = "OK, term IDs/names in CSV file exist in their respective taxonomies"
-            if config['log_term_creation'] is True:
-                message = message + " (new terms will be created as noted in the Workbench log)."
+    if True in new_term_names_in_csv_results and config["allow_adding_terms"] is True:
+        if config["validate_terms_exist"] is True:
+            message = (
+                "OK, term IDs/names in CSV file exist in their respective taxonomies"
+            )
+            if config["log_term_creation"] is True:
+                message = (
+                    message
+                    + " (new terms will be created as noted in the Workbench log)."
+                )
             else:
-                message = message + ' (new terms will be created but not noted in the Workbench log since "log_term_creation" is set to false).'
+                message = (
+                    message
+                    + ' (new terms will be created but not noted in the Workbench log since "log_term_creation" is set to false).'
+                )
             print(message)
         else:
-            if config['log_term_creation'] is True:
-                print("Skipping check for existence of terms (new terms will be created as noted in the Workbench log).")
+            if config["log_term_creation"] is True:
+                print(
+                    "Skipping check for existence of terms (new terms will be created as noted in the Workbench log)."
+                )
             else:
-                print('Skipping check for existence of terms (notee: terms will be created but not noted in the Workbench log - "log_term_creation" is set to false).')
-                logging.warning("Skipping check for existence of terms (but new terms will be created).")
+                print(
+                    'Skipping check for existence of terms (note: terms will be created but not noted in the Workbench log - "log_term_creation" is set to false).'
+                )
+                logging.warning(
+                    "Skipping check for existence of terms (but new terms will be created)."
+                )
     else:
         # All term IDs are in their field's vocabularies.
         print("OK, term IDs/names in CSV file exist in their respective taxonomies.")
-        logging.info("OK, term IDs/names in CSV file exist in their respective taxonomies.")
+        logging.info(
+            "OK, term IDs/names in CSV file exist in their respective taxonomies."
+ ) return vocab_validation_issues def validate_vocabulary_fields_in_csv(config, vocabulary_id, vocab_csv_file_path): """Loop through all fields in CSV to ensure that all present fields match field - from the vocab's field definitions, and that any required fields are present. - Also checks that each row has the same number of columns as there are headers. + from the vocab's field definitions, and that any required fields are present. + Also checks that each row has the same number of columns as there are headers. """ """Parameters ---------- @@ -5481,7 +7296,7 @@ def validate_vocabulary_fields_in_csv(config, vocabulary_id, vocab_csv_file_path vocab_csv_file_path: string Location of vocabulary CSV file. """ - csv_data = get_csv_data(config, 'taxonomy_fields', vocab_csv_file_path) + csv_data = get_csv_data(config, "taxonomy_fields", vocab_csv_file_path) csv_column_headers = copy.copy(csv_data.fieldnames) # Check whether each row contains the same number of columns as there are headers. @@ -5492,66 +7307,115 @@ def validate_vocabulary_fields_in_csv(config, vocabulary_id, vocab_csv_file_path row_count += 1 for field in row: # 'stringtopopulateextrafields' is added by get_csv_data() if there are extra headers. - if row[field] == 'stringtopopulateextrafields': + if row[field] == "stringtopopulateextrafields": extra_headers = True else: field_count += 1 if extra_headers is True: - message = "Row with ID " + row[config['id_field']] + ') of the vocabulary CSV file "' + \ - vocab_csv_file_path + '" has fewer columns than there are headers (' + str(len(csv_column_headers)) + ")." + message = ( + "Row with ID " + + row[config["id_field"]] + + ') of the vocabulary CSV file "' + + vocab_csv_file_path + + '" has fewer columns than there are headers (' + + str(len(csv_column_headers)) + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Note: this message is also generated in get_csv_data() since CSV Writer thows an exception if the row has # form fields than headers. if len(csv_column_headers) < field_count: - message = "Row with term name '" + row['term_name'] + ') of the vocabulary CSV file "' + vocab_csv_file_path + \ - '" has more columns (' + str(field_count) + ") than there are headers (" + str(len(csv_column_headers)) + ")." + message = ( + "Row with term name '" + + row["term_name"] + + ') of the vocabulary CSV file "' + + vocab_csv_file_path + + '" has more columns (' + + str(field_count) + + ") than there are headers (" + + str(len(csv_column_headers)) + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) - message = "OK, all " \ - + str(row_count) + ' rows in the vocabulary CSV file "' + vocab_csv_file_path + '" have the same number of columns as there are headers (' \ - + str(len(csv_column_headers)) + ').' + sys.exit("Error: " + message) + message = ( + "OK, all " + + str(row_count) + + ' rows in the vocabulary CSV file "' + + vocab_csv_file_path + + '" have the same number of columns as there are headers (' + + str(len(csv_column_headers)) + + ")." + ) print(message) logging.info(message) # Check that the required 'term_name' and 'parent' columns are present in the CSV, and that # any fields defined as required in the vocabulary are also present. - field_definitions = get_field_definitions(config, 'taxonomy_term', vocabulary_id.strip()) + field_definitions = get_field_definitions( + config, "taxonomy_term", vocabulary_id.strip() + ) for column_name in csv_column_headers: # Check that 'term_name' and 'parent' are in the CSV. 
- if 'term_name' not in csv_column_headers: - message = 'Required column "term_name" not found in vocabulary CSV file "' + vocab_csv_file_path + '".' + if "term_name" not in csv_column_headers: + message = ( + 'Required column "term_name" not found in vocabulary CSV file "' + + vocab_csv_file_path + + '".' + ) logging.error(message) - sys.exit('Error: ' + message) - if 'parent' not in csv_column_headers: - message = 'Required column "parent" not found in vocabulary CSV file "' + vocab_csv_file_path + '".' + sys.exit("Error: " + message) + if "parent" not in csv_column_headers: + message = ( + 'Required column "parent" not found in vocabulary CSV file "' + + vocab_csv_file_path + + '".' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Then vocabulary fields that are defined as required. field_definition_fieldnames = field_definitions.keys() for field in field_definition_fieldnames: - if field_definitions[field]['required'] is True and field not in csv_data.fieldnames: - message = 'Required column "' + field + '" not found in vocabulary CSV file "' + vocab_csv_file_path + '".' + if ( + field_definitions[field]["required"] is True + and field not in csv_data.fieldnames + ): + message = ( + 'Required column "' + + field + + '" not found in vocabulary CSV file "' + + vocab_csv_file_path + + '".' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Check whether remaining fields in the vocabulary CSV are fields defined in the current vocabulary. - if 'field_name' in csv_column_headers: - csv_column_headers.remove('field_name') - if 'parent' in csv_column_headers: - csv_column_headers.remove('parent') + if "field_name" in csv_column_headers: + csv_column_headers.remove("field_name") + if "parent" in csv_column_headers: + csv_column_headers.remove("parent") for csv_field in csv_column_headers: if csv_field not in field_definitions.keys(): - message = 'CSV column "' + csv_field + '" in vocabulary CSV file "' + vocab_csv_file_path + '" is not a field in the "' + vocabulary_id + '" vocabulary.' + message = ( + 'CSV column "' + + csv_field + + '" in vocabulary CSV file "' + + vocab_csv_file_path + + '" is not a field in the "' + + vocabulary_id + + '" vocabulary.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def validate_typed_relation_field_values(config, field_definitions, csv_data): """Validate values in fields that are of type 'typed_relation'. Each CSV - value must have this pattern: "string:string:int" or "string:string:string". - If the last segment is a string, it must be term name, a namespaced term name, - or an http URI. + value must have this pattern: "string:string:int" or "string:string:string". + If the last segment is a string, it must be term name, a namespaced term name, + or an http URI. """ # Define a list to store CSV field names that contain vocabularies. 
fields_with_vocabularies = list() @@ -5559,8 +7423,10 @@ def validate_typed_relation_field_values(config, field_definitions, csv_data): vocab_validation_issues = False for column_name in csv_data.fieldnames: if column_name in field_definitions: - if 'vocabularies' in field_definitions[column_name]: - vocabularies = get_field_vocabularies(config, field_definitions, column_name) + if "vocabularies" in field_definitions[column_name]: + vocabularies = get_field_vocabularies( + config, field_definitions, column_name + ) # If there are no vocabularies linked to the current field, 'vocabularies' # will be False and will throw a TypeError. try: @@ -5568,9 +7434,13 @@ def validate_typed_relation_field_values(config, field_definitions, csv_data): if num_vocabs > 0: fields_with_vocabularies.append(column_name) except BaseException: - message = 'Workbench cannot get vocabularies linked to field "' + column_name + '". Please confirm that field has at least one vocabulary.' + message = ( + 'Workbench cannot get vocabularies linked to field "' + + column_name + + '". Please confirm that field has at least one vocabulary.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) all_tids_for_field = [] # If none of the CSV fields are taxonomy reference fields, return. @@ -5581,90 +7451,177 @@ def validate_typed_relation_field_values(config, field_definitions, csv_data): new_term_names_in_csv_results = [] for count, row in enumerate(csv_data, start=1): for field_name in field_definitions.keys(): - if field_definitions[field_name]['field_type'] == 'typed_relation' and 'typed_relations' in field_definitions[field_name]: + if ( + field_definitions[field_name]["field_type"] == "typed_relation" + and "typed_relations" in field_definitions[field_name] + ): if field_name in row: typed_relation_fields_present = True - delimited_field_values = row[field_name].split(config['subdelimiter']) + delimited_field_values = row[field_name].split( + config["subdelimiter"] + ) for field_value in delimited_field_values: if len(field_value) == 0: continue # First check the required patterns. - if not re.match("^[0-9a-zA-Z]+:[0-9a-zA-Z]+:.+$", field_value.strip()): - message = 'Value in field "' + field_name + '" in row with ID ' + row[config['id_field']] + \ - ' (' + field_value + ') does not use the structure required for typed relation fields.' + if not re.match( + "^[0-9a-zA-Z]+:[0-9a-zA-Z]+:.+$", field_value.strip() + ): + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " (" + + field_value + + ") does not use the structure required for typed relation fields." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Then, check to see if the relator string (the first two parts of the # value) exist in the field_definitions[fieldname]['typed_relations'] list. - typed_relation_value_parts = field_value.split(':', 2) - relator_string = typed_relation_value_parts[0] + ':' + typed_relation_value_parts[1] - if relator_string not in field_definitions[field_name]['typed_relations']: - message = 'Value in field "' + field_name + '" in row with ID ' + row[config['id_field']] + \ - ' contains a relator (' + relator_string + ') that is not configured for that field.' 
+ typed_relation_value_parts = field_value.split(":", 2) + relator_string = ( + typed_relation_value_parts[0] + + ":" + + typed_relation_value_parts[1] + ) + if ( + relator_string + not in field_definitions[field_name]["typed_relations"] + ): + message = ( + 'Value in field "' + + field_name + + '" in row with ID ' + + row[config["id_field"]] + + " contains a relator (" + + relator_string + + ") that is not configured for that field." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Iterate through the CSV and validate the taxonomy term/name/URI in each field subvalue. for column_name in fields_with_vocabularies: if len(row[column_name]): - delimited_field_values = row[column_name].split(config['subdelimiter']) + delimited_field_values = row[column_name].split( + config["subdelimiter"] + ) delimited_field_values_without_relator_strings = [] for field_value in delimited_field_values: # Strip the relator string out from field_value, leaving the vocabulary ID and term ID/name/URI. - term_to_check = re.sub('^[0-9a-zA-Z]+:[0-9a-zA-Z]+:', '', field_value) - delimited_field_values_without_relator_strings.append(term_to_check) - - field_value_to_check = config['subdelimiter'].join(delimited_field_values_without_relator_strings) - new_term_names_in_csv = validate_taxonomy_reference_value(config, field_definitions, column_name, field_value_to_check, count) + term_to_check = re.sub( + "^[0-9a-zA-Z]+:[0-9a-zA-Z]+:", "", field_value + ) + delimited_field_values_without_relator_strings.append( + term_to_check + ) + + field_value_to_check = config["subdelimiter"].join( + delimited_field_values_without_relator_strings + ) + new_term_names_in_csv = validate_taxonomy_reference_value( + config, + field_definitions, + column_name, + field_value_to_check, + count, + ) new_term_names_in_csv_results.append(new_term_names_in_csv) - if typed_relation_fields_present is True and True in new_term_names_in_csv_results and config['allow_adding_terms'] is True: + if ( + typed_relation_fields_present is True + and True in new_term_names_in_csv_results + and config["allow_adding_terms"] is True + ): message = "OK, term IDs/names used in typed relation fields in the CSV file exist in their respective taxonomies" - if config['log_term_creation'] is True: - message = message + " (new terms will be created as noted in the Workbench log)." + if config["log_term_creation"] is True: + message = ( + message + " (new terms will be created as noted in the Workbench log)." + ) else: - message = message + ' (new terms will be created but not noted in the Workbench log since "log_term_creation" is set to false).' + message = ( + message + + ' (new terms will be created but not noted in the Workbench log since "log_term_creation" is set to false).' + ) print(message) else: if typed_relation_fields_present is True: # All term IDs are in their field's vocabularies. - print("OK, term IDs/names used in typed relation fields in the CSV file exist in their respective taxonomies.") - logging.info("OK, term IDs/names used in typed relation fields in the CSV file exist in their respective taxonomies.") + print( + "OK, term IDs/names used in typed relation fields in the CSV file exist in their respective taxonomies." + ) + logging.info( + "OK, term IDs/names used in typed relation fields in the CSV file exist in their respective taxonomies." 
+ ) return vocab_validation_issues -def validate_taxonomy_reference_value(config, field_definitions, csv_field_name, csv_field_value, record_number): - this_fields_vocabularies = get_field_vocabularies(config, field_definitions, csv_field_name) - this_fields_vocabularies_string = ', '.join(this_fields_vocabularies) +def validate_taxonomy_reference_value( + config, field_definitions, csv_field_name, csv_field_value, record_number +): + this_fields_vocabularies = get_field_vocabularies( + config, field_definitions, csv_field_name + ) + this_fields_vocabularies_string = ", ".join(this_fields_vocabularies) new_term_names_in_csv = False # Allow for multiple values in one field. - terms_to_check = csv_field_value.split(config['subdelimiter']) + terms_to_check = csv_field_value.split(config["subdelimiter"]) for field_value in terms_to_check: # If this is a multi-taxonomy field, all term names (not IDs or URIs) must be namespaced using the vocab_id:term_name pattern, # regardless of whether config['allow_adding_terms'] is True. Also, we need to accommodate terms that are namespaced # and also contain a ':'. - if len(this_fields_vocabularies) > 1 and value_is_numeric(field_value) is False and not field_value.startswith('http'): - split_field_values = field_value.split(config['subdelimiter']) + if ( + len(this_fields_vocabularies) > 1 + and value_is_numeric(field_value) is False + and not field_value.startswith("http") + ): + split_field_values = field_value.split(config["subdelimiter"]) for split_field_value in split_field_values: - if ':' in field_value: + if ":" in field_value: # If the : is present, validate that the namespace is one of the vocabulary IDs referenced by this field. - [tentative_namespace, tentative_term_name] = field_value.split(':', 1) + [tentative_namespace, tentative_term_name] = field_value.split( + ":", 1 + ) if tentative_namespace not in this_fields_vocabularies: - message = 'Vocabulary ID "' + tentative_namespace + '" used in CSV column "' + csv_field_name + '", row ' + str(record_number) + \ - ' does not match any of the vocabularies referenced by the' + ' corresponding Drupal field (' + this_fields_vocabularies_string + ').' + message = ( + 'Vocabulary ID "' + + tentative_namespace + + '" used in CSV column "' + + csv_field_name + + '", row ' + + str(record_number) + + " does not match any of the vocabularies referenced by the" + + " corresponding Drupal field (" + + this_fields_vocabularies_string + + ")." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) else: - message = 'Term names in CSV field "' + csv_field_name + '" require a vocabulary namespace; CSV value ' - message_2 = '"' + field_value + '" in row ' + str(record_number) + ' does not have one.' + message = ( + 'Term names in CSV field "' + + csv_field_name + + '" require a vocabulary namespace; CSV value ' + ) + message_2 = ( + '"' + + field_value + + '" in row ' + + str(record_number) + + " does not have one." + ) logging.error(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) - validate_term_name_length(split_field_value, str(record_number), csv_field_name) + validate_term_name_length( + split_field_value, str(record_number), csv_field_name + ) # Check to see if field_value is a member of the field's vocabularies. First, check whether field_value is a term ID. 
if value_is_numeric(field_value): @@ -5675,17 +7632,31 @@ def validate_taxonomy_reference_value(config, field_definitions, csv_field_name, if term_vocab == vocab_id: term_in_vocabs = True if term_in_vocabs is False: - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a term ID (' + field_value + ') that is ' + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + " contains a term ID (" + + field_value + + ") that is " + ) if len(this_fields_vocabularies) > 1: - message_2 = 'not in one of the referenced vocabularies (' + \ - this_fields_vocabularies_string + ').' + message_2 = ( + "not in one of the referenced vocabularies (" + + this_fields_vocabularies_string + + ")." + ) else: - message_2 = 'not in the referenced vocabulary ("' + \ - this_fields_vocabularies[0] + '").' + message_2 = ( + 'not in the referenced vocabulary ("' + + this_fields_vocabularies[0] + + '").' + ) logging.error(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) # Then check values that are URIs. - elif field_value.strip().startswith('http'): + elif field_value.strip().startswith("http"): field_value = field_value.strip() tid_from_uri = get_term_id_from_uri(config, field_value) if value_is_numeric(tid_from_uri): @@ -5695,17 +7666,41 @@ def validate_taxonomy_reference_value(config, field_definitions, csv_field_name, if term_vocab == vocab_id: term_in_vocabs = True if term_in_vocabs is False: - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a term URI (' + field_value + ') that is ' + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + " contains a term URI (" + + field_value + + ") that is " + ) if len(this_fields_vocabularies) > 1: - message_2 = 'not in one of the referenced vocabularies (' + this_fields_vocabularies_string + ').' + message_2 = ( + "not in one of the referenced vocabularies (" + + this_fields_vocabularies_string + + ")." + ) else: - message_2 = 'not in the referenced vocabulary ("' + this_fields_vocabularies[0] + '").' + message_2 = ( + 'not in the referenced vocabulary ("' + + this_fields_vocabularies[0] + + '").' + ) logging.error(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) else: - message = 'Term URI "' + field_value + '" used in CSV column "' + csv_field_name + '" row ' + str(record_number) + ' does not match any terms.' + message = ( + 'Term URI "' + + field_value + + '" used in CSV column "' + + csv_field_name + + '" row ' + + str(record_number) + + " does not match any terms." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Finally, check values that are string term names. else: new_terms_to_add = [] @@ -5714,102 +7709,174 @@ def validate_taxonomy_reference_value(config, field_definitions, csv_field_name, if value_is_numeric(tid) is False: # Single taxonomy fields. if len(this_fields_vocabularies) == 1: - if config['allow_adding_terms'] is True: + if config["allow_adding_terms"] is True: # Warn if namespaced term name is not in specified vocab. 
if tid is False: new_term_names_in_csv = True - validate_term_name_length(field_value, str(record_number), csv_field_name) - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a term ("' + field_value.strip() + '") that is ' - message_2 = 'not in the referenced vocabulary ("' + this_fields_vocabularies[0] + '"). That term will be created.' - if config['validate_terms_exist'] is True: + validate_term_name_length( + field_value, str(record_number), csv_field_name + ) + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + ' contains a term ("' + + field_value.strip() + + '") that is ' + ) + message_2 = ( + 'not in the referenced vocabulary ("' + + this_fields_vocabularies[0] + + '"). That term will be created.' + ) + if config["validate_terms_exist"] is True: logging.warning(message + message_2) else: new_term_names_in_csv = True - message = 'CSV field "' + csv_field_name + '" in row ' + \ - str(record_number) + ' contains a term ("' + field_value.strip() + '") that is ' - message_2 = 'not in the referenced vocabulary ("' + this_fields_vocabularies[0] + '").' + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + ' contains a term ("' + + field_value.strip() + + '") that is ' + ) + message_2 = ( + 'not in the referenced vocabulary ("' + + this_fields_vocabularies[0] + + '").' + ) logging.error(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) # If this is a multi-taxonomy field, all term names must be namespaced using the vocab_id:term_name pattern, # regardless of whether config['allow_adding_terms'] is True. if len(this_fields_vocabularies) > 1: - split_field_values = field_value.split(config['subdelimiter']) + split_field_values = field_value.split(config["subdelimiter"]) for split_field_value in split_field_values: # Check to see if the namespaced vocab is referenced by this field. - [namespace_vocab_id, namespaced_term_name] = split_field_value.split(':', 1) + [namespace_vocab_id, namespaced_term_name] = ( + split_field_value.split(":", 1) + ) if namespace_vocab_id not in this_fields_vocabularies: - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a namespaced term name ' - message_2 = '("' + namespaced_term_name.strip() + '") that specifies a vocabulary not associated with that field (' + namespace_vocab_id + ').' + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + " contains a namespaced term name " + ) + message_2 = ( + '("' + + namespaced_term_name.strip() + + '") that specifies a vocabulary not associated with that field (' + + namespace_vocab_id + + ")." + ) logging.error(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) - tid = find_term_in_vocab(config, namespace_vocab_id, namespaced_term_name) + tid = find_term_in_vocab( + config, namespace_vocab_id, namespaced_term_name + ) # Warn if namespaced term name is not in specified vocab. 
- if config['allow_adding_terms'] is True: - if tid is False and split_field_value not in new_terms_to_add: + if config["allow_adding_terms"] is True: + if ( + tid is False + and split_field_value not in new_terms_to_add + ): new_term_names_in_csv = True - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a term ("' + namespaced_term_name.strip() + '") that is ' - message_2 = 'not in the referenced vocabulary ("' + namespace_vocab_id + '"). That term will be created.' - if config['validate_terms_exist'] is True: + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + ' contains a term ("' + + namespaced_term_name.strip() + + '") that is ' + ) + message_2 = ( + 'not in the referenced vocabulary ("' + + namespace_vocab_id + + '"). That term will be created.' + ) + if config["validate_terms_exist"] is True: logging.warning(message + message_2) new_terms_to_add.append(split_field_value) - validate_term_name_length(split_field_value, str(record_number), csv_field_name) + validate_term_name_length( + split_field_value, + str(record_number), + csv_field_name, + ) # Die if namespaced term name is not specified vocab. else: if tid is False: - message = 'CSV field "' + csv_field_name + '" in row ' + str(record_number) + ' contains a term ("' + namespaced_term_name.strip() + '") that is ' - message_2 = 'not in the referenced vocabulary ("' + namespace_vocab_id + '").' + message = ( + 'CSV field "' + + csv_field_name + + '" in row ' + + str(record_number) + + ' contains a term ("' + + namespaced_term_name.strip() + + '") that is ' + ) + message_2 = ( + 'not in the referenced vocabulary ("' + + namespace_vocab_id + + '").' + ) logging.warning(message + message_2) - sys.exit('Error: ' + message + message_2) + sys.exit("Error: " + message + message_2) return new_term_names_in_csv def write_to_output_csv(config, id, node_json, input_csv_row=None): - """Appends a row to the CVS file located at config['output_csv']. - """ + """Appends a row to the CVS file located at config['output_csv'].""" # Importing the workbench_fields module at the top of this module with the # rest of the imports causes a circular import exception, so we do it here. import workbench_fields - if config['task'] == 'create_from_files': - config['id_field'] = 'ID' + if config["task"] == "create_from_files": + config["id_field"] = "ID" node_dict = json.loads(node_json) node_field_names = list(node_dict.keys()) - node_field_names.insert(0, 'node_id') - node_field_names.insert(0, config['id_field']) + node_field_names.insert(0, "node_id") + node_field_names.insert(0, config["id_field"]) # Don't include these Drupal fields in our output. 
fields_to_remove = [ - 'nid', - 'vid', - 'created', - 'changed', - 'langcode', - 'default_langcode', - 'uid', - 'promote', - 'sticky', - 'type', - 'revision_timestamp', - 'revision_translation_affected', - 'revision_uid', - 'revision_log', - 'content_translation_source', - 'content_translation_outdated'] + "nid", + "vid", + "created", + "changed", + "langcode", + "default_langcode", + "uid", + "promote", + "sticky", + "type", + "revision_timestamp", + "revision_translation_affected", + "revision_uid", + "revision_log", + "content_translation_source", + "content_translation_outdated", + ] for field_to_remove in fields_to_remove: if field_to_remove in node_field_names: # print("DEBUG", field_to_remove) node_field_names.remove(field_to_remove) - reserved_fields = ['file', 'parent_id', 'url_alias', 'image_alt_text', 'checksum'] + reserved_fields = ["file", "parent_id", "url_alias", "image_alt_text", "checksum"] - csvfile = open(config['output_csv'], 'a+', encoding='utf-8') + csvfile = open(config["output_csv"], "a+", encoding="utf-8") - if input_csv_row is not None and config['output_csv_include_input_csv'] is True: + if input_csv_row is not None and config["output_csv_include_input_csv"] is True: input_csv_row_fieldnames = list(input_csv_row.keys()) for reserved_field in reserved_fields: if reserved_field in input_csv_row: @@ -5818,20 +7885,20 @@ def write_to_output_csv(config, id, node_json, input_csv_row=None): writer = csv.DictWriter(csvfile, fieldnames=node_field_names, lineterminator="\n") # Check for presence of header row, don't add it if it's already there. - with open(config['output_csv']) as f: + with open(config["output_csv"]) as f: first_line = f.readline() - if not first_line.startswith(config['id_field']): + if not first_line.startswith(config["id_field"]): writer.writeheader() # Assemble the CSV record to write. row = dict() - row[config['id_field']] = id - row['node_id'] = node_dict['nid'][0]['value'] - row['uuid'] = node_dict['uuid'][0]['value'] - row['title'] = node_dict['title'][0]['value'] - row['status'] = node_dict['status'][0]['value'] - if input_csv_row is not None and config['output_csv_include_input_csv'] is True: - field_definitions = get_field_definitions(config, 'node') + row[config["id_field"]] = id + row["node_id"] = node_dict["nid"][0]["value"] + row["uuid"] = node_dict["uuid"][0]["value"] + row["title"] = node_dict["title"][0]["value"] + row["status"] = node_dict["status"][0]["value"] + if input_csv_row is not None and config["output_csv_include_input_csv"] is True: + field_definitions = get_field_definitions(config, "node") for reserved_field in reserved_fields: if reserved_field in input_csv_row: @@ -5839,8 +7906,10 @@ def write_to_output_csv(config, id, node_json, input_csv_row=None): # Then append the input row to the new node data. for field_name in node_dict: - if field_name.startswith('field_'): - row[field_name] = serialize_field_json(config, field_definitions, field_name, node_dict[field_name]) + if field_name.startswith("field_"): + row[field_name] = serialize_field_json( + config, field_definitions, field_name, node_dict[field_name] + ) row.update(input_csv_row) writer.writerow(row) csvfile.close() @@ -5854,12 +7923,18 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id): # Islandora model. Content type and status are inherited as is from parent, as are other required fields. 
The # weight assigned to the page is the last segment in the filename, split from the rest of the filename using the # character defined in the 'paged_content_sequence_separator' config option. - parent_id = parent_csv_record[config['id_field']] - page_dir_path = os.path.join(config['input_dir'], str(parent_id).strip()) - - if 'paged_content_additional_page_media' in config: - if 'paged_content_image_file_extension' in config: - page_files = [f for f in os.listdir(page_dir_path) if f.endswith(config['paged_content_image_file_extension'].lstrip('.').strip())] + parent_id = parent_csv_record[config["id_field"]] + page_dir_path = os.path.join(config["input_dir"], str(parent_id).strip()) + + if "paged_content_additional_page_media" in config: + if "paged_content_image_file_extension" in config: + page_files = [ + f + for f in os.listdir(page_dir_path) + if f.endswith( + config["paged_content_image_file_extension"].lstrip(".").strip() + ) + ] else: page_files = os.listdir(page_dir_path) else: @@ -5867,201 +7942,344 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id): for page_file_name in page_files: filename_without_extension = os.path.splitext(page_file_name)[0] - filename_segments = filename_without_extension.split(config['paged_content_sequence_separator']) + filename_segments = filename_without_extension.split( + config["paged_content_sequence_separator"] + ) weight = filename_segments[-1] weight = weight.lstrip("0") # @todo: come up with a templated way to generate the page_identifier, and what field to POST it to. - page_identifier = parent_id + '_' + filename_without_extension - page_title = get_page_title_from_template(config, parent_csv_record['title'], weight) + page_identifier = parent_id + "_" + filename_without_extension + page_title = get_page_title_from_template( + config, parent_csv_record["title"], weight + ) node_json = { - 'type': [ - {'target_id': config['paged_content_page_content_type'], - 'target_type': 'node_type'} - ], - 'title': [ - {'value': page_title} + "type": [ + { + "target_id": config["paged_content_page_content_type"], + "target_type": "node_type", + } ], - 'field_member_of': [ - {'target_id': parent_node_id, - 'target_type': 'node'} - ], - 'field_weight': [ - {'value': weight} - ] + "title": [{"value": page_title}], + "field_member_of": [{"target_id": parent_node_id, "target_type": "node"}], + "field_weight": [{"value": weight}], } # Add field_model if that field exists in the child's content type. 
- entity_fields = get_entity_fields(config, 'node', config['paged_content_page_content_type']) - if 'field_model' in entity_fields: - if not value_is_numeric(config['paged_content_page_model_tid'].strip()) and config['paged_content_page_model_tid'].strip().startswith('http'): - paged_content_model_tid = get_term_id_from_uri(config, config['paged_content_page_model_tid'].strip()) + entity_fields = get_entity_fields( + config, "node", config["paged_content_page_content_type"] + ) + if "field_model" in entity_fields: + if not value_is_numeric( + config["paged_content_page_model_tid"].strip() + ) and config["paged_content_page_model_tid"].strip().startswith("http"): + paged_content_model_tid = get_term_id_from_uri( + config, config["paged_content_page_model_tid"].strip() + ) else: - paged_content_model_tid = config['paged_content_page_model_tid'].strip() - node_json['field_model'] = [{'target_id': paged_content_model_tid, 'target_type': 'taxonomy_term'}] + paged_content_model_tid = config["paged_content_page_model_tid"].strip() + node_json["field_model"] = [ + {"target_id": paged_content_model_tid, "target_type": "taxonomy_term"} + ] - if 'field_display_hints' in parent_csv_record: - node_json['field_display_hints'] = [{'target_id': parent_csv_record['field_display_hints'], 'target_type': 'taxonomy_term'}] + if "field_display_hints" in parent_csv_record: + node_json["field_display_hints"] = [ + { + "target_id": parent_csv_record["field_display_hints"], + "target_type": "taxonomy_term", + } + ] # Some optional base fields, inherited from the parent object. - if 'uid' in parent_csv_record: - if len(parent_csv_record['uid']) > 0: - node_json['uid'] = [{'target_id': parent_csv_record['uid']}] + if "uid" in parent_csv_record: + if len(parent_csv_record["uid"]) > 0: + node_json["uid"] = [{"target_id": parent_csv_record["uid"]}] - if 'created' in parent_csv_record: - if len(parent_csv_record['created']) > 0: - node_json['created'] = [{'value': parent_csv_record['created']}] + if "created" in parent_csv_record: + if len(parent_csv_record["created"]) > 0: + node_json["created"] = [{"value": parent_csv_record["created"]}] # Add any required fields that are in the parent CSV. - required_fields = get_required_bundle_fields(config, 'node', config['content_type']) + required_fields = get_required_bundle_fields( + config, "node", config["content_type"] + ) if len(required_fields) > 0: - field_definitions = get_field_definitions(config, 'node') + field_definitions = get_field_definitions(config, "node") # Importing the workbench_fields module at the top of this module with the # rest of the imports causes a circular import exception, so we do it here. import workbench_fields for required_field in required_fields: # THese fields are populated above. - if required_field in ['title', 'field_model', 'field_display_hints', 'uid', 'created']: + if required_field in [ + "title", + "field_model", + "field_display_hints", + "uid", + "created", + ]: continue # Assemble Drupal field structures for entity reference fields from CSV data. # Entity reference fields (taxonomy_term and node). 
- if field_definitions[required_field]['field_type'] == 'entity_reference': + if ( + field_definitions[required_field]["field_type"] + == "entity_reference" + ): entity_reference_field = workbench_fields.EntityReferenceField() - node_json = entity_reference_field.create(config, field_definitions, node_json, parent_csv_record, required_field) + node_json = entity_reference_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) # Typed relation fields. - elif field_definitions[required_field]['field_type'] == 'typed_relation': + elif ( + field_definitions[required_field]["field_type"] == "typed_relation" + ): typed_relation_field = workbench_fields.TypedRelationField() - node_json = typed_relation_field.create(config, field_definitions, node_json, parent_csv_record, required_field) + node_json = typed_relation_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) # Geolocation fields. - elif field_definitions[required_field]['field_type'] == 'geolocation': + elif field_definitions[required_field]["field_type"] == "geolocation": geolocation_field = workbench_fields.GeolocationField() - node_json = geolocation_field.create(config, field_definitions, node_json, parent_csv_record, required_field) + node_json = geolocation_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) # Link fields. - elif field_definitions[required_field]['field_type'] == 'link': + elif field_definitions[required_field]["field_type"] == "link": link_field = workbench_fields.LinkField() - node_json = link_field.create(config, field_definitions, node_json, parent_csv_record, required_field) + node_json = link_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) # Authority Link fields. - elif field_definitions[required_field]['field_type'] == 'authority_link': + elif ( + field_definitions[required_field]["field_type"] == "authority_link" + ): link_field = workbench_fields.AuthorityLinkField() - node_json = link_field.create(config, field_definitions, node_json, parent_csv_record, required_field) + node_json = link_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) # For non-entity reference and non-typed relation fields (text, integer, boolean etc.). 
else: simple_field = workbench_fields.SimpleField() - node_json = simple_field.create(config, field_definitions, node_json, parent_csv_record, required_field) - - node_headers = { - 'Content-Type': 'application/json' - } - node_endpoint = '/node?_format=json' - node_response = issue_request(config, 'POST', node_endpoint, node_headers, node_json, None) + node_json = simple_field.create( + config, + field_definitions, + node_json, + parent_csv_record, + required_field, + ) + + node_headers = {"Content-Type": "application/json"} + node_endpoint = "/node?_format=json" + node_response = issue_request( + config, "POST", node_endpoint, node_headers, node_json, None + ) if node_response.status_code == 201: - node_uri = node_response.headers['location'] - print('+ Node for child "' + page_title + '" created at ' + node_uri + '.') + node_uri = node_response.headers["location"] + print('+ Node for child "' + page_title + '" created at ' + node_uri + ".") logging.info('Node for child "%s" created at %s.', page_title, node_uri) - if 'output_csv' in config.keys(): + if "output_csv" in config.keys(): write_to_output_csv(config, page_identifier, node_response.text) node_nid = get_nid_from_url_alias(config, node_uri) write_rollback_node_id(config, node_nid, path_to_rollback_csv_file) - populate_csv_id_to_node_id_map(config, parent_id, parent_node_id, page_file_name, node_nid) + populate_csv_id_to_node_id_map( + config, parent_id, parent_node_id, page_file_name, node_nid + ) page_file_path = os.path.join(parent_id, page_file_name) fake_csv_record = collections.OrderedDict() - fake_csv_record['title'] = page_title - fake_csv_record['file'] = page_file_path - fake_csv_record[config['id_field']] = parent_csv_record[config['id_field']] - media_response_status_code = create_media(config, page_file_path, 'file', node_nid, fake_csv_record) + fake_csv_record["title"] = page_title + fake_csv_record["file"] = page_file_path + fake_csv_record[config["id_field"]] = parent_csv_record[config["id_field"]] + media_response_status_code = create_media( + config, page_file_path, "file", node_nid, fake_csv_record + ) allowed_media_response_codes = [201, 204] if media_response_status_code in allowed_media_response_codes: if media_response_status_code is False: - print(f"- ERROR: Media for {page_file_path} not created. See log for more information.") - logging.error("Media for %s not created. HTTP response code was %s.", page_file_path, media_response_status_code) + print( + f"- ERROR: Media for {page_file_path} not created. See log for more information." + ) + logging.error( + "Media for %s not created. 
HTTP response code was %s.", + page_file_path, + media_response_status_code, + ) continue else: logging.info("Media for %s created.", page_file_path) print(f"+ Media for {page_file_path} created.") - if config['paged_content_from_directories'] is True: - if 'paged_content_additional_page_media' in config: - for extension_mapping in config['paged_content_additional_page_media']: - for additional_page_media_use_term, additional_page_media_extension in extension_mapping.items(): - if str(additional_page_media_use_term).startswith('http'): - additional_page_media_use_tid = get_term_id_from_uri(config, additional_page_media_use_term) + if config["paged_content_from_directories"] is True: + if "paged_content_additional_page_media" in config: + for extension_mapping in config[ + "paged_content_additional_page_media" + ]: + for ( + additional_page_media_use_term, + additional_page_media_extension, + ) in extension_mapping.items(): + if str(additional_page_media_use_term).startswith("http"): + additional_page_media_use_tid = get_term_id_from_uri( + config, additional_page_media_use_term + ) else: - additional_page_media_use_tid = additional_page_media_use_term + additional_page_media_use_tid = ( + additional_page_media_use_term + ) page_file_base_path = os.path.splitext(page_file_path)[0] - additional_page_media_file_path = page_file_base_path + '.' + additional_page_media_extension.strip() - if check_file_exists(config, additional_page_media_file_path): - media_response_status_code = create_media(config, additional_page_media_file_path, None, node_nid, fake_csv_record, media_use_tid=additional_page_media_use_tid) - if media_response_status_code in allowed_media_response_codes: + additional_page_media_file_path = ( + page_file_base_path + + "." + + additional_page_media_extension.strip() + ) + if check_file_exists( + config, additional_page_media_file_path + ): + media_response_status_code = create_media( + config, + additional_page_media_file_path, + None, + node_nid, + fake_csv_record, + media_use_tid=additional_page_media_use_tid, + ) + if ( + media_response_status_code + in allowed_media_response_codes + ): if media_response_status_code is False: - print(f"- ERROR: Media for {additional_page_media_file_path} not created. See log for more information.") - logging.error("Media for %s not created. HTTP response code was %s.", - page_file_base_path + '.' + additional_page_media_extension, media_response_status_code) + print( + f"- ERROR: Media for {additional_page_media_file_path} not created. See log for more information." + ) + logging.error( + "Media for %s not created. HTTP response code was %s.", + page_file_base_path + + "." + + additional_page_media_extension, + media_response_status_code, + ) continue else: - logging.info("Media for %s created.", additional_page_media_file_path) - print(f"+ Media for {additional_page_media_file_path} created.") + logging.info( + "Media for %s created.", + additional_page_media_file_path, + ) + print( + f"+ Media for {additional_page_media_file_path} created." + ) else: - logging.warning(f"{additional_page_media_file_path} not found.") + logging.warning( + f"{additional_page_media_file_path} not found." + ) else: - print(f"Error: Node for page {page_identifier} not created. 
See log for more information.") - logging.error('Node for page "%s" not created, HTTP response code was %s, response body was %s', page_identifier, node_response.status_code, node_response.text) - logging.error('JSON request body used in previous POST to "%s" was %s.', node_endpoint, node_json) + print( + f"Error: Node for page {page_identifier} not created. See log for more information." + ) + logging.error( + 'Node for page "%s" not created, HTTP response code was %s, response body was %s', + page_identifier, + node_response.status_code, + node_response.text, + ) + logging.error( + 'JSON request body used in previous POST to "%s" was %s.', + node_endpoint, + node_json, + ) # Execute node-specific post-create scripts, if any are configured. - if 'node_post_create' in config and len(config['node_post_create']) > 0: - for command in config['node_post_create']: - post_task_output, post_task_return_code = execute_entity_post_task_script(command, config['config_file'], node_response.status_code, node_response.text) + if "node_post_create" in config and len(config["node_post_create"]) > 0: + for command in config["node_post_create"]: + post_task_output, post_task_return_code = ( + execute_entity_post_task_script( + command, + config["config_file"], + node_response.status_code, + node_response.text, + ) + ) if post_task_return_code == 0: - logging.info("Post node create script " + command + " executed successfully.") + logging.info( + "Post node create script " + command + " executed successfully." + ) else: - logging.error("Post node create script " + command + " failed with exit code " + str(post_task_return_code) + ".") + logging.error( + "Post node create script " + + command + + " failed with exit code " + + str(post_task_return_code) + + "." + ) def get_rollback_csv_filepath(config): - if config['timestamp_rollback'] is True: + if config["timestamp_rollback"] is True: now_string = EXECUTION_START_TIME.strftime("%Y_%m_%d_%H_%M_%S") - rollback_csv_filename = 'rollback.' + now_string + '.csv' + rollback_csv_filename = "rollback." + now_string + ".csv" else: - rollback_csv_filename = 'rollback.csv' + rollback_csv_filename = "rollback.csv" - if os.environ.get('ISLANDORA_WORKBENCH_SECONDARY_TASKS') is not None: + if os.environ.get("ISLANDORA_WORKBENCH_SECONDARY_TASKS") is not None: secondary_tasks = json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]) - if os.path.abspath(config['current_config_file_path']) in secondary_tasks: + if os.path.abspath(config["current_config_file_path"]) in secondary_tasks: config_file_id = get_config_file_identifier(config) - rollback_csv_filename = rollback_csv_filename + '.' + config_file_id + rollback_csv_filename = rollback_csv_filename + "." + config_file_id - return os.path.join(config['rollback_dir'] or config['input_dir'], rollback_csv_filename) + return os.path.join( + config["rollback_dir"] or config["input_dir"], rollback_csv_filename + ) def write_rollback_config(config, path_to_rollback_csv_file): - if config['timestamp_rollback'] is True: + if config["timestamp_rollback"] is True: now_string = EXECUTION_START_TIME.strftime("%Y_%m_%d_%H_%M_%S") - rollback_config_filename = 'rollback.' + now_string + '.yml' + rollback_config_filename = "rollback." 
+ now_string + ".yml" else: - rollback_config_filename = 'rollback.yml' + rollback_config_filename = "rollback.yml" rollback_config_file = open(rollback_config_filename, "w") yaml.dump( - {'task': 'delete', - 'host': config['host'], - 'username': config['username'], - 'password': config['password'], - 'input_dir': config['input_dir'], - 'standalone_media_url': config['standalone_media_url'], - 'input_csv': os.path.basename(path_to_rollback_csv_file)}, - rollback_config_file) + { + "task": "delete", + "host": config["host"], + "username": config["username"], + "password": config["password"], + "input_dir": config["input_dir"], + "standalone_media_url": config["standalone_media_url"], + "input_csv": os.path.basename(path_to_rollback_csv_file), + }, + rollback_config_file, + ) def prep_rollback_csv(config, path_to_rollback_csv_file): @@ -6072,9 +8290,13 @@ def prep_rollback_csv(config, path_to_rollback_csv_file): rollback_csv_file.write("node_id" + "\n") rollback_csv_file.close() except Exception as e: - message = "Workbench was unable save rollback CSV to " + path_to_rollback_csv_file + "." + message = ( + "Workbench was unable save rollback CSV to " + + path_to_rollback_csv_file + + "." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def write_rollback_node_id(config, node_id, path_to_rollback_csv_file): @@ -6085,52 +8307,66 @@ def write_rollback_node_id(config, node_id, path_to_rollback_csv_file): def get_csv_from_google_sheet(config): - url_parts = config['input_csv'].split('/') - url_parts[6] = 'export?gid=' + str(config['google_sheets_gid']) + '&format=csv' - csv_url = '/'.join(url_parts) + url_parts = config["input_csv"].split("/") + url_parts[6] = "export?gid=" + str(config["google_sheets_gid"]) + "&format=csv" + csv_url = "/".join(url_parts) response = requests.get(url=csv_url, allow_redirects=True) if response.status_code == 404: - message = 'Workbench cannot find the Google spreadsheet at ' + config['input_csv'] + '. Please check the URL.' + message = ( + "Workbench cannot find the Google spreadsheet at " + + config["input_csv"] + + ". Please check the URL." + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) # Sheets that aren't publicly readable return a 302 and then a 200 with a bunch of HTML for humans to look at. - if response.content.strip().startswith(b' 0: - for mtype, ext in config['mimetype_extensions'].items(): + map = { + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/jp2": ".jp2", + "image/png": ".png", + "image/tif": ".tif", + "image/tiff": ".tif", + "audio/mpeg": ".mp3", + "text/plain": ".txt", + "application/xml": ".xml", + "application/octet-stream": ".bin", + } + if "mimetype_extensions" in config and len(config["mimetype_extensions"]) > 0: + for mtype, ext in config["mimetype_extensions"].items(): map[mtype] = ext if mimetype in map: @@ -6238,7 +8483,7 @@ def get_extension_from_mimetype(config, mimetype): def get_deduped_file_path(path): """Given a file path, return a version of it that contains a version of - the same name with an incremented integer inserted before the extension. + the same name with an incremented integer inserted before the extension. 
""" """Parameters ---------- @@ -6254,21 +8499,21 @@ def get_deduped_file_path(path): numbers = re.findall(r"_\d+$", base_path) if len(numbers) == 0: - incremented_path = base_path + '_1' + extension + incremented_path = base_path + "_1" + extension else: - number = int(numbers[0].lstrip('_')) + 1 - base_path_parts = base_path.split('_') + number = int(numbers[0].lstrip("_")) + 1 + base_path_parts = base_path.split("_") del base_path_parts[-1] - incremented_path = '_'.join(base_path_parts) + '_' + str(number) + extension + incremented_path = "_".join(base_path_parts) + "_" + str(number) + extension return incremented_path def check_file_exists(config, filename): """Cconfirms file exists and is a file (not a directory). - For remote/downloaded files, checks for a 200 response from a HEAD request. + For remote/downloaded files, checks for a 200 response from a HEAD request. - Does not check whether filename value is blank. + Does not check whether filename value is blank. """ """Parameters ---------- @@ -6282,20 +8527,30 @@ def check_file_exists(config, filename): True if the file exists, false if not. """ # It's a remote file. - if filename.startswith('http'): + if filename.startswith("http"): try: - head_response = requests.head(filename, allow_redirects=True, verify=config['secure_ssl_only']) + head_response = requests.head( + filename, allow_redirects=True, verify=config["secure_ssl_only"] + ) if head_response.status_code == 200: return True else: return False except requests.exceptions.Timeout as err_timeout: - message = 'Workbench timed out trying to reach ' + filename + '. Details in next log entry.' + message = ( + "Workbench timed out trying to reach " + + filename + + ". Details in next log entry." + ) logging.error(message) logging.error(err_timeout) return False except requests.exceptions.ConnectionError as error_connection: - message = 'Workbench cannot connect to ' + filename + '. Details in next log entry.' + message = ( + "Workbench cannot connect to " + + filename + + ". Details in next log entry." + ) logging.error(message) logging.error(error_connection) return False @@ -6304,7 +8559,7 @@ def check_file_exists(config, filename): if os.path.isabs(filename): file_path = filename else: - file_path = os.path.join(config['input_dir'], filename) + file_path = os.path.join(config["input_dir"], filename) if os.path.isfile(file_path): return True @@ -6312,14 +8567,18 @@ def check_file_exists(config, filename): return False # Fall back to False if existence of file can't be determined. - logging.warning(f'Cannot determine if file "{filename}" exists, assuming it does not.') + logging.warning( + f'Cannot determine if file "{filename}" exists, assuming it does not.' + ) return False -def get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id=None, make_dir=True): +def get_preprocessed_file_path( + config, file_fieldname, node_csv_row, node_id=None, make_dir=True +): """For remote/downloaded files (other than from providers defined in config['oembed_providers]), - generates the path to the local temporary copy and returns that path. For local files or oEmbed URLs, - just returns the value of node_csv_row['file']. + generates the path to the local temporary copy and returns that path. For local files or oEmbed URLs, + just returns the value of node_csv_row['file']. """ """Parameters ---------- @@ -6339,28 +8598,37 @@ def get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id=Non The path (absolute or relative) to the file. 
""" file_path_from_csv = node_csv_row[file_fieldname].strip() - if config['task'] == 'add_media': - config['id_field'] = 'node_id' + if config["task"] == "add_media": + config["id_field"] = "node_id" # Test whether file_path_from_csv is from one of the oEmbed providers # and if so, return it here. - for oembed_provider in config['oembed_providers']: + for oembed_provider in config["oembed_providers"]: for provider_url, mtype in oembed_provider.items(): if file_path_from_csv.startswith(provider_url): return file_path_from_csv # It's a remote file. - if file_path_from_csv.startswith('http'): - if config['task'] == 'add_media': - subdir = os.path.join(config['temp_dir'], re.sub('[^A-Za-z0-9]+', '_', str(node_csv_row['node_id']))) - elif config['task'] == 'update_media': - subdir = os.path.join(config['temp_dir'], re.sub('[^A-Za-z0-9]+', '_', node_csv_row['media_id'])) + if file_path_from_csv.startswith("http"): + if config["task"] == "add_media": + subdir = os.path.join( + config["temp_dir"], + re.sub("[^A-Za-z0-9]+", "_", str(node_csv_row["node_id"])), + ) + elif config["task"] == "update_media": + subdir = os.path.join( + config["temp_dir"], + re.sub("[^A-Za-z0-9]+", "_", node_csv_row["media_id"]), + ) else: - subdir = os.path.join(config['temp_dir'], re.sub('[^A-Za-z0-9]+', '_', node_csv_row[config['id_field']])) + subdir = os.path.join( + config["temp_dir"], + re.sub("[^A-Za-z0-9]+", "_", node_csv_row[config["id_field"]]), + ) if make_dir: Path(subdir).mkdir(parents=True, exist_ok=True) - if 'check' in config.keys() and config['check'] is True: + if "check" in config.keys() and config["check"] is True: try: os.rmdir(subdir) except Exception as e: @@ -6368,35 +8636,63 @@ def get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id=Non message = f'Subdirectory "{subdir}" could not be deleted. See log for more info.' logging.warning(f'Subdictory "{subdir}" could not be deleted: {e}.') - remote_extension_with_dot = get_remote_file_extension(config, file_path_from_csv) + remote_extension_with_dot = get_remote_file_extension( + config, file_path_from_csv + ) remote_filename_parts = os.path.splitext(file_path_from_csv) - if 'use_node_title_for_remote_filename' in config and config['use_node_title_for_remote_filename'] is True: + if ( + "use_node_title_for_remote_filename" in config + and config["use_node_title_for_remote_filename"] is True + ): # CSVs for add_media tasks don't contain 'title', so we need to get it. - if config['task'] == 'add_media': - node_csv_row['title'] = get_node_title_from_nid(config, node_csv_row['node_id']) - if node_csv_row['title'] is False: - message = 'Cannot access node ' + str(node_id) + ', so cannot get its title for use in media filename. Using filename instead.' + if config["task"] == "add_media": + node_csv_row["title"] = get_node_title_from_nid( + config, node_csv_row["node_id"] + ) + if node_csv_row["title"] is False: + message = ( + "Cannot access node " + + str(node_id) + + ", so cannot get its title for use in media filename. Using filename instead." 
+ ) logging.warning(message) - node_csv_row['title'] = os.path.basename(node_csv_row[file_fieldname].strip()) - - filename = re.sub('[^A-Za-z0-9]+', '_', node_csv_row['title']) - filename = filename.strip('_') - downloaded_file_path = os.path.join(subdir, filename + remote_extension_with_dot) - elif 'use_nid_in_remote_filename' in config and config['use_nid_in_remote_filename'] is True: + node_csv_row["title"] = os.path.basename( + node_csv_row[file_fieldname].strip() + ) + + filename = re.sub("[^A-Za-z0-9]+", "_", node_csv_row["title"]) + filename = filename.strip("_") + downloaded_file_path = os.path.join( + subdir, filename + remote_extension_with_dot + ) + elif ( + "use_nid_in_remote_filename" in config + and config["use_nid_in_remote_filename"] is True + ): filename = f"{node_id}{remote_extension_with_dot}" downloaded_file_path = os.path.join(subdir, filename) - elif config['field_for_remote_filename'] is not False and config['field_for_remote_filename'] in node_csv_row and len(node_csv_row[config['field_for_remote_filename']]) > 0: - field_for_remote_filename_string = node_csv_row[config['field_for_remote_filename']][:255] - sanitized_filename = re.sub('[^0-9a-zA-Z]+', '_', field_for_remote_filename_string) - downloaded_file_path = os.path.join(subdir, sanitized_filename.strip('_') + remote_extension_with_dot) + elif ( + config["field_for_remote_filename"] is not False + and config["field_for_remote_filename"] in node_csv_row + and len(node_csv_row[config["field_for_remote_filename"]]) > 0 + ): + field_for_remote_filename_string = node_csv_row[ + config["field_for_remote_filename"] + ][:255] + sanitized_filename = re.sub( + "[^0-9a-zA-Z]+", "_", field_for_remote_filename_string + ) + downloaded_file_path = os.path.join( + subdir, sanitized_filename.strip("_") + remote_extension_with_dot + ) else: # For files from Islandora Legacy ending in /view, we use the CSV ID as the filename. if len(remote_filename_parts[1]) == 0: - filename = node_csv_row[config['id_field']] + remote_extension_with_dot + filename = node_csv_row[config["id_field"]] + remote_extension_with_dot else: # For other files, we use the last part of the path preceding the file extension. - url_path_parts = remote_filename_parts[0].split('/') + url_path_parts = remote_filename_parts[0].split("/") filename = url_path_parts[-1] + remote_extension_with_dot downloaded_file_path = os.path.join(subdir, filename) @@ -6411,13 +8707,12 @@ def get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id=Non if os.path.isabs(file_path_from_csv): file_path = file_path_from_csv else: - file_path = os.path.join(config['input_dir'], file_path_from_csv) + file_path = os.path.join(config["input_dir"], file_path_from_csv) return file_path def get_node_media_ids(config, node_id, media_use_tids=None): - """Gets a list of media IDs for a node. 
- """ + """Gets a list of media IDs for a node.""" """Parameters ---------- config : dict @@ -6438,19 +8733,19 @@ def get_node_media_ids(config, node_id, media_use_tids=None): media_id_list = list() url = f"{config['host']}/node/{node_id}/media?_format=json" - response = issue_request(config, 'GET', url) + response = issue_request(config, "GET", url) if response.status_code == 200: body = json.loads(response.text) for media in body: if len(media_use_tids) == 0: - media_id_list.append(media['mid'][0]['value']) + media_id_list.append(media["mid"][0]["value"]) else: - for media_use_tid_json in media['field_media_use']: - if media_use_tid_json['target_id'] in media_use_tids: - media_id_list.append(media['mid'][0]['value']) + for media_use_tid_json in media["field_media_use"]: + if media_use_tid_json["target_id"] in media_use_tids: + media_id_list.append(media["mid"][0]["value"]) return media_id_list else: - message = f'Attempt to get media for node ID {node_id} returned a {response.status_code} status code.' + message = f"Attempt to get media for node ID {node_id} returned a {response.status_code} status code." print("Error: " + message) logging.warning(message) return False @@ -6461,24 +8756,38 @@ def download_remote_file(config, url, file_fieldname, node_csv_row, node_id): try: # do not cache the responses for downloaded files in requests_cache with requests_cache.disabled(): - response = requests.get(url, allow_redirects=True, stream=True, verify=config['secure_ssl_only']) + response = requests.get( + url, allow_redirects=True, stream=True, verify=config["secure_ssl_only"] + ) except requests.exceptions.Timeout as err_timeout: - message = 'Workbench timed out trying to reach ' + \ - sections.netloc + ' while connecting to ' + url + '. Please verify that URL and check your network connection.' + message = ( + "Workbench timed out trying to reach " + + sections.netloc + + " while connecting to " + + url + + ". Please verify that URL and check your network connection." + ) logging.error(message) logging.error(err_timeout) - print('Error: ' + message) + print("Error: " + message) return False except requests.exceptions.ConnectionError as error_connection: - message = 'Workbench cannot connect to ' + \ - sections.netloc + ' while connecting to ' + url + '. Please verify that URL and check your network connection.' + message = ( + "Workbench cannot connect to " + + sections.netloc + + " while connecting to " + + url + + ". Please verify that URL and check your network connection." + ) logging.error(message) logging.error(error_connection) - print('Error: ' + message) + print("Error: " + message) return False - downloaded_file_path = get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id) - with open(downloaded_file_path, 'wb+') as output_file: + downloaded_file_path = get_preprocessed_file_path( + config, file_fieldname, node_csv_row, node_id + ) + with open(downloaded_file_path, "wb+") as output_file: for chunk in response.iter_content(chunk_size=8192): if chunk: output_file.write(chunk) @@ -6488,22 +8797,24 @@ def download_remote_file(config, url, file_fieldname, node_csv_row, node_id): def get_remote_file_extension(config, file_url): """For remote files that have no extension, such as http://acme.com/islandora/object/some:pid/datastream/OBJ/download, - assign an extension, with a leading dot. If the file has an extension, return it, also with dot. + assign an extension, with a leading dot. If the file has an extension, return it, also with dot. 
""" # If the file has an extension, just return it. extension = os.path.splitext(file_url)[1] - extension = extension.lstrip('.').lower() + extension = extension.lstrip(".").lower() if len(extension) > 0: - return '.' + extension + return "." + extension # If it doesn't have an extension, assign one based on its MIME type. Request's docs at # https://requests.readthedocs.io/en/latest/user/quickstart/#response-headers say that # headers can be accessed regardless of capitalization, but that's not the case (ha). try: - head_response = requests.head(file_url, allow_redirects=True, verify=config['secure_ssl_only']) - mimetype = head_response.headers['Content-Type'] + head_response = requests.head( + file_url, allow_redirects=True, verify=config["secure_ssl_only"] + ) + mimetype = head_response.headers["Content-Type"] if mimetype is None: - mimetype = head_response.headers['content-type'] + mimetype = head_response.headers["content-type"] if mimetype is None: message = f'Cannot reliably get MIME type of file "{file_url}" from remote server.' logging.error(message) @@ -6512,26 +8823,28 @@ def get_remote_file_extension(config, file_url): # In case servers return stuff beside the MIME type in Content-Type header. # Assumes they use ; to separate stuff and that what we're looking for is # in the first position. - if ';' in mimetype: - mimetype_parts = mimetype.split(';') + if ";" in mimetype: + mimetype_parts = mimetype.split(";") mimetype = mimetype_parts[0].strip() except KeyError: - mimetype = 'application/octet-stream' + mimetype = "application/octet-stream" extension_with_dot = get_extension_from_mimetype(config, mimetype) if extension_with_dot is None: message = f'Workbench does not recognize the MIME type "{mimetype}" received from the remote server for the file "{file_url}". ' - message = message + 'You can assign an extension to this MIME type using the "mimetype_extensions" config setting.' + message = ( + message + + 'You can assign an extension to this MIME type using the "mimetype_extensions" config setting.' + ) logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) return extension_with_dot def download_file_from_drupal(config, node_id): - '''Download a media file from Drupal. - ''' + """Download a media file from Drupal.""" """Parameters ---------- config : dict @@ -6543,78 +8856,116 @@ def download_file_from_drupal(config, node_id): file_name The downloaded file's name, or False if unable to download the file. """ - if config['export_file_directory'] is None: + if config["export_file_directory"] is None: return False - if not os.path.exists(config['export_file_directory']): + if not os.path.exists(config["export_file_directory"]): try: - os.mkdir(config['export_file_directory']) + os.mkdir(config["export_file_directory"]) except Exception as e: - message = 'Path in configuration option "export_file_directory" ("' + config['export_file_directory'] + '") is not writable.' - logging.error(message + ' ' + str(e)) - sys.exit('Error: ' + message + ' See log for more detail.') + message = ( + 'Path in configuration option "export_file_directory" ("' + + config["export_file_directory"] + + '") is not writable.' + ) + logging.error(message + " " + str(e)) + sys.exit("Error: " + message + " See log for more detail.") else: - message = 'Path in configuration option "export_file_directory" ("' + config['export_file_directory'] + '") already exists.' 
+ message = ( + 'Path in configuration option "export_file_directory" ("' + + config["export_file_directory"] + + '") already exists.' + ) logging.info(message) media_list_url = f"{config['host']}/node/{node_id}/media?_format=json" - media_list_response = issue_request(config, 'GET', media_list_url) + media_list_response = issue_request(config, "GET", media_list_url) if media_list_response.status_code == 200: try: media_list = json.loads(media_list_response.text) except json.decoder.JSONDecodeError as e: - logging.error(f'Media query for node {node_id} produced the following error: {e}') + logging.error( + f"Media query for node {node_id} produced the following error: {e}" + ) return False if len(media_list) == 0: - logging.warning(f'Node {node_id} has no media.') + logging.warning(f"Node {node_id} has no media.") return False - if str(config['export_file_media_use_term_id']).startswith('http'): - config['export_file_media_use_term_id'] = get_term_id_from_uri(config, config['export_file_media_use_term_id']) + if str(config["export_file_media_use_term_id"]).startswith("http"): + config["export_file_media_use_term_id"] = get_term_id_from_uri( + config, config["export_file_media_use_term_id"] + ) - if config['export_file_media_use_term_id'] is False: - logging.error(f'Unknown value for configuration setting "export_file_media_use_term_id": {config["export_file_media_use_term_id"]}.') + if config["export_file_media_use_term_id"] is False: + logging.error( + f'Unknown value for configuration setting "export_file_media_use_term_id": {config["export_file_media_use_term_id"]}.' + ) return False for media in media_list: for file_field_name in file_fields: if file_field_name in media: - if len(media[file_field_name]) and media['field_media_use'][0]['target_id'] == config['export_file_media_use_term_id']: - url_filename = os.path.basename(media[file_field_name][0]['url']) - downloaded_file_path = os.path.join(config['export_file_directory'], url_filename) + if ( + len(media[file_field_name]) + and media["field_media_use"][0]["target_id"] + == config["export_file_media_use_term_id"] + ): + url_filename = os.path.basename( + media[file_field_name][0]["url"] + ) + downloaded_file_path = os.path.join( + config["export_file_directory"], url_filename + ) if os.path.exists(downloaded_file_path): - downloaded_file_path = get_deduped_file_path(downloaded_file_path) - f = open(downloaded_file_path, 'wb+') + downloaded_file_path = get_deduped_file_path( + downloaded_file_path + ) + f = open(downloaded_file_path, "wb+") # User needs to be anonymous since authenticated users are getting 403 responses. Probably something in # Drupal's FileAccessControlHandler code is doing this. - file_download_response = requests.get(media[file_field_name][0]['url'], allow_redirects=True, verify=config['secure_ssl_only']) + file_download_response = requests.get( + media[file_field_name][0]["url"], + allow_redirects=True, + verify=config["secure_ssl_only"], + ) if file_download_response.status_code == 200: f.write(file_download_response.content) f.close() - filename_for_logging = os.path.basename(downloaded_file_path) - logging.info(f'File "{filename_for_logging}" downloaded for node {node_id}.') - if os.path.isabs(config['export_file_directory']): + filename_for_logging = os.path.basename( + downloaded_file_path + ) + logging.info( + f'File "{filename_for_logging}" downloaded for node {node_id}.' 
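The Content-Type handling in `get_remote_file_extension()` above boils down to discarding any `;`-delimited parameters and then mapping the bare MIME type to an extension. A rough sketch that uses the standard-library `mimetypes` module as a stand-in for Workbench's `get_extension_from_mimetype()` and its `mimetype_extensions` setting:

```python
import mimetypes


def sketch_extension_from_content_type(content_type_header):
    # Strip parameters such as "; charset=..." before the lookup, as the
    # function above does.
    mimetype = content_type_header.split(";")[0].strip()
    # Stand-in lookup; exact results can vary by platform (e.g. ".jpg" vs ".jpe").
    return mimetypes.guess_extension(mimetype) or ".bin"


print(sketch_extension_from_content_type("image/jpeg; charset=binary"))
```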
+ ) + if os.path.isabs(config["export_file_directory"]): return downloaded_file_path else: return filename_for_logging else: - message = f"File at {media[file_field_name][0]['url']} (part of media for node {node_id}) could " + \ - f"not be downloaded (HTTP response code {file_download_response.status_code})." + message = ( + f"File at {media[file_field_name][0]['url']} (part of media for node {node_id}) could " + + f"not be downloaded (HTTP response code {file_download_response.status_code})." + ) logging.error(message) return False else: - logging.warning(f'Node {node_id} in new Summit has no files in "{file_field_name}".') + logging.warning( + f'Node {node_id} in new Summit has no files in "{file_field_name}".' + ) return False else: continue else: - logging.error(f'Attempt to fetch media list {media_list_url} returned an {media_list_response.status_code} HTTP response.') + logging.error( + f"Attempt to fetch media list {media_list_url} returned an {media_list_response.status_code} HTTP response." + ) return False def get_file_hash_from_drupal(config, file_uuid, algorithm): """Query the Integration module's hash controller at '/islandora_workbench_integration/file_hash' - to get the hash of the file identified by file_uuid. + to get the hash of the file identified by file_uuid. """ """Parameters ---------- @@ -6629,19 +8980,29 @@ def get_file_hash_from_drupal(config, file_uuid, algorithm): string The requested hash. """ - url = config['host'] + '/islandora_workbench_integration/file_hash?file_uuid=' + file_uuid + '&algorithm=' + algorithm - response = issue_request(config, 'GET', url) + url = ( + config["host"] + + "/islandora_workbench_integration/file_hash?file_uuid=" + + file_uuid + + "&algorithm=" + + algorithm + ) + response = issue_request(config, "GET", url) if response.status_code == 200: response_body = json.loads(response.text) - return response_body[0]['checksum'] + return response_body[0]["checksum"] else: - logging.warning("Request to get %s hash for file %s returned a %s status code", algorithm, file_uuid, response.status_code) + logging.warning( + "Request to get %s hash for file %s returned a %s status code", + algorithm, + file_uuid, + response.status_code, + ) return False def get_file_hash_from_local(config, file_path, algorithm): - """Get the file's hash/checksum. - """ + """Get the file's hash/checksum.""" """Parameters ---------- config : dict @@ -6655,14 +9016,14 @@ def get_file_hash_from_local(config, file_path, algorithm): string The requested hash. 
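`get_file_hash_from_drupal()` above is a thin wrapper around one GET request; the query string and response parsing look roughly like this when run against canned values (the host, UUID, and checksum are invented, and the single-element JSON list only imitates the controller's response shape):

```python
import json

host = "https://islandora.example.com"
file_uuid = "0e1a2b3c-0000-4000-8000-000000000000"
algorithm = "sha256"

url = (
    host
    + "/islandora_workbench_integration/file_hash?file_uuid="
    + file_uuid
    + "&algorithm="
    + algorithm
)
print(url)

response_body = json.loads('[{"checksum": "abc123"}]')
print(response_body[0]["checksum"])  # abc123
```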
""" - if algorithm == 'md5': + if algorithm == "md5": hash_object = hashlib.md5() - if algorithm == 'sha1': + if algorithm == "sha1": hash_object = hashlib.sha1() - if algorithm == 'sha256': + if algorithm == "sha256": hash_object = hashlib.sha256() - with open(file_path, 'rb') as file: + with open(file_path, "rb") as file: while True: chunk = file.read(hash_object.block_size) if not chunk: @@ -6673,25 +9034,28 @@ def get_file_hash_from_local(config, file_path, algorithm): def create_temp_dir(config): - if os.path.exists(config['temp_dir']): - temp_dir_exists_message = 'already exists' + if os.path.exists(config["temp_dir"]): + temp_dir_exists_message = "already exists" make_temp_dir = False else: - temp_dir_exists_message = 'does not exist, will create it' + temp_dir_exists_message = "does not exist, will create it" make_temp_dir = True - if config['temp_dir'] == config['input_dir']: - logging.info(f"Using directory defined in the 'input_dir' config setting ({config['input_dir']}) as the temporary directory ({temp_dir_exists_message}).") + if config["temp_dir"] == config["input_dir"]: + logging.info( + f"Using directory defined in the 'input_dir' config setting ({config['input_dir']}) as the temporary directory ({temp_dir_exists_message})." + ) else: - logging.info(f"Using directory defined in the 'temp_dir' config setting ({config['temp_dir']}) as the temporary directory ({temp_dir_exists_message}).") + logging.info( + f"Using directory defined in the 'temp_dir' config setting ({config['temp_dir']}) as the temporary directory ({temp_dir_exists_message})." + ) if make_temp_dir is True: - Path(config['temp_dir']).mkdir(exist_ok=True) + Path(config["temp_dir"]).mkdir(exist_ok=True) def check_csv_file_exists(config, csv_file_target, file_path=None): - """Confirms a CSV file exists. - """ + """Confirms a CSV file exists.""" """Parameters ---------- config : dict @@ -6705,108 +9069,130 @@ def check_csv_file_exists(config, csv_file_target, file_path=None): string The absolute file path to the CSV file. """ - if csv_file_target == 'node_fields': - if os.path.isabs(config['input_csv']): - input_csv = config['input_csv'] + if csv_file_target == "node_fields": + if os.path.isabs(config["input_csv"]): + input_csv = config["input_csv"] # For Google Sheets, the "extraction" is fired over in workbench. - elif config['input_csv'].startswith('http'): + elif config["input_csv"].startswith("http"): input_csv = get_extracted_csv_file_path(config) - message = "Extracting CSV data from " + config['input_csv'] + " (worksheet gid " + str(config['google_sheets_gid']) + ") to " + input_csv + '.' + message = ( + "Extracting CSV data from " + + config["input_csv"] + + " (worksheet gid " + + str(config["google_sheets_gid"]) + + ") to " + + input_csv + + "." + ) print(message) logging.info(message) - elif config['input_csv'].endswith('xlsx'): + elif config["input_csv"].endswith("xlsx"): input_csv = get_extracted_csv_file_path(config) - message = "Extracting CSV data from " + config['input_csv'] + " to " + input_csv + '.' + message = ( + "Extracting CSV data from " + + config["input_csv"] + + " to " + + input_csv + + "." + ) print(message) logging.info(message) else: - input_csv = os.path.join(config['input_dir'], config['input_csv']) + input_csv = os.path.join(config["input_dir"], config["input_csv"]) if os.path.exists(input_csv): - message = 'OK, CSV file ' + input_csv + ' found.' + message = "OK, CSV file " + input_csv + " found." 
print(message) logging.info(message) return input_csv else: - message = 'CSV file ' + input_csv + ' not found.' + message = "CSV file " + input_csv + " not found." logging.error(message) - sys.exit('Error: ' + message) - if csv_file_target == 'taxonomy_fields': + sys.exit("Error: " + message) + if csv_file_target == "taxonomy_fields": # For Google Sheets and Excel, the "extraction" is fired in workbench. if os.path.isabs(file_path): input_csv = file_path else: - input_csv = os.path.join(config['input_dir'], file_path) + input_csv = os.path.join(config["input_dir"], file_path) if os.path.exists(input_csv): - message = 'OK, vocabulary CSV file ' + input_csv + ' found.' + message = "OK, vocabulary CSV file " + input_csv + " found." print(message) logging.info(message) return input_csv else: - message = 'Vocabulary CSV file ' + input_csv + ' not found.' + message = "Vocabulary CSV file " + input_csv + " not found." logging.error(message) - sys.exit('Error: ' + message) + sys.exit("Error: " + message) def get_csv_template(config, args): - field_definitions = get_field_definitions(config, 'node') + field_definitions = get_field_definitions(config, "node") field_labels = collections.OrderedDict() - field_labels['REMOVE THIS COLUMN (KEEP THIS ROW)'] = 'LABEL (REMOVE THIS ROW)' + field_labels["REMOVE THIS COLUMN (KEEP THIS ROW)"] = "LABEL (REMOVE THIS ROW)" for field_name in field_definitions: - if field_definitions[field_name]['label'] != '': - field_labels[field_name] = field_definitions[field_name]['label'] + if field_definitions[field_name]["label"] != "": + field_labels[field_name] = field_definitions[field_name]["label"] else: - field_labels[field_name] = '' + field_labels[field_name] = "" required = collections.OrderedDict() - required['REMOVE THIS COLUMN (KEEP THIS ROW)'] = 'REQUIRED IN CREATE TASKS (REMOVE THIS ROW)' + required["REMOVE THIS COLUMN (KEEP THIS ROW)"] = ( + "REQUIRED IN CREATE TASKS (REMOVE THIS ROW)" + ) for field_name in field_definitions: - if field_definitions[field_name]['required'] != '': - if field_definitions[field_name]['required'] is True: - required[field_name] = 'Yes' + if field_definitions[field_name]["required"] != "": + if field_definitions[field_name]["required"] is True: + required[field_name] = "Yes" else: - required[field_name] = 'No' - required['title'] = 'Yes' - required['uid'] = 'No' - required['langcode'] = 'No' - required['created'] = 'No' - required[config['id_field']] = 'Yes' - if config['nodes_only'] is True: - required['file'] = 'Yes' + required[field_name] = "No" + required["title"] = "Yes" + required["uid"] = "No" + required["langcode"] = "No" + required["created"] = "No" + required[config["id_field"]] = "Yes" + if config["nodes_only"] is True: + required["file"] = "Yes" else: - required['file'] = 'No' + required["file"] = "No" mapping = dict() - mapping['string'] = 'Free text' - mapping['string_long'] = 'Free text' - mapping['text'] = 'Free text' - mapping['text_long'] = 'Free text' - mapping['geolocation'] = '+49.16,-123.93' - mapping['entity_reference'] = '100 [or term name or http://foo.com/someuri]' - mapping['edtf'] = '2020-10-28' - mapping['typed_relation'] = 'relators:art:30' - mapping['integer'] = 100 + mapping["string"] = "Free text" + mapping["string_long"] = "Free text" + mapping["text"] = "Free text" + mapping["text_long"] = "Free text" + mapping["geolocation"] = "+49.16,-123.93" + mapping["entity_reference"] = "100 [or term name or http://foo.com/someuri]" + mapping["edtf"] = "2020-10-28" + mapping["typed_relation"] = 
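The chunked loop in `get_file_hash_from_local()` above never holds more than one block of the file in memory. Isolated here for the sha256 case only (a sketch, not the full md5/sha1/sha256 dispatch):

```python
import hashlib


def sketch_sha256(file_path):
    hash_object = hashlib.sha256()
    with open(file_path, "rb") as file:
        while True:
            # Read block-sized pieces so large files never need to fit in memory.
            chunk = file.read(hash_object.block_size)
            if not chunk:
                break
            hash_object.update(chunk)
    return hash_object.hexdigest()
```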
"relators:art:30" + mapping["integer"] = 100 sample_data = collections.OrderedDict() - sample_data['REMOVE THIS COLUMN (KEEP THIS ROW)'] = 'SAMPLE DATA (REMOVE THIS ROW)' - sample_data[config['id_field']] = '0001' - sample_data['file'] = 'myimage.jpg' - sample_data['uid'] = '21' - sample_data['langcode'] = 'fr' - sample_data['created'] = '2020-11-15T23:49:22+00:00' - sample_data['title'] = 'Free text' + sample_data["REMOVE THIS COLUMN (KEEP THIS ROW)"] = "SAMPLE DATA (REMOVE THIS ROW)" + sample_data[config["id_field"]] = "0001" + sample_data["file"] = "myimage.jpg" + sample_data["uid"] = "21" + sample_data["langcode"] = "fr" + sample_data["created"] = "2020-11-15T23:49:22+00:00" + sample_data["title"] = "Free text" for field_name in field_definitions: - if field_definitions[field_name]['field_type'] in mapping: - sample_data[field_name] = mapping[field_definitions[field_name]['field_type']] + if field_definitions[field_name]["field_type"] in mapping: + sample_data[field_name] = mapping[ + field_definitions[field_name]["field_type"] + ] else: - sample_data[field_name] = '' - - csv_file_path = os.path.join(config['input_dir'], config['input_csv'] + '.csv_file_template') - csv_file = open(csv_file_path, 'a+', encoding='utf-8') - writer = csv.DictWriter(csv_file, fieldnames=sample_data.keys(), lineterminator="\n") + sample_data[field_name] = "" + + csv_file_path = os.path.join( + config["input_dir"], config["input_csv"] + ".csv_file_template" + ) + csv_file = open(csv_file_path, "a+", encoding="utf-8") + writer = csv.DictWriter( + csv_file, fieldnames=sample_data.keys(), lineterminator="\n" + ) writer.writeheader() # We want the labels and required rows to appear as the second and third rows so # add them before we add the sample data. @@ -6815,56 +9201,59 @@ def get_csv_template(config, args): writer.writerow(sample_data) cardinality = collections.OrderedDict() - cardinality['REMOVE THIS COLUMN (KEEP THIS ROW)'] = 'NUMBER OF VALUES ALLOWED (REMOVE THIS ROW)' - cardinality[config['id_field']] = '1' - cardinality['file'] = '1' - cardinality['uid'] = '1' - cardinality['langcode'] = '1' - cardinality['created'] = '1' - cardinality['title'] = '1' + cardinality["REMOVE THIS COLUMN (KEEP THIS ROW)"] = ( + "NUMBER OF VALUES ALLOWED (REMOVE THIS ROW)" + ) + cardinality[config["id_field"]] = "1" + cardinality["file"] = "1" + cardinality["uid"] = "1" + cardinality["langcode"] = "1" + cardinality["created"] = "1" + cardinality["title"] = "1" for field_name in field_definitions: - if field_definitions[field_name]['cardinality'] == -1: - cardinality[field_name] = 'unlimited' + if field_definitions[field_name]["cardinality"] == -1: + cardinality[field_name] = "unlimited" else: - cardinality[field_name] = field_definitions[field_name]['cardinality'] + cardinality[field_name] = field_definitions[field_name]["cardinality"] writer.writerow(cardinality) docs = dict() - docs['string'] = 'Single-valued fields' - docs['string_long'] = 'Single-valued fields' - docs['text'] = 'Single-valued fields' - docs['text_long'] = 'Single-valued fields' - docs['geolocation'] = 'Geolocation fields' - docs['entity_reference'] = 'Taxonomy reference fields' - docs['edtf'] = 'EDTF fields' - docs['typed_relation'] = 'Typed Relation fields' - docs['integer'] = 'Single-valued fields' + docs["string"] = "Single-valued fields" + docs["string_long"] = "Single-valued fields" + docs["text"] = "Single-valued fields" + docs["text_long"] = "Single-valued fields" + docs["geolocation"] = "Geolocation fields" + docs["entity_reference"] = 
"Taxonomy reference fields" + docs["edtf"] = "EDTF fields" + docs["typed_relation"] = "Typed Relation fields" + docs["integer"] = "Single-valued fields" docs_tips = collections.OrderedDict() - docs_tips['REMOVE THIS COLUMN (KEEP THIS ROW)'] = 'SECTION IN DOCUMENTATION (REMOVE THIS ROW)' - docs_tips[config['id_field']] = 'Required fields' - docs_tips['file'] = 'Required fields' - docs_tips['uid'] = 'Base fields' - docs_tips['langcode'] = 'Base fields' - docs_tips['created'] = 'Base fields' - docs_tips['title'] = 'Base fields' + docs_tips["REMOVE THIS COLUMN (KEEP THIS ROW)"] = ( + "SECTION IN DOCUMENTATION (REMOVE THIS ROW)" + ) + docs_tips[config["id_field"]] = "Required fields" + docs_tips["file"] = "Required fields" + docs_tips["uid"] = "Base fields" + docs_tips["langcode"] = "Base fields" + docs_tips["created"] = "Base fields" + docs_tips["title"] = "Base fields" for field_name in field_definitions: - if field_definitions[field_name]['field_type'] in docs: - doc_reference = docs[field_definitions[field_name]['field_type']] + if field_definitions[field_name]["field_type"] in docs: + doc_reference = docs[field_definitions[field_name]["field_type"]] docs_tips[field_name] = doc_reference else: - docs_tips[field_name] = '' - docs_tips['field_member_of'] = '' + docs_tips[field_name] = "" + docs_tips["field_member_of"] = "" writer.writerow(docs_tips) csv_file.close() - print('CSV template saved at ' + csv_file_path + '.') + print("CSV template saved at " + csv_file_path + ".") sys.exit() def get_page_title_from_template(config, parent_title, weight): - """Generates a page title from a simple template. - """ + """Generates a page title from a simple template.""" """Parameters ---------- config : dict @@ -6878,14 +9267,15 @@ def get_page_title_from_template(config, parent_title, weight): string The output of the template. """ - page_title_template = string.Template(config['page_title_template']) - page_title = str(page_title_template.substitute({'parent_title': parent_title, 'weight': weight})) + page_title_template = string.Template(config["page_title_template"]) + page_title = str( + page_title_template.substitute({"parent_title": parent_title, "weight": weight}) + ) return page_title def apply_csv_value_templates(config, row): - """Applies a simple template to a CSV value. - """ + """Applies a simple template to a CSV value.""" """Parameters ---------- config : dict @@ -6898,27 +9288,28 @@ def apply_csv_value_templates(config, row): The row with CSV value templates applied. 
""" templates = dict() - for template in config['csv_value_templates']: + for template in config["csv_value_templates"]: for field_name, value_template in template.items(): templates[field_name] = value_template for field in row: if field in templates: - incoming_subvalues = row[field].split(config['subdelimiter']) + incoming_subvalues = row[field].split(config["subdelimiter"]) outgoing_subvalues = [] for subvalue in incoming_subvalues: if len(subvalue) > 0: csv_value_template = string.Template(templates[field]) - subvalue = str(csv_value_template.substitute({'csv_value': subvalue})) + subvalue = str( + csv_value_template.substitute({"csv_value": subvalue}) + ) outgoing_subvalues.append(subvalue) - templated_string = config['subdelimiter'].join(outgoing_subvalues) + templated_string = config["subdelimiter"].join(outgoing_subvalues) row[field] = templated_string return row def serialize_field_json(config, field_definitions, field_name, field_data): - """Serializes JSON from a Drupal field into a string consistent with Workbench's CSV-field input format. - """ + """Serializes JSON from a Drupal field into a string consistent with Workbench's CSV-field input format.""" """Parameters ---------- config : dict @@ -6939,54 +9330,67 @@ def serialize_field_json(config, field_definitions, field_name, field_data): import workbench_fields # Entity reference fields (taxonomy term and node). - if field_definitions[field_name]['field_type'] == 'entity_reference': + if field_definitions[field_name]["field_type"] == "entity_reference": serialized_field = workbench_fields.EntityReferenceField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Entity reference revision fields (mostly paragraphs). - elif field_definitions[field_name]['field_type'] == 'entity_reference_revisions': + elif field_definitions[field_name]["field_type"] == "entity_reference_revisions": serialized_field = workbench_fields.EntityReferenceRevisionsField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Typed relation fields (currently, only taxonomy term) - elif field_definitions[field_name]['field_type'] == 'typed_relation': + elif field_definitions[field_name]["field_type"] == "typed_relation": serialized_field = workbench_fields.TypedRelationField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Geolocation fields. - elif field_definitions[field_name]['field_type'] == 'geolocation': + elif field_definitions[field_name]["field_type"] == "geolocation": serialized_field = workbench_fields.GeolocationField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Link fields. 
- elif field_definitions[field_name]['field_type'] == 'link': + elif field_definitions[field_name]["field_type"] == "link": serialized_field = workbench_fields.LinkField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Authority Link fields. - elif field_definitions[field_name]['field_type'] == 'authority_link': + elif field_definitions[field_name]["field_type"] == "authority_link": serialized_field = workbench_fields.AuthorityLinkField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Simple fields. else: serialized_field = workbench_fields.SimpleField() - csv_field_data = serialized_field.serialize(config, field_definitions, field_name, field_data) + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) return csv_field_data def csv_subset_warning(config): """Create a message indicating that the csv_start_row and csv_stop_row config - options are present and that a subset of the input CSV will be used. + options are present and that a subset of the input CSV will be used. """ - if config['csv_start_row'] != 0 or config['csv_stop_row'] is not None: + if config["csv_start_row"] != 0 or config["csv_stop_row"] is not None: message = f"Using a subset of the input CSV (will start at row {config['csv_start_row']}, stop at row {config['csv_stop_row']})." - if config['csv_start_row'] != 0 and config['csv_stop_row'] is None: + if config["csv_start_row"] != 0 and config["csv_stop_row"] is None: message = f"Using a subset of the input CSV (will start at row {config['csv_start_row']})." - if config['csv_start_row'] == 0 and config['csv_stop_row'] is not None: + if config["csv_start_row"] == 0 and config["csv_stop_row"] is not None: message = f"Using a subset of the input CSV (will stop at row {config['csv_stop_row']})." print(message) logging.info(message) def get_entity_reference_view_endpoints(config): - """Gets entity reference View endpoints from config. - """ + """Gets entity reference View endpoints from config.""" """Parameters ---------- config : dict @@ -6997,10 +9401,10 @@ def get_entity_reference_view_endpoints(config): Dictionary with Drupal field names as keys and View REST endpoints as values. """ endpoint_mappings = dict() - if 'entity_reference_view_endpoints' not in config: + if "entity_reference_view_endpoints" not in config: return endpoint_mappings - for endpoint_mapping in config['entity_reference_view_endpoints']: + for endpoint_mapping in config["entity_reference_view_endpoints"]: for field_name, endpoint in endpoint_mapping.items(): endpoint_mappings[field_name] = endpoint @@ -7012,8 +9416,7 @@ def get_percentage(part, whole): def get_config_file_identifier(config): - """Gets a unique identifier of the current config file. Used in names of temp files, etc. - """ + """Gets a unique identifier of the current config file. Used in names of temp files, etc.""" """Parameters ---------- config : dict @@ -7024,15 +9427,16 @@ def get_config_file_identifier(config): A string based on the config file's path, with directory slashes and backslashes replaced with underscores. 
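Both `get_page_title_from_template()` and `apply_csv_value_templates()` above rely on `string.Template` substitution. Assuming a sample page-title template, a `|` subdelimiter, and an invented URL-prefix value template (none of these literal values come from the source):

```python
import string

# Page titles: the real template string comes from config["page_title_template"].
page_title_template = string.Template("$parent_title, page $weight")
print(page_title_template.substitute({"parent_title": "Annual report", "weight": 3}))
# Annual report, page 3

# CSV value templates, applied per subvalue as in apply_csv_value_templates().
subdelimiter = "|"
value_template = string.Template("https://example.com/$csv_value")
outgoing_subvalues = []
for subvalue in "foo|bar".split(subdelimiter):
    if len(subvalue) > 0:
        outgoing_subvalues.append(value_template.substitute({"csv_value": subvalue}))
print(subdelimiter.join(outgoing_subvalues))
# https://example.com/foo|https://example.com/bar
```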
""" - split_path = os.path.splitdrive(os.path.splitext(config['current_config_file_path'])[0]) - config_file_id = re.sub(r'[/\\]', '_', split_path[1].strip('/\\')) + split_path = os.path.splitdrive( + os.path.splitext(config["current_config_file_path"])[0] + ) + config_file_id = re.sub(r"[/\\]", "_", split_path[1].strip("/\\")) return config_file_id def calculate_response_time_trend(config, response_time): - """Gets the average response time from the most recent 20 HTTP requests. - """ + """Gets the average response time from the most recent 20 HTTP requests.""" """Parameters ---------- config : dict @@ -7049,7 +9453,7 @@ def calculate_response_time_trend(config, response_time): sample = http_response_times[-20:] else: sample = http_response_times - if config['log_response_time_sample'] is True: + if config["log_response_time_sample"] is True: logging.info("Response time trend sample: %s", sample) if len(sample) > 0: average = sum(sample) / len(sample) @@ -7057,8 +9461,7 @@ def calculate_response_time_trend(config, response_time): def is_ascii(input): - """Check if a string contains only ASCII characters. - """ + """Check if a string contains only ASCII characters.""" """Parameters ---------- input : str @@ -7075,7 +9478,7 @@ def is_ascii(input): def quick_delete_node(config, args): logging.info("--quick_delete_node task started for " + args.quick_delete_node) - response = issue_request(config, 'GET', args.quick_delete_node + '?_format=json') + response = issue_request(config, "GET", args.quick_delete_node + "?_format=json") if response.status_code != 200: message = f"Sorry, {args.quick_delete_node} can't be accessed. Please confirm the node exists and is accessible to the user defined in your Workbench configuration." logging.error(message) @@ -7088,29 +9491,42 @@ def quick_delete_node(config, args): sys.exit("Error: " + message) entity = json.loads(response.text) - if 'type' in entity: - if entity['type'][0]['target_type'] == 'node_type': + if "type" in entity: + if entity["type"][0]["target_type"] == "node_type": # Delete the node's media first. - if config['delete_media_with_nodes'] is True: - media_endpoint = config['host'] + '/node/' + str(node_id) + '/media?_format=json' - media_response = issue_request(config, 'GET', media_endpoint) + if config["delete_media_with_nodes"] is True: + media_endpoint = ( + config["host"] + "/node/" + str(node_id) + "/media?_format=json" + ) + media_response = issue_request(config, "GET", media_endpoint) media_response_body = json.loads(media_response.text) media_messages = [] for media in media_response_body: - if 'mid' in media: - media_id = media['mid'][0]['value'] - media_delete_status_code = remove_media_and_file(config, media_id) + if "mid" in media: + media_id = media["mid"][0]["value"] + media_delete_status_code = remove_media_and_file( + config, media_id + ) if media_delete_status_code == 204: - media_messages.append("+ Media " + config['host'] + '/media/' + str(media_id) + " deleted.") + media_messages.append( + "+ Media " + + config["host"] + + "/media/" + + str(media_id) + + " deleted." + ) # Then the node. 
- node_endpoint = config['host'] + '/node/' + str(node_id) + '?_format=json' - node_response = issue_request(config, 'DELETE', node_endpoint) + node_endpoint = config["host"] + "/node/" + str(node_id) + "?_format=json" + node_response = issue_request(config, "DELETE", node_endpoint) if node_response.status_code == 204: - if config['progress_bar'] is False: + if config["progress_bar"] is False: print("Node " + args.quick_delete_node + " deleted.") logging.info("Node %s deleted.", args.quick_delete_node) - if config['delete_media_with_nodes'] is True and config['progress_bar'] is False: + if ( + config["delete_media_with_nodes"] is True + and config["progress_bar"] is False + ): if len(media_messages): for media_message in media_messages: print(media_message) @@ -7129,24 +9545,30 @@ def quick_delete_node(config, args): def quick_delete_media(config, args): logging.info("--quick_delete_mediatask started for " + args.quick_delete_media) - if config['standalone_media_url'] is False and not args.quick_delete_media.endswith('/edit'): + if config["standalone_media_url"] is False and not args.quick_delete_media.endswith( + "/edit" + ): message = f"You need to add '/edit' to the end of your media URL (e.g. {args.quick_delete_media}/edit)." logging.error(message) sys.exit("Error: " + message) - if config['standalone_media_url'] is True and args.quick_delete_media.endswith('/edit'): + if config["standalone_media_url"] is True and args.quick_delete_media.endswith( + "/edit" + ): message = f"You need to remove '/edit' to the end of your media URL." logging.error(message) sys.exit("Error: " + message) - ping_response = issue_request(config, 'GET', args.quick_delete_media + '?_format=json') + ping_response = issue_request( + config, "GET", args.quick_delete_media + "?_format=json" + ) if ping_response.status_code == 404: message = f"Cannot find {args.quick_delete_media}. Please verify the media URL and try again." logging.error(message) sys.exit("Error: " + message) entity = json.loads(ping_response.text) - if 'mid' not in entity: + if "mid" not in entity: message = f"{args.quick_delete_media} does not apear to be a media." logging.error(message) sys.exit("Error: " + message) @@ -7167,7 +9589,7 @@ def quick_delete_media(config, args): def create_contact_sheet_thumbnail(config, source_filename): """Determines the thumbnail image to use for a given filename, and copies - the image to the output directory. + the image to the output directory. """ """Parameters ---------- @@ -7180,31 +9602,41 @@ def create_contact_sheet_thumbnail(config, source_filename): string The file name of the thumbnail image file. 
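The URL checks at the top of `quick_delete_media()` above hinge on the `standalone_media_url` setting. A toy illustration with hypothetical URLs:

```python
def sketch_check_media_url(media_url, standalone_media_url):
    # With standalone media URLs disabled, the URL must end in "/edit";
    # with them enabled, it must not.
    if standalone_media_url is False and not media_url.endswith("/edit"):
        return f"Add '/edit' to the end of your media URL (e.g. {media_url}/edit)."
    if standalone_media_url is True and media_url.endswith("/edit"):
        return "Remove '/edit' from the end of your media URL."
    return "OK"


print(sketch_check_media_url("https://islandora.example.com/media/42", False))
print(sketch_check_media_url("https://islandora.example.com/media/42/edit", False))  # OK
```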
""" - generic_icons_dir = os.path.join('assets', 'contact_sheet', 'generic_icons') + generic_icons_dir = os.path.join("assets", "contact_sheet", "generic_icons") if len(source_filename.strip()) == 0: - no_file_icon_filename = 'tn_generic_no_file.png' - no_file_icon_path = os.path.join(config['contact_sheet_output_dir'], no_file_icon_filename) + no_file_icon_filename = "tn_generic_no_file.png" + no_file_icon_path = os.path.join( + config["contact_sheet_output_dir"], no_file_icon_filename + ) if not os.path.exists(no_file_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, no_file_icon_filename), no_file_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, no_file_icon_filename), + no_file_icon_path, + ) return no_file_icon_filename - if source_filename == 'compound': - compound_icon_filename = 'tn_generic_compound.png' - compound_icon_path = os.path.join(config['contact_sheet_output_dir'], compound_icon_filename) + if source_filename == "compound": + compound_icon_filename = "tn_generic_compound.png" + compound_icon_path = os.path.join( + config["contact_sheet_output_dir"], compound_icon_filename + ) if not os.path.exists(compound_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, compound_icon_filename), compound_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, compound_icon_filename), + compound_icon_path, + ) return compound_icon_filename # todo: get these from config['media_types'] - pdf_extensions = ['.pdf'] - video_extensions = ['.mp4'] - audio_extensions = ['.mp3'] - image_extensions = ['.png', '.jpg', '.jpeg', '.tif', '.tiff', '.jp2'] + pdf_extensions = [".pdf"] + video_extensions = [".mp4"] + audio_extensions = [".mp3"] + image_extensions = [".png", ".jpg", ".jpeg", ".tif", ".tiff", ".jp2"] source_file_name, source_file_extension = os.path.splitext(source_filename) if source_file_extension.lower() in image_extensions: - ''' + """ # Note: this block can be used to generate thumbnails for images if "from PIL import Image". 
image_source_path = os.path.join(input_dir, source_filename) image_source = Image.open(image_source_path) @@ -7215,233 +9647,351 @@ def create_contact_sheet_thumbnail(config, source_filename): tn_filepath = os.path.join(output_dir, tn_filename) image_tn.save(tn_filepath) tn_filepath = tn_filename - ''' - image_icon_filename = 'tn_generic_image.png' - image_icon_path = os.path.join(config['contact_sheet_output_dir'], image_icon_filename) + """ + image_icon_filename = "tn_generic_image.png" + image_icon_path = os.path.join( + config["contact_sheet_output_dir"], image_icon_filename + ) if not os.path.exists(image_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, image_icon_filename), image_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, image_icon_filename), image_icon_path + ) tn_filepath = image_icon_filename elif source_file_extension.lower() in pdf_extensions: - pdf_icon_filename = 'tn_generic_pdf.png' - pdf_icon_path = os.path.join(config['contact_sheet_output_dir'], pdf_icon_filename) + pdf_icon_filename = "tn_generic_pdf.png" + pdf_icon_path = os.path.join( + config["contact_sheet_output_dir"], pdf_icon_filename + ) if not os.path.exists(pdf_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, pdf_icon_filename), pdf_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, pdf_icon_filename), pdf_icon_path + ) tn_filepath = pdf_icon_filename elif source_file_extension.lower() in audio_extensions: - audio_icon_filename = 'tn_generic_audio.png' - audio_icon_path = os.path.join(config['contact_sheet_output_dir'], audio_icon_filename) + audio_icon_filename = "tn_generic_audio.png" + audio_icon_path = os.path.join( + config["contact_sheet_output_dir"], audio_icon_filename + ) if not os.path.exists(audio_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, audio_icon_filename), audio_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, audio_icon_filename), audio_icon_path + ) tn_filepath = audio_icon_filename elif source_file_extension.lower() in video_extensions: - video_icon_filename = 'tn_generic_video.png' - video_icon_path = os.path.join(config['contact_sheet_output_dir'], video_icon_filename) + video_icon_filename = "tn_generic_video.png" + video_icon_path = os.path.join( + config["contact_sheet_output_dir"], video_icon_filename + ) if not os.path.exists(video_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, video_icon_filename), video_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, video_icon_filename), video_icon_path + ) tn_filepath = video_icon_filename else: - binary_icon_filename = 'tn_generic_binary.png' - binary_icon_path = os.path.join(config['contact_sheet_output_dir'], binary_icon_filename) + binary_icon_filename = "tn_generic_binary.png" + binary_icon_path = os.path.join( + config["contact_sheet_output_dir"], binary_icon_filename + ) if not os.path.exists(binary_icon_path): - shutil.copyfile(os.path.join(generic_icons_dir, binary_icon_filename), binary_icon_path) + shutil.copyfile( + os.path.join(generic_icons_dir, binary_icon_filename), binary_icon_path + ) tn_filepath = binary_icon_filename return tn_filepath def generate_contact_sheet_from_csv(config): - """Generates a contact sheet from CSV data. - """ + """Generates a contact sheet from CSV data.""" """Parameters ---------- config : dict The configuration settings defined by workbench_config.get_config(). 
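`create_contact_sheet_thumbnail()` above boils down to an extension-to-icon lookup plus a copy of the icon into the output directory. A condensed sketch of just the lookup (the extension lists are trimmed; the icon file names are the ones used above):

```python
import os

GENERIC_ICONS = {
    ".pdf": "tn_generic_pdf.png",
    ".mp3": "tn_generic_audio.png",
    ".mp4": "tn_generic_video.png",
    ".jpg": "tn_generic_image.png",
    ".tif": "tn_generic_image.png",
}


def sketch_thumbnail_for(filename):
    if len(filename.strip()) == 0:
        return "tn_generic_no_file.png"
    extension = os.path.splitext(filename)[1].lower()
    return GENERIC_ICONS.get(extension, "tn_generic_binary.png")


print(sketch_thumbnail_for("report.PDF"))  # tn_generic_pdf.png
print(sketch_thumbnail_for("data.xyz"))  # tn_generic_binary.png
```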
""" - css_file_path = config['contact_sheet_css_path'] + css_file_path = config["contact_sheet_css_path"] css_file_name = os.path.basename(css_file_path) - generic_icons_dir = os.path.join('assets', 'contact_sheet', 'generic_icons') + generic_icons_dir = os.path.join("assets", "contact_sheet", "generic_icons") - if not os.path.exists(config['contact_sheet_output_dir']): + if not os.path.exists(config["contact_sheet_output_dir"]): try: - os.mkdir(config['contact_sheet_output_dir']) + os.mkdir(config["contact_sheet_output_dir"]) except Exception as e: - message = 'Path in configuration option "contact_sheet_output_dir" ("' + config['contact_sheet_output_dir'] + '") is not writable.' - logging.error(message + ' ' + str(e)) - sys.exit('Error: ' + message + ' See log for more detail.') + message = ( + 'Path in configuration option "contact_sheet_output_dir" ("' + + config["contact_sheet_output_dir"] + + '") is not writable.' + ) + logging.error(message + " " + str(e)) + sys.exit("Error: " + message + " See log for more detail.") csv_data = get_csv_data(config) compound_items = list() csv_data_to_get_children = get_csv_data(config) - if config['paged_content_from_directories']: + if config["paged_content_from_directories"]: # Collect the IDs of top-level items for use in the "Using subdirectories" method # of creating compound/paged content. for get_children_row in csv_data_to_get_children: - compound_items.append(get_children_row[config['id_field']]) + compound_items.append(get_children_row[config["id_field"]]) else: # Collect the IDs of items whose IDs are in other (child) items' "parent_id" column, # a.k.a. compound items created using the "With page/child-level metadata" method. - if 'parent_id' in csv_data.fieldnames: + if "parent_id" in csv_data.fieldnames: for get_children_row in csv_data_to_get_children: - compound_items.append(get_children_row['parent_id']) + compound_items.append(get_children_row["parent_id"]) deduplicated_compound_items = list(set(compound_items)) compound_items = deduplicated_compound_items - if '' in compound_items: - compound_items.remove('') + if "" in compound_items: + compound_items.remove("") # Create a dict containing all the output file data. 
contact_sheet_output_files = dict() - contact_sheet_output_files['main_contact_sheet'] = {} - contact_sheet_output_files['main_contact_sheet']['path'] = os.path.join(config['contact_sheet_output_dir'], 'contact_sheet.htm') - contact_sheet_output_files['main_contact_sheet']['file_handle'] = open(contact_sheet_output_files['main_contact_sheet']['path'], 'w') - contact_sheet_output_files['main_contact_sheet']['markup'] = '' + contact_sheet_output_files["main_contact_sheet"] = {} + contact_sheet_output_files["main_contact_sheet"]["path"] = os.path.join( + config["contact_sheet_output_dir"], "contact_sheet.htm" + ) + contact_sheet_output_files["main_contact_sheet"]["file_handle"] = open( + contact_sheet_output_files["main_contact_sheet"]["path"], "w" + ) + contact_sheet_output_files["main_contact_sheet"]["markup"] = "" for compound_item_id in compound_items: - if compound_item_id != '': + if compound_item_id != "": contact_sheet_output_files[compound_item_id] = {} - compound_item_contact_sheet_path = os.path.join(config['contact_sheet_output_dir'], f'{compound_item_id}_contact_sheet.htm') - contact_sheet_output_files[compound_item_id]['path'] = compound_item_contact_sheet_path - contact_sheet_output_files[compound_item_id]['file_handle'] = open(os.path.join(compound_item_contact_sheet_path), 'w') - contact_sheet_output_files[compound_item_id]['markup'] = '' + compound_item_contact_sheet_path = os.path.join( + config["contact_sheet_output_dir"], + f"{compound_item_id}_contact_sheet.htm", + ) + contact_sheet_output_files[compound_item_id][ + "path" + ] = compound_item_contact_sheet_path + contact_sheet_output_files[compound_item_id]["file_handle"] = open( + os.path.join(compound_item_contact_sheet_path), "w" + ) + contact_sheet_output_files[compound_item_id]["markup"] = "" for output_file in contact_sheet_output_files.keys(): - contact_sheet_output_files[output_file]['file_handle'] = open(contact_sheet_output_files[output_file]['path'], 'a') - contact_sheet_output_files[output_file]['file_handle'].write(f'\n\nIslandora Workbench contact sheet') - contact_sheet_output_files[output_file]['file_handle'].write(f'\n') - contact_sheet_output_files[output_file]['file_handle'].write(f'\n\n\n') + contact_sheet_output_files[output_file]["file_handle"] = open( + contact_sheet_output_files[output_file]["path"], "a" + ) + contact_sheet_output_files[output_file]["file_handle"].write( + f"\n\nIslandora Workbench contact sheet" + ) + contact_sheet_output_files[output_file]["file_handle"].write( + f'\n' + ) + contact_sheet_output_files[output_file]["file_handle"].write( + f'\n\n\n' + ) - if output_file != 'main_contact_sheet': + if output_file != "main_contact_sheet": # "output_file" is the same as the CSV ID of the parent of items in the current contact sheet. - members_of_div = f'
Members of ' + \ - f'CSV entry {output_file}.
\n' - contact_sheet_output_files[output_file]['file_handle'].write(members_of_div) - - contact_sheet_output_files[output_file]['file_handle'].write('
\n') + members_of_div = ( + f'
Members of ' + + f'CSV entry {output_file}.
\n' + ) + contact_sheet_output_files[output_file]["file_handle"].write(members_of_div) + + contact_sheet_output_files[output_file]["file_handle"].write( + '
\n' + ) for row in csv_data: - if config['paged_content_from_directories']: + if config["paged_content_from_directories"]: # Note: parent items (i.e. items with rows in the CSV) are processed below. - if row[config['id_field']] in compound_items: + if row[config["id_field"]] in compound_items: # Get all the page files for this parent and create a new contact sheet containing them. - page_files_dir_path = os.path.join(config['input_dir'], row[config['id_field']]) + page_files_dir_path = os.path.join( + config["input_dir"], row[config["id_field"]] + ) page_files = os.listdir(page_files_dir_path) # Page files need to be sorted by weight in the contact sheet. page_file_weights_map = {} for page_file_name in page_files: filename_without_extension = os.path.splitext(page_file_name)[0] - filename_segments = filename_without_extension.split(config['paged_content_sequence_separator']) + filename_segments = filename_without_extension.split( + config["paged_content_sequence_separator"] + ) weight = filename_segments[-1] weight = weight.lstrip("0") # Cast weight as int so we can sort it easily. page_file_weights_map[int(weight)] = page_file_name - page_title = row['title'] + ', page ' + weight + page_title = row["title"] + ", page " + weight sorted_weights = sorted(page_file_weights_map.keys()) for page_sort_order in sorted_weights: - page_output_file = row[config['id_field']] - filename_without_extension = os.path.splitext(page_file_weights_map[page_sort_order])[0] - tn_filename = create_contact_sheet_thumbnail(config, page_file_weights_map[page_sort_order]) + page_output_file = row[config["id_field"]] + filename_without_extension = os.path.splitext( + page_file_weights_map[page_sort_order] + )[0] + tn_filename = create_contact_sheet_thumbnail( + config, page_file_weights_map[page_sort_order] + ) # Start .card. - contact_sheet_output_files[page_output_file]['markup'] = '\n
\n' - contact_sheet_output_files[page_output_file]['markup'] += f'{filename_without_extension}' + contact_sheet_output_files[page_output_file][ + "markup" + ] = '\n
\n' + contact_sheet_output_files[page_output_file][ + "markup" + ] += f'{filename_without_extension}' # Start .fields - contact_sheet_output_files[page_output_file]['markup'] += f'\n
' - contact_sheet_output_files[page_output_file]['markup'] += f'\n
file: ' + \ - f'{page_file_weights_map[page_sort_order]}
' - contact_sheet_output_files[page_output_file]['markup'] += f'\n
title: ' + \ - f'{page_title}
' - contact_sheet_output_files[page_output_file]['markup'] += f'\n
field_weight: ' + \ - f'{page_sort_order}
' + contact_sheet_output_files[page_output_file][ + "markup" + ] += f'\n
' + contact_sheet_output_files[page_output_file]["markup"] += ( + f'\n
file: ' + + f"{page_file_weights_map[page_sort_order]}
" + ) + contact_sheet_output_files[page_output_file]["markup"] += ( + f'\n
title: ' + + f"{page_title}
" + ) + contact_sheet_output_files[page_output_file]["markup"] += ( + f'\n
field_weight: ' + + f"{page_sort_order}
" + ) # Close .fields - contact_sheet_output_files[page_output_file]['markup'] += f'\n\n
' + contact_sheet_output_files[page_output_file][ + "markup" + ] += f"\n\n
" # Close .card - contact_sheet_output_files[page_output_file]['markup'] += f'\n\n
' - contact_sheet_output_files[page_output_file]['file_handle'].write(contact_sheet_output_files[page_output_file]['markup'] + "\n") + contact_sheet_output_files[page_output_file][ + "markup" + ] += f"\n\n
" + contact_sheet_output_files[page_output_file]["file_handle"].write( + contact_sheet_output_files[page_output_file]["markup"] + "\n" + ) else: - if 'parent_id' in row: - if row['parent_id'] == '': - output_file = 'main_contact_sheet' - if row[config['id_field']] in compound_items: - tn_filename = create_contact_sheet_thumbnail(config, 'compound') + if "parent_id" in row: + if row["parent_id"] == "": + output_file = "main_contact_sheet" + if row[config["id_field"]] in compound_items: + tn_filename = create_contact_sheet_thumbnail(config, "compound") else: - tn_filename = create_contact_sheet_thumbnail(config, row['file']) + tn_filename = create_contact_sheet_thumbnail( + config, row["file"] + ) else: - output_file = row['parent_id'] - if row[config['id_field']] in compound_items: - tn_filename = create_contact_sheet_thumbnail(config, 'compound') + output_file = row["parent_id"] + if row[config["id_field"]] in compound_items: + tn_filename = create_contact_sheet_thumbnail(config, "compound") else: - tn_filename = create_contact_sheet_thumbnail(config, row['file']) + tn_filename = create_contact_sheet_thumbnail( + config, row["file"] + ) else: - output_file = 'main_contact_sheet' - tn_filename = create_contact_sheet_thumbnail(config, row['file']) + output_file = "main_contact_sheet" + tn_filename = create_contact_sheet_thumbnail(config, row["file"]) # During 'paged_content_from_directories' parent items (i.e. items with rows in the CSV) # are processed from this point on. - csv_id = row[config['id_field']] - title = row['title'] + csv_id = row[config["id_field"]] + title = row["title"] # Ensure that parent items get the compound icon. - if config['paged_content_from_directories']: - output_file = 'main_contact_sheet' - tn_filename = create_contact_sheet_thumbnail(config, 'compound') + if config["paged_content_from_directories"]: + output_file = "main_contact_sheet" + tn_filename = create_contact_sheet_thumbnail(config, "compound") # start .card - contact_sheet_output_files[output_file]['markup'] = '\n
\n' - contact_sheet_output_files[output_file]['markup'] += f'{title}' + contact_sheet_output_files[output_file]["markup"] = '\n
\n' + contact_sheet_output_files[output_file][ + "markup" + ] += f'{title}' # Start .fields - contact_sheet_output_files[output_file]['markup'] += f'\n
' - if row[config['id_field']] in compound_items: - contact_sheet_output_files[output_file]['markup'] += f'
' + \ - f'members
' - contact_sheet_output_files[output_file]['markup'] += f'\n
{config["id_field"]}: {csv_id}
' - if config['paged_content_from_directories'] is False and len(row["file"]) > 0: - contact_sheet_output_files[output_file]['markup'] += f'\n
file: {row["file"]}
' + contact_sheet_output_files[output_file]["markup"] += f'\n
' + if row[config["id_field"]] in compound_items: + contact_sheet_output_files[output_file]["markup"] += ( + f'
' + + f'members
' + ) + contact_sheet_output_files[output_file][ + "markup" + ] += f'\n
{config["id_field"]}: {csv_id}
' + if config["paged_content_from_directories"] is False and len(row["file"]) > 0: + contact_sheet_output_files[output_file][ + "markup" + ] += f'\n
file: {row["file"]}
' else: - contact_sheet_output_files[output_file]['markup'] += f'\n
file:
' - contact_sheet_output_files[output_file]['markup'] += f'\n
title: {title}
' + contact_sheet_output_files[output_file][ + "markup" + ] += f'\n
file:
' + contact_sheet_output_files[output_file][ + "markup" + ] += f'\n
title: {title}
' for fieldname in row: # These three fields have already been rendered. - if fieldname not in [config['id_field'], 'title', 'file']: + if fieldname not in [config["id_field"], "title", "file"]: if len(row[fieldname].strip()) == 0: continue if len(row[fieldname]) > 30: field_value = row[fieldname][:30] - row_value_with_enhanced_subdelimiter = row[fieldname].replace(config['subdelimiter'], ' □ ') - field_value = field_value.replace(config['subdelimiter'], ' □ ') - contact_sheet_output_files[output_file]['markup'] += f'\n
{fieldname}: ' + \ - f'{field_value} [...]
' + row_value_with_enhanced_subdelimiter = row[fieldname].replace( + config["subdelimiter"], " □ " + ) + field_value = field_value.replace( + config["subdelimiter"], " □ " + ) + contact_sheet_output_files[output_file]["markup"] += ( + f'\n
{fieldname}: ' + + f'{field_value} [...]
' + ) else: field_value = row[fieldname] - field_value = field_value.replace(config['subdelimiter'], ' □ ') - contact_sheet_output_files[output_file]['markup'] += f'\n
{fieldname}: {field_value}
' + field_value = field_value.replace( + config["subdelimiter"], " □ " + ) + contact_sheet_output_files[output_file][ + "markup" + ] += f'\n
{fieldname}: {field_value}
' # Close .fields - contact_sheet_output_files[output_file]['markup'] += f'\n\n
' + contact_sheet_output_files[output_file][ + "markup" + ] += f"\n\n
" # Close .card - contact_sheet_output_files[output_file]['markup'] += f'\n\n
' - contact_sheet_output_files[output_file]['file_handle'].write(contact_sheet_output_files[output_file]['markup'] + "\n") + contact_sheet_output_files[output_file]["markup"] += f"\n\n
" + contact_sheet_output_files[output_file]["file_handle"].write( + contact_sheet_output_files[output_file]["markup"] + "\n" + ) # Zero out the card markup before starting the next CSV row. - contact_sheet_output_files[output_file]['markup'] = '' + contact_sheet_output_files[output_file]["markup"] = "" for output_file in contact_sheet_output_files.keys(): # Close .cards - contact_sheet_output_files[output_file]['file_handle'].write(f'\n\n
') - contact_sheet_output_files[output_file]['file_handle'].write('\n') + contact_sheet_output_files[output_file]["file_handle"].write( + f"\n\n
" + ) + contact_sheet_output_files[output_file]["file_handle"].write( + '\n' + ) now = datetime.datetime.now() - contact_sheet_output_files[output_file]['file_handle'].write(f'\n') - contact_sheet_output_files[output_file]['file_handle'].write('\n') - contact_sheet_output_files[output_file]['file_handle'].close() - - shutil.copyfile(os.path.join(css_file_path), os.path.join(config['contact_sheet_output_dir'], css_file_name)) - - -def sqlite_manager(config, operation='select', table_name=None, query=None, values=(), db_file_path=None, warn_table_exists=False): - """Perform operations on an SQLite database. - """ + contact_sheet_output_files[output_file]["file_handle"].write( + f'\n' + ) + contact_sheet_output_files[output_file]["file_handle"].write("\n") + contact_sheet_output_files[output_file]["file_handle"].close() + + shutil.copyfile( + os.path.join(css_file_path), + os.path.join(config["contact_sheet_output_dir"], css_file_name), + ) + + +def sqlite_manager( + config, + operation="select", + table_name=None, + query=None, + values=(), + db_file_path=None, + warn_table_exists=False, +): + """Perform operations on an SQLite database.""" """ Params config: dict @@ -7473,35 +10023,37 @@ def sqlite_manager(config, operation='select', table_name=None, query=None, valu return False if db_file_path is None: - db_file_name = config['sqlite_db_filename'] + db_file_name = config["sqlite_db_filename"] else: db_file_name = db_file_path if os.path.isabs(db_file_name): db_path = db_file_name else: - db_path = os.path.join(config['temp_dir'], db_file_name) + db_path = os.path.join(config["temp_dir"], db_file_name) # Only create the database if the database file does not exist. Note: Sqlite3 creates the db file # automatically in its .connect method, so you only need to use this operation if you want to # create the db prior to creating a table. No need to use it as a prerequisite for creating a table. - if operation == 'create_database': + if operation == "create_database": if os.path.isfile(db_path): return False else: sqlite3.connect(db_path) logging.info(f'SQLite database "{db_path}" created.') return True - elif operation == 'remove_database': + elif operation == "remove_database": if os.path.isfile(db_path): os.remove(db_path) logging.info(f'SQLite database "{db_path}" deleted.') return True - elif operation == 'create_table': + elif operation == "create_table": con = sqlite3.connect(db_path) cur = con.cursor() args = (table_name,) - tables = cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", args).fetchall() + tables = cur.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name=?", args + ).fetchall() # Only create the table if it doesn't exist. if tables == []: # cur = con.cursor() @@ -7511,9 +10063,11 @@ def sqlite_manager(config, operation='select', table_name=None, query=None, valu else: con.close() if warn_table_exists is True: - logging.warning(f'SQLite database "{db_path}" already contains a table named "{table_name}".') + logging.warning( + f'SQLite database "{db_path}" already contains a table named "{table_name}".' 
+                )
             return False
-    elif operation == 'select':
+    elif operation == "select":
         try:
             con = sqlite3.connect(db_path)
             con.row_factory = sqlite3.Row
@@ -7522,8 +10076,10 @@ def sqlite_manager(config, operation='select', table_name=None, query=None, valu
             con.close()
             return res
         except sqlite3.OperationalError as e:
-            logging.error(f'Error executing SQLite query against database at {db_path}: {e}')
-            sys.exit(f'Error executing SQLite query against database at {db_path}: {e}')
+            logging.error(
+                f"Error executing SQLite query against database at {db_path}: {e}"
+            )
+            sys.exit(f"Error executing SQLite query against database at {db_path}: {e}")
     else:
         # 'insert', 'update', 'delete' queries.
         try:
@@ -7535,68 +10091,90 @@ def sqlite_manager(config, operation='select', table_name=None, query=None, valu
             con.close()
             return res
         except sqlite3.OperationalError as e:
-            logging.error(f'Error executing SQLite query against database at {db_path}: {e}')
-            sys.exit(f'Error executing SQLite query against database at {db_path}: {e}')
+            logging.error(
+                f"Error executing SQLite query against database at {db_path}: {e}"
+            )
+            sys.exit(f"Error executing SQLite query against database at {db_path}: {e}")


 def prepare_csv_id_to_node_id_map(config):
-    """Creates the SQLite database used to map CSV row IDs to newly created node IDs.
-    """
-    if config['csv_id_to_node_id_map_path'] is False:
+    """Creates the SQLite database used to map CSV row IDs to newly created node IDs."""
+    if config["csv_id_to_node_id_map_path"] is False:
         return None
-    create_table_sql = "CREATE TABLE csv_id_to_node_id_map (timestamp TIMESTAMP DEFAULT (datetime('now','localtime')) NOT NULL, " + \
-        " config_file TEXT, parent_csv_id TEXT, parent_node_id, csv_id TEXT, node_id TEXT)"
-    sqlite_manager(config, operation='create_table', table_name='csv_id_to_node_id_map', query=create_table_sql, db_file_path=config['csv_id_to_node_id_map_path'])
-
-
-def populate_csv_id_to_node_id_map(config, parent_csv_row_id, parent_node_id, csv_row_id, node_id):
-    """Inserts a row into the SQLite database used to map CSV row IDs to newly created node IDs.
-    """
-    if config['csv_id_to_node_id_map_path'] is False:
+    create_table_sql = (
+        "CREATE TABLE csv_id_to_node_id_map (timestamp TIMESTAMP DEFAULT (datetime('now','localtime')) NOT NULL, "
+        + " config_file TEXT, parent_csv_id TEXT, parent_node_id, csv_id TEXT, node_id TEXT)"
+    )
+    sqlite_manager(
+        config,
+        operation="create_table",
+        table_name="csv_id_to_node_id_map",
+        query=create_table_sql,
+        db_file_path=config["csv_id_to_node_id_map_path"],
+    )
+
+
+def populate_csv_id_to_node_id_map(
+    config, parent_csv_row_id, parent_node_id, csv_row_id, node_id
+):
+    """Inserts a row into the SQLite database used to map CSV row IDs to newly created node IDs."""
+    if config["csv_id_to_node_id_map_path"] is False:
         return None
     sql_query = "INSERT INTO csv_id_to_node_id_map (config_file, parent_csv_id, parent_node_id, csv_id, node_id) VALUES (?, ?, ?, ?, ?)"
-    sqlite_manager(config,
-                   operation='insert',
-                   query=sql_query,
-                   values=(config['config_file'],
-                           str(parent_csv_row_id),
-                           str(parent_node_id),
-                           str(csv_row_id),
-                           str(node_id)), db_file_path=config['csv_id_to_node_id_map_path']
-                   )
+    sqlite_manager(
+        config,
+        operation="insert",
+        query=sql_query,
+        values=(
+            config["config_file"],
+            str(parent_csv_row_id),
+            str(parent_node_id),
+            str(csv_row_id),
+            str(node_id),
+        ),
+        db_file_path=config["csv_id_to_node_id_map_path"],
+    )


 def get_term_field_values(config, term_id):
     """Get a term's field data so we can use it during PATCH updates,
-    which replace a field's values.
+        which replace a field's values.
     """
-    url = config['host'] + '/taxonomy/term/' + term_id + '?_format=json'
-    response = issue_request(config, 'GET', url)
+    url = config["host"] + "/taxonomy/term/" + term_id + "?_format=json"
+    response = issue_request(config, "GET", url)
     term_fields = json.loads(response.text)
     return term_fields


 def preprocess_csv(config, row, field):
     """Execute field preprocessor scripts, if any are configured. Note that these scripts
-    are applied to the entire value from the CSV field and not split field values,
-    e.g., if a field is multivalued, the preprocessor must split it and then reassemble
-    it back into a string before returning it. Note that preprocessor scripts work only
-    on string data and not on binary data like images, etc. and only on custom fields
-    (so not title).
-    """
+    are applied to the entire value from the CSV field and not split field values,
+    e.g., if a field is multivalued, the preprocessor must split it and then reassemble
+    it back into a string before returning it. Note that preprocessor scripts work only
+    on string data and not on binary data like images, etc. and only on custom fields
+    (so not title).
+ """ + if "preprocessors" in config and field in config["preprocessors"]: + command = config["preprocessors"][field] + output, return_code = preprocess_field_data( + config["subdelimiter"], row[field], command + ) if return_code == 0: preprocessor_input = copy.deepcopy(row[field]) logging.info( 'Preprocess command %s executed, taking "%s" as input and returning "%s".', command, preprocessor_input, - output.decode().strip()) + output.decode().strip(), + ) return output.decode().strip() else: - message = 'Preprocess command ' + command + ' failed with return code ' + str(return_code) + message = ( + "Preprocess command " + + command + + " failed with return code " + + str(return_code) + ) logging.error(message) return row[field] @@ -7618,14 +10196,14 @@ def get_node_media_summary(config, nid): try: media_use_terms = [] url = f"/node/{nid}/media?_format=json" - response = issue_request(config, 'GET', url) + response = issue_request(config, "GET", url) media_list = json.loads(response.text) for media in media_list: - for media_use_term in media['field_media_use']: - term_name = get_term_name(config, media_use_term['target_id']) + for media_use_term in media["field_media_use"]: + term_name = get_term_name(config, media_use_term["target_id"]) media_use_terms.append(term_name) media_use_terms.sort() - return '; '.join(media_use_terms).strip() + return "; ".join(media_use_terms).strip() except Exception as e: message = f"Getting media list for \"{config['host']}{url}\" returned an error." print(f"Error: {message} See log for more detail.") @@ -7633,8 +10211,8 @@ def get_node_media_summary(config, nid): def service_file_present(config, input): - service_uri = 'http://pcdm.org/use#ServiceFile' - candidates = input.split('|') + service_uri = "http://pcdm.org/use#ServiceFile" + candidates = input.split("|") for candidate in candidates: candidate = candidate.strip() if candidate == service_uri: @@ -7642,7 +10220,9 @@ def service_file_present(config, input): if candidate.isnumeric(): if get_term_uri(config, candidate) == service_uri: return True - name_data = get_all_representations_of_term(config, vocab_id='islandora_media_use', name=candidate) - if name_data['uri'] and name_data['uri'] == service_uri: + name_data = get_all_representations_of_term( + config, vocab_id="islandora_media_use", name=candidate + ) + if name_data["uri"] and name_data["uri"] == service_uri: return True return False From ef5f8f58b2c0d19e1f7e1968c71bb1cdc76c66a7 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Tue, 20 Feb 2024 15:11:40 -0500 Subject: [PATCH 4/4] Remove lint check - it's automatic now --- .github/PULL_REQUEST_TEMPLATE.md | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6d846ab8..31f5d666 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -23,7 +23,6 @@ ## Checklist * [ ] Before opening this PR, have you opened an issue explaining what you want to to do? -* [ ] Have you run `pycodestyle --show-source --show-pep8 --ignore=E402,W504 --max-line-length=200 yourfile.py`? * [ ] Have you included some configuration and/or CSV files useful for testing this PR? * [ ] Have you written unit or integration tests if applicable? * [ ] Does the code added in this PR require a version of Python that is higher than the current minimum version?
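
For reference, the following is a minimal, hypothetical sketch (not part of any of the patches above) of how the CSV-row-ID-to-node-ID helpers reformatted in PATCH 3/4 fit together. The function names come from workbench_utils.py as shown above, but the config dict is a stripped-down stand-in for a real Workbench configuration: the key names appear in the code above, the values are illustrative assumptions, and a full task configuration loaded from YAML may supply additional settings these functions expect.

```python
# Hypothetical usage sketch; run from a Workbench checkout so that
# workbench_utils.py is importable. Config values are assumptions.
from workbench_utils import (
    populate_csv_id_to_node_id_map,
    prepare_csv_id_to_node_id_map,
    sqlite_manager,
)

config = {
    "config_file": "create.yml",  # assumed task config file name
    "temp_dir": "/tmp",  # used only if the map path below were relative
    "sqlite_db_filename": "workbench_temp_data.db",  # assumed value
    "csv_id_to_node_id_map_path": "/tmp/csv_id_to_node_id_map.db",  # assumed path
}

# Create the map database/table if it does not already exist.
prepare_csv_id_to_node_id_map(config)

# Record that CSV row "001" produced node 45 (no parent item in this example).
populate_csv_id_to_node_id_map(config, "", "", "001", "45")

# Read the mapping back; because sqlite_manager() sets row_factory to
# sqlite3.Row, columns are addressable by name.
rows = sqlite_manager(
    config,
    operation="select",
    query="SELECT csv_id, node_id FROM csv_id_to_node_id_map WHERE config_file = ?",
    values=("create.yml",),
    db_file_path=config["csv_id_to_node_id_map_path"],
)
for row in rows:
    print(row["csv_id"], row["node_id"])
```

Because sqlite_manager() resolves relative database file names against config["temp_dir"], the absolute path used in this sketch keeps it independent of that setting.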