diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py
index 3ae59bd7..642b9930 100644
--- a/WorkbenchConfig.py
+++ b/WorkbenchConfig.py
@@ -26,6 +26,12 @@ def __init__(self, args):
     def get_config(self):
         config = self.get_default_config()
         user_mods = self.get_user_config()
+        # Allow extension additions and overrides. pop() always removes the key, so
+        # an empty or null value cannot replace the default mapping in the blend
+        # loop below.
+        extension_overrides = user_mods.pop('mimetype_extensions', None) or []
+        for candidate in extension_overrides:
+            config['mimetype_extensions'].update(candidate)
         # Blend defaults with user mods
         for key, value in user_mods.items():
             config[key] = value
@@ -87,6 +93,17 @@ def get_media_types(self):
             {'extracted_text': ['txt']}
         ]
 
+    # Returns default file extensions for mimetypes
+    def get_default_extensions(self):
+        return {'image/jpeg': '.jpg',
+                'image/jp2': '.jp2',
+                'image/png': '.png',
+                'audio/mpeg': '.mp3',
+                'audio/mp3': '.mp3',
+                'text/plain': '.txt',
+                'application/octet-stream': '.bin'
+                }
+
     # Returns default configs, to be updated by user-supplied config.
     def get_default_config(self):
         return {
@@ -143,6 +160,7 @@ def get_default_config(self):
             'paged_content_sequence_separator': '-',
             'media_bundle_file_fields': self.get_media_fields(),
             'media_fields': self.get_media_fields(),
+            'mimetype_extensions': self.get_default_extensions(),
         }
 
     # Tests validity and existence of path.
diff --git a/workbench_utils.py b/workbench_utils.py
index 24769616..c88793d0 100644
--- a/workbench_utils.py
+++ b/workbench_utils.py
@@ -2003,7 +2003,7 @@ def create_media(config, filename, file_fieldname, node_id, node_csv_row, media_
     media_type = set_media_type(config, filename, file_fieldname, node_csv_row)
     media_bundle_response_code = ping_media_bundle(config, media_type)
     if media_bundle_response_code == 404:
-        message = 'File "' + file_check_row[filename_field] + '" identified in CSV row ' + file_check_row[config['id_field']] + \
+        message = 'File "' + node_csv_row[file_fieldname] + '" identified in CSV row ' + node_csv_row[config['id_field']] + \
             ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.'
         logging.error(message)
         return False
@@ -4032,21 +4032,24 @@ def get_csv_from_excel(config):
     csv_writer_file_handle.close()
 
 
-def get_extension_from_mimetype(mimetype):
+def get_extension_from_mimetype(config, mimetype):
+    """Returns the file extension (with leading dot) for a MIME type, preferring
+       config['mimetype_extensions'] and falling back to the mimetypes module.
+    """
     # mimetypes.add_type() is not working, e.g. mimetypes.add_type('image/jpeg', '.jpg')
     # Maybe related to https://bugs.python.org/issue4963? In the meantime, provide our own
     # MIMETYPE to extension mapping for common types, then let mimetypes guess at others.
-    map = {'image/jpeg': '.jpg',
-           'image/jp2': '.jp2',
-           'image/png': '.png',
-           'audio/mpeg': '.mp3',
-           'text/plain': '.txt',
-           'application/octet-stream': '.bin'
-           }
+    map = config['mimetype_extensions']
     if mimetype in map:
         return map[mimetype]
     else:
-        return mimetypes.guess_extension(mimetype)
+        extension = mimetypes.guess_extension(mimetype)
+        if extension is None:
+            # Callers append the return value to a filename, so never return None;
+            # fall back to the generic binary extension instead.
+            logging.warning('There is no mapping available for %s; using the generic binary extension.', mimetype)
+            extension = map.get('application/octet-stream', '.bin')
+        return extension
 
 
 def get_deduped_file_path(path):
@@ -4121,7 +4124,7 @@ def check_file_exists(config, filename):
     return True
 
 
-def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = None):
+def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id=None):
     """For remote/downloaded files, generates the path to the local temporary
        copy and returns that path. For local files, just returns the value of
        node_csv_row['file'].
@@ -4193,7 +4196,7 @@ def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = No
         except KeyError:
             mimetype = 'application/octet-stream'
 
-        extension_with_dot = get_extension_from_mimetype(mimetype)
+        extension_with_dot = get_extension_from_mimetype(config, mimetype)
         downloaded_file_path = os.path.join(subdir, filename + extension_with_dot)
 
         # Check to see if a file with this path already exists; if so, insert an