Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Nov 12, 2024
2 parents 805f01b + dacfe2d commit b7df4a9
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 17 deletions.
1 change: 1 addition & 0 deletions WorkbenchConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ def get_default_config(self):
"csv_value_templates_rand_length": 5,
"allow_csv_value_templates_if_field_empty": [],
"remind_user_to_run_check": False,
"media_type_by_media_use": False,
}

# Tests validity and existence of configuration file path.
Expand Down
7 changes: 7 additions & 0 deletions i7Import/i7ImportUtilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(self, config_location):
"start": 0,
"rows": 100000,
"secure_ssl_only": True,
"pids": False,
}

def get_config(self):
Expand Down Expand Up @@ -181,6 +182,12 @@ def get_default_metadata_solr_request(self):
fedora_collections.append(f'{fedora_prefix}"{collection}"')
fq_string = "&fq=" + " or ".join(fedora_collections)
query = f"{query}{fq_string}"
if self.config["pids"]:
pids_to_use = []
for candidate in self.config["pids"]:
pids_to_use.append(f"PID:{candidate}")
fq_string = "&fq=" + " or ".join(pids_to_use)
query = f"{query}{fq_string}"

# Get the populated CSV from Solr, with the object namespace and field list filters applied.
return query
Expand Down
3 changes: 3 additions & 0 deletions tests/assets/set_media_type_test/multi_types_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ media_types_override:
- sometextmedia: ['txt']

secure_ssl_only: false

media_type_by_media_use:
- https://projects.iq.harvard.edu/fits: fits_technical_metadata
8 changes: 4 additions & 4 deletions workbench
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ def create():
logging.info(message)

path_to_rollback_csv_file = get_rollback_csv_filepath(config)
write_rollback_config(config, path_to_rollback_csv_file)
prep_rollback_csv(config, path_to_rollback_csv_file)
logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)
logging.info(f"Writing rollback CSV to {path_to_rollback_csv_file}.")

prepare_csv_id_to_node_id_map(config)

Expand Down Expand Up @@ -471,8 +472,6 @@ def create():
if "url_alias" in row and len(row["url_alias"]) > 0:
create_url_alias(config, node_id, row["url_alias"])

write_rollback_config(config, path_to_rollback_csv_file)

# If the file named in 'file' can't be found.
if "file" in row and len(row["file"].strip()) > 0:
if (
Expand Down Expand Up @@ -2265,8 +2264,9 @@ def create_from_files():
files = os.listdir(file_dir_path)

path_to_rollback_csv_file = get_rollback_csv_filepath(config)
write_rollback_config(config, path_to_rollback_csv_file)
prep_rollback_csv(config, path_to_rollback_csv_file)
logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)
logging.info(f"Writing rollback CSV to {path_to_rollback_csv_file}.")

num_files = len(files)
file_count = 0
Expand Down
102 changes: 89 additions & 13 deletions workbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def set_media_type(config, filepath, file_fieldname, csv_row):
"""
if "media_type" in config:
return config["media_type"]
if config["media_type_by_media_use"] and len(config["media_type_by_media_use"]) > 0:
additional_files = get_additional_files_config(config)
media_url = additional_files.get(file_fieldname)
if file_fieldname in additional_files:
for entry in config["media_type_by_media_use"]:
for key, value in entry.items():
if key == media_url:
return value

# Determine if the incoming filepath matches a registered oEmbed media type.
oembed_media_type = get_oembed_url_media_type(config, filepath)
Expand Down Expand Up @@ -8564,7 +8572,7 @@ def write_to_output_csv(config, id, node_json, input_csv_row=None):
for field_name in node_dict:
if field_name.startswith("field_"):
row[field_name] = serialize_field_json(
config, field_definitions, field_name, node_dict[fifileeld_name]
config, field_definitions, field_name, node_dict[field_name]
)
row.update(input_csv_row)
writer.writerow(row)
Expand Down Expand Up @@ -8937,11 +8945,41 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):


def get_rollback_csv_filepath(config):
if "rollback_csv_filename_template" in config:
config_filename, task_config_ext = os.path.splitext(config["config_file"])
input_csv_filename, input_csv_ext = os.path.splitext(config["input_csv"])

rollback_csv_filename_template = string.Template(
config["rollback_csv_filename_template"]
)
try:
rollback_csv_filename_basename = str(
rollback_csv_filename_template.substitute(
{
"config_filename": config_filename,
"input_csv_filename": input_csv_filename,
}
)
)
except Exception as e:
# We need to account for the very common case where the user has included "valid identifier characters"
# (as defined in https://peps.python.org/pep-0292/) as part of their template. The most common case will
# likely be underscores separating the template placeholders.
message = f'One or more parts of the configured rollback csv filename template ({config["rollback_csv_filename_template"]}) need adjusting.'
logging.error(
f"{message} A {e.__class__.__name__} exception occured with the error message {e}. Please refer to the Workbench documentation for suggestions."
)
sys.exit(
f"Error: {message} Please refer to your Workbench log and to the Workbench documentation for suggestions."
)
else:
rollback_csv_filename_basename = "rollback"

if config["timestamp_rollback"] is True:
now_string = EXECUTION_START_TIME.strftime("%Y_%m_%d_%H_%M_%S")
rollback_csv_filename = "rollback." + now_string + ".csv"
rollback_csv_filename = f"{rollback_csv_filename_basename}.{now_string}.csv"
else:
rollback_csv_filename = "rollback.csv"
rollback_csv_filename = f"{rollback_csv_filename_basename}.csv"

if os.environ.get("ISLANDORA_WORKBENCH_SECONDARY_TASKS") is not None:
secondary_tasks = json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"])
Expand All @@ -8955,12 +8993,45 @@ def get_rollback_csv_filepath(config):


def write_rollback_config(config, path_to_rollback_csv_file):
if "rollback_config_filename_template" in config:
config_filename, task_config_ext = os.path.splitext(config["config_file"])
input_csv_filename, input_csv_ext = os.path.splitext(config["input_csv"])

rollback_config_filename_template = string.Template(
config["rollback_config_filename_template"]
)
try:
rollback_config_filename_basename = str(
rollback_config_filename_template.substitute(
{
"config_filename": config_filename,
"input_csv_filename": input_csv_filename,
}
)
)
except Exception as e:
# We need to account for the very common case where the user has included "valid identifier characters"
# (as defined in https://peps.python.org/pep-0292/) as part of their template. The most common case will
# likely be underscores separating the template placeholders.
message = f'One or more parts of the configured rollback configuration filename template ({config["rollback_config_filename_template"]}) need adjusting.'
logging.error(
f"{message} A {e.__class__.__name__} exception occured with the error message {e}. Please refer to the Workbench documentation for suggestions."
)
sys.exit(
f"Error: {message} Please refer to your Workbench log and to the Workbench documentation for suggestions."
)
else:
rollback_config_filename_basename = "rollback"

if config["timestamp_rollback"] is True:
now_string = EXECUTION_START_TIME.strftime("%Y_%m_%d_%H_%M_%S")
rollback_config_filename = "rollback." + now_string + ".yml"
rollback_config_filename = (
f"{rollback_config_filename_basename}.{now_string}.yml"
)
else:
rollback_config_filename = "rollback.yml"
rollback_config_filename = f"{rollback_config_filename_basename}.yml"

logging.info(f"Writing rollback configuration file to {rollback_config_filename}.")
rollback_config_file = open(rollback_config_filename, "w")
rollback_comments = get_rollback_config_comments(config)
rollback_config_file.write(rollback_comments)
Expand Down Expand Up @@ -9006,17 +9077,22 @@ def write_rollback_node_id(config, node_id, path_to_rollback_csv_file):


def get_rollback_config_comments(config):
    """Build the comment header written at the top of the rollback configuration file.

    The header always starts with two lines recording the task name, the
    task's start time, the configuration file, and the input CSV. If the
    "rollback_file_comments" setting is present and non-empty, each of its
    entries is appended as an additional comment line.

    Parameters:
        config: dict
            The task configuration. Must contain "task", "config_file", and
            "input_csv"; may contain "rollback_file_comments" (a list of
            strings; a single string is also tolerated).

    Returns:
        str
            The comment lines, each prefixed with "# " and terminated by a
            newline.
    """
    task = config["task"]
    config_file = config["config_file"]
    input_csv = config["input_csv"]
    # EXECUTION_START_TIME is a module-level value defined elsewhere in this file.
    time_string = EXECUTION_START_TIME.strftime("%Y:%m:%d %H:%M:%S")

    # Only the first entry carries its own "# " prefix; the remaining entries
    # receive theirs from the join separator below.
    comments = [
        f'# Generated by a "{task}" task started {time_string} using',
        f'config file "{config_file}" and input CSV "{input_csv}".',
    ]

    extra_comments = config.get("rollback_file_comments")
    if extra_comments:
        # A bare string would otherwise be split into one comment per character.
        if isinstance(extra_comments, str):
            extra_comments = [extra_comments]
        # Coerce to str so that non-string YAML scalars (e.g. numbers) cannot break the join.
        comments.extend(str(comment) for comment in extra_comments)

    return "\n# ".join(comments) + "\n"


def get_csv_from_google_sheet(config):
Expand Down

0 comments on commit b7df4a9

Please sign in to comment.