Skip to content

Commit

Permalink
Resolves #817.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Aug 23, 2024
1 parent 525c1c9 commit 3c14eac
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 5 deletions.
7 changes: 7 additions & 0 deletions WorkbenchConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ def get_config(self):
config["temp_dir"], "csv_id_to_node_id_map.db"
)

if "page_files_source_dir_field" in user_mods:
config["page_files_source_dir_field"] = user_mods[
"page_files_source_dir_field"
]
else:
config["page_files_source_dir_field"] = config["id_field"]

config["config_file"] = self.args.config

return config
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
paged_content_from_directories: true
paged_content_page_model_tid: http://id.loc.gov/ontologies/bibframe/part
task: create
host: https://islandora.traefik.me
username: admin
password: password
input_dir: tests/assets/create_paged_content_from_directories_test/samplebooks
input_csv: metadata_page_files_source_dir_field.csv
standalone_media_url: true
secure_ssl_only: false
# Name of the input CSV column whose value is joined onto 'input_dir' to
# locate each parent's directory of page files. When this option is omitted,
# Workbench falls back to the column named by 'id_field'.
page_files_source_dir_field: directory
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
id,title,field_model
book1,Using Islandora Workbench for Fun and Profit,Digital Document
book1,Using Islandora Workbench for Fun and Profit,Paged Content
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,title,field_model,directory
id:book1,Using Islandora Workbench for Fun and Profit,Paged Content,book1
111 changes: 111 additions & 0 deletions tests/islandora_tests_paged_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,5 +209,116 @@ def tearDown(self):
os.remove(rollback_file_path)


class TestCreatePagedContentFromDirectoriesPageFilesSourceDirField(unittest.TestCase):
    """Create paged content using the 'page_files_source_dir_field' option.

    The config file used here sets 'page_files_source_dir_field' so that the
    directory holding each book's page files is named by a dedicated CSV
    column rather than by the ID column. The test runs Workbench as a
    subprocess, then checks the parent/child relationships and page weights
    through the parent node's "members" REST export.

    Note: this test talks to the Islandora instance named in the config file.
    It creates media and does not delete them.
    """

    def setUp(self):
        self.current_dir = os.path.dirname(os.path.abspath(__file__))
        self.create_config_file_path = os.path.join(
            self.current_dir,
            "assets",
            "create_paged_content_from_directories_test",
            "books_page_files_source_dir_field.yml",
        )

        yaml = YAML()
        with open(self.create_config_file_path, "r", encoding="utf-8") as f:
            config_file_contents = f.read()
        config = dict(yaml.load(config_file_contents))

        self.islandora_host = config["host"]
        self.islandora_username = config["username"]
        self.islandora_password = config["password"]

        self.create_cmd = ["./workbench", "--config", self.create_config_file_path]

        self.temp_dir = tempfile.gettempdir()

        # Initialized here (not in the test method) so tearDown() can always
        # iterate over it, no matter where the test method stops.
        self.nids = []

    def test_create_paged_content_from_directories(self):
        requests.packages.urllib3.disable_warnings()
        create_output = subprocess.check_output(self.create_cmd)
        create_output = create_output.decode().strip()

        # Collect the node IDs of the nodes created during this test so they
        # can be deleted in tearDown().
        for line in create_output.splitlines():
            if "created at" not in line:
                continue
            nid = line.rsplit("/", 1)[-1]
            nid = nid.strip(".")
            # E.g. a URL alias.
            if workbench_utils.value_is_numeric(nid) is False:
                url = line[line.find("http") :].strip(".")
                nid = workbench_utils.get_nid_from_url_without_config(url)
            self.nids.append(nid)

        self.assertEqual(len(self.nids), 4)

        # Test every page object's 'field_member_of' value to see if it matches
        # its parent's node ID (the first node created). Note: the metadata CSV
        # file used to create the paged content and page objects uses hard-coded
        # term IDs from the Islandora Models taxonomy as used in the Islandora
        # Playbook. If they change or are different in the Islandora this test
        # is running against, this test will fail.
        parent_node_id_to_test = self.nids[0]
        # Get the REST feed for the parent node's members.
        members_url = (
            self.islandora_host
            + "/node/"
            + parent_node_id_to_test
            + "/members?_format=json"
        )
        # Need to provide credentials for this REST export.
        members_response = requests.get(
            members_url,
            auth=(self.islandora_username, self.islandora_password),
            verify=False,
        )
        members = json.loads(members_response.text)

        expected_member_weights = [1, 2, 3]
        retrieved_member_weights = []
        for member in members:
            retrieved_member_weights.append(int(member["field_weight"][0]["value"]))
            # Test that each page is indeed a member of the first node created
            # during this test.
            self.assertEqual(
                int(parent_node_id_to_test),
                int(member["field_member_of"][0]["target_id"]),
            )

        # Test that the weights assigned to the three pages are what we expect.
        self.assertEqual(expected_member_weights, retrieved_member_weights)

    def tearDown(self):
        preprocessed_csv_path = os.path.join(
            self.temp_dir, "metadata_page_files_source_dir_field.csv.preprocessed"
        )
        rollback_file_path = os.path.join(
            self.current_dir,
            "assets",
            "create_paged_content_from_directories_test",
            "samplebooks",
            "rollback.csv",
        )

        try:
            for nid in self.nids:
                quick_delete_cmd = [
                    "./workbench",
                    "--config",
                    self.create_config_file_path,
                    "--quick_delete_node",
                    f"{self.islandora_host}/node/{nid}",
                ]
                # Raises CalledProcessError if Workbench exits non-zero.
                subprocess.check_output(quick_delete_cmd)
        finally:
            # Remove local artifacts even if a node deletion failed, so a
            # failed run does not leave stale files behind for later tests.
            for leftover_path in (preprocessed_csv_path, rollback_file_path):
                if os.path.exists(leftover_path):
                    os.remove(leftover_path)


if __name__ == "__main__":
unittest.main()
4 changes: 4 additions & 0 deletions workbench
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ def create():
if custom_field == "checksum":
continue

# 'directory' is a reserved CSV field.
if custom_field == "directory":
continue

# We skip CSV columns whose headers use the 'media:video:field_foo' media track convention.
if custom_field.startswith("media:"):
continue
Expand Down
10 changes: 6 additions & 4 deletions workbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1816,6 +1816,7 @@ def check_input(config, args):
# but it doesn't show up in any field configs.
reserved_fields = [
"file",
"directory",
"media_use_tid",
"checksum",
"node_id",
Expand Down Expand Up @@ -3601,7 +3602,8 @@ def check_input(config, args):
paged_content_from_directories_csv_data, start=1
):
dir_path = os.path.join(
config["input_dir"], file_check_row[config["id_field"]]
config["input_dir"],
file_check_row[config["page_files_source_dir_field"]],
)
if not os.path.exists(dir_path) or os.path.isfile(dir_path):
message = (
Expand Down Expand Up @@ -5783,7 +5785,6 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None):
row_num = 0
unique_identifiers = []

# WIP on #812.
# Prepare any "csv_row_filters", which we apply to each row, below.
if "csv_row_filters" in config and len(config["csv_row_filters"]) > 0:
row_filters_is = dict()
Expand Down Expand Up @@ -8545,7 +8546,8 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):
# weight assigned to the page is the last segment in the filename, split from the rest of the filename using the
# character defined in the 'paged_content_sequence_separator' config option.
parent_id = parent_csv_record[config["id_field"]]
page_dir_path = os.path.join(config["input_dir"], str(parent_id).strip())
page_dir_name = parent_csv_record[config["page_files_source_dir_field"]]
page_dir_path = os.path.join(config["input_dir"], page_dir_name)

if "paged_content_additional_page_media" in config:
if "paged_content_image_file_extension" in config:
Expand Down Expand Up @@ -8771,7 +8773,7 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):
config, parent_id, parent_node_id, page_file_name, node_nid
)

page_file_path = os.path.join(parent_id, page_file_name)
page_file_path = os.path.join(page_dir_name, page_file_name)
fake_csv_record = collections.OrderedDict()
fake_csv_record["title"] = page_title
fake_csv_record["file"] = page_file_path
Expand Down

0 comments on commit 3c14eac

Please sign in to comment.