From 78e7b85f7fa316d4827a894a1029ead3e28b1b31 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 12 Dec 2023 13:18:07 -0400 Subject: [PATCH 1/4] made allowances for values needing encoding --- .../configs/islandora7_import_sample.config | 7 +-- i7Import/i7ImportUtilities.py | 46 +++++++++++++------ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/i7Import/configs/islandora7_import_sample.config b/i7Import/configs/islandora7_import_sample.config index 6c00d0e..5b70f73 100644 --- a/i7Import/configs/islandora7_import_sample.config +++ b/i7Import/configs/islandora7_import_sample.config @@ -1,5 +1,5 @@ -solr_base_url: 'http://localhost:8080/solr' -islandora_base_url: 'http://localhost:8000' +solr_base_url: 'http://solr.islandora.dev/solr/#/' +islandora_base_url: 'https://islandora.dev/' csv_output_path: 'islandora7_metadata.csv' obj_directory: '/tmp/objs' log_file_path: 'islandora_content.log' @@ -12,4 +12,5 @@ collections: - islandora:collection1 - random:collection7 solr_filters: - 'some_solr_field_ms': 'some_value' \ No newline at end of file + 'some_solr_field_ms': 'some_value' +debug: True \ No newline at end of file diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 275f21a..85b0dc8 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -1,3 +1,5 @@ +import inspect + from ruamel.yaml import YAML import mimetypes import requests @@ -8,6 +10,7 @@ import os from rich.console import Console from rich.table import Table +import urllib.parse class i7ImportUtilities: @@ -139,17 +142,17 @@ def get_default_metadata_solr_request(self): for standard_field in self.config['standard_fields']: filtered_field_list.insert(0, standard_field) fields_param = ','.join(filtered_field_list) - query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&start={self.config['start']}&rows={self.config['rows']}&fl={fields_param}" - + query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&rows=1000000&fl={fields_param}" if self.config['collection']: collection = self.config['collection'] - query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s: "info:fedora/{collection}"' + query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/{collection}"' if self.config['content_model']: model = self.config['content_model'] - query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"' + query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info\:fedora/{model}"' if self.config['solr_filters']: - for key, value in self.config['solr_filters'].items(): - query = f'{query}&fq={key}:"{value}"' + for filter in self.config['solr_filters']: + for key, value in filter.items(): + query = f'{query}&fq={key}:"{value}"' fedora_prefix = 'RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/' if self.config['collections']: collections = self.config['collections'] @@ -159,7 +162,6 @@ def get_default_metadata_solr_request(self): fq_string = "&fq=" + ' or '.join(fedora_collections) query = f'{query}{fq_string}' - # Get the populated CSV from Solr, with the object namespace and field list filters applied. return query @@ -208,11 +210,29 @@ def print_config(self): table.add_column("Parameter", justify="left") table.add_column("Value", justify="left") for key, value in self.config.items(): - if str(type(value)) == '': - new_value = '' - for k, v in value.items(): - new_value += f"{k}: {v}\n" - value = new_value table.add_row(key, str(value)) console = Console() - console.print(table) \ No newline at end of file + console.print(table) + + def test_config(self): + query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&rows=1000000" + if self.config['collection']: + collection = self.config['collection'] + query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/{collection}"' + if self.config['content_model']: + model = self.config['content_model'] + query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info\:fedora/{model}"' + if self.config['solr_filters']: + keys = list(self.config['solr_filters'].keys()) + for key in keys: + value = urllib.parse.quote(self.config['solr_filters'][key]) + query = f'{query}&fq={key}:{value}' + fedora_prefix = 'RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/' + if self.config['collections']: + collections = self.config['collections'] + fedora_collections = [] + for collection in collections: + fedora_collections.append(f'{fedora_prefix}"{collection}"') + fq_string = "&fq=" + ' or '.join(fedora_collections) + query = f'{query}{fq_string}' + self.print_config() From c781a28c87967ad9d97a2d3c4efcc5c2e06e8026 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 12 Dec 2023 13:41:03 -0400 Subject: [PATCH 2/4] Cleaned up config display --- i7Import/get_islandora_7_content.py | 2 +- i7Import/i7ImportUtilities.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index 70102a7..31e02de 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -36,7 +36,7 @@ filemode='a', format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') - +utils.print_config() if args.metadata_solr_request: metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 85b0dc8..353ab93 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -1,5 +1,3 @@ -import inspect - from ruamel.yaml import YAML import mimetypes import requests @@ -210,6 +208,16 @@ def print_config(self): table.add_column("Parameter", justify="left") table.add_column("Value", justify="left") for key, value in self.config.items(): + if str(type(value)) == '': + new_value = '' + for k, v in value.items(): + new_value += f"{k}: {v}\n" + value = new_value + if str(type(value)) == '': + new_value = '' + for item in value: + new_value += f"{item}\n" + value = new_value table.add_row(key, str(value)) console = Console() console.print(table) From ca787956c5c679e41d6f55befa1134e9f30dbaed Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 12 Dec 2023 13:43:38 -0400 Subject: [PATCH 3/4] removed debugging line --- i7Import/get_islandora_7_content.py | 1 - 1 file changed, 1 deletion(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index 31e02de..e997fff 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -36,7 +36,6 @@ filemode='a', format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') -utils.print_config() if args.metadata_solr_request: metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: From a5399b6eeb993b5d37568d5fb6ba78dc28cca552 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 12 Dec 2023 14:52:53 -0400 Subject: [PATCH 4/4] code cleanup, better printing --- i7Import/i7ImportUtilities.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 353ab93..8c39439 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -140,17 +140,18 @@ def get_default_metadata_solr_request(self): for standard_field in self.config['standard_fields']: filtered_field_list.insert(0, standard_field) fields_param = ','.join(filtered_field_list) - query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&rows=1000000&fl={fields_param}" + query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&start={self.config['start']}&rows={self.config['rows']}&fl={fields_param}" if self.config['collection']: collection = self.config['collection'] - query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/{collection}"' + query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s: "info:fedora/{collection}"' if self.config['content_model']: model = self.config['content_model'] - query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info\:fedora/{model}"' + query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"' if self.config['solr_filters']: - for filter in self.config['solr_filters']: - for key, value in filter.items(): - query = f'{query}&fq={key}:"{value}"' + keys = list(self.config['solr_filters'].keys()) + for key in keys: + value = urllib.parse.quote(self.config['solr_filters'][key]) + query = f'{query}&fq={key}:{value}' fedora_prefix = 'RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/' if self.config['collections']: collections = self.config['collections'] @@ -226,10 +227,10 @@ def test_config(self): query = f"{self.config['solr_base_url']}/select?q=PID:{self.config['namespace']}*&wt=csv&rows=1000000" if self.config['collection']: collection = self.config['collection'] - query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s:"info\:fedora/{collection}"' + query = f'{query}&fq=RELS_EXT_isMemberOfCollection_uri_s: "info:fedora/{collection}"' if self.config['content_model']: model = self.config['content_model'] - query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info\:fedora/{model}"' + query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"' if self.config['solr_filters']: keys = list(self.config['solr_filters'].keys()) for key in keys: