Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

10 Add RDM->CLM CLI migration path #18

Merged
merged 11 commits into from
Aug 12, 2024
120 changes: 120 additions & 0 deletions arches_references/management/commands/controlled_lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from arches.app.models.models import Value
from django.core.management.base import BaseCommand


class Command(BaseCommand):
"""
Commands for running controlled list operations

"""

def add_arguments(self, parser):
    """
    Register the command-line options for this command on ``parser``.

    Options:
        -o / --operation: the operation to run (required).
        -co / --collections: one or more collection names (required).
        -ho / --host: URI prefix used when generating list item URIs.
        -ow / --overwrite: flag; recreate lists that already exist.
        -psl / --preferred_sort_language: language used to sort labels.
    """
    parser.add_argument(
        "-o",
        "--operation",
        action="store",
        dest="operation",
        required=True,
        choices=["migrate_collections_to_controlled_lists"],
        help="The operation to perform",
    )

    parser.add_argument(
        "-co",
        "--collections",
        action="store",
        dest="collections_to_migrate",
        # "+" rather than "*": a bare -co with no names would otherwise
        # parse to an empty list and the command would silently do nothing.
        nargs="+",
        required=True,
        help="One or more collections to migrate to controlled lists",
    )

    parser.add_argument(
        "-ho",
        "--host",
        action="store",
        dest="host",
        default="http://localhost:8000/plugins/controlled-list-manager/item/",
        help="Provide a host for URI generation. Default is localhost",
    )

    parser.add_argument(
        "-ow",
        "--overwrite",
        action="store_true",
        dest="overwrite",
        default=False,
        help="Overwrite the entire controlled list and its list items/values. Default false.",
    )

    parser.add_argument(
        "-psl",
        "--preferred_sort_language",
        action="store",
        dest="preferred_sort_language",
        default="en",
        help="The language to use for sorting preferred labels. Default 'en'",
    )

def handle(self, *args, **options):
    """
    Dispatch to the operation named in ``options["operation"]``.

    Only ``migrate_collections_to_controlled_lists`` is handled; any other
    value returns without doing anything.
    """
    if options["operation"] != "migrate_collections_to_controlled_lists":
        return

    # The parsed options that the migration method takes as keyword arguments.
    forwarded = (
        "collections_to_migrate",
        "host",
        "overwrite",
        "preferred_sort_language",
    )
    self.migrate_collections_to_controlled_lists(
        **{key: options[key] for key in forwarded}
    )

def migrate_collections_to_controlled_lists(
    self,
    collections_to_migrate,
    host,
    overwrite,
    preferred_sort_language,
):
    """
    Uses a postgres function to migrate collections to controlled lists

    Requested names are matched against prefLabel/identifier values of
    Collection concepts. Names with no match are reported on stderr. If at
    least one name matched, the matches are passed to
    ``__arches_migrate_collections_to_clm`` and the first column of the row
    it returns is written to stdout.

    Args:
        collections_to_migrate: names of the collections to migrate.
        host: URI prefix passed to the database function for URI generation.
        overwrite: whether to recreate lists that already exist.
        preferred_sort_language: language used to sort preferred labels.

    Example usage:
        python manage.py controlled_lists
            -o migrate_collections_to_controlled_lists
            -co 'Johns list' 'Getty AAT'
            -ho 'http://localhost:8000/plugins/controlled-list-manager/item/'
            -psl 'fr'
            -ow
    """

    collections_in_db = list(
        Value.objects.filter(
            value__in=collections_to_migrate,
            valuetype__in=["prefLabel", "identifier"],
            concept__nodetype="Collection",
        ).values_list("value", flat=True)
    )

    # Set lookup keeps the not-found check linear in the number of names.
    found = set(collections_in_db)
    failed_collections = [
        collection
        for collection in collections_to_migrate
        if collection not in found
    ]

    if failed_collections:
        self.stderr.write(
            "Failed to find the following collections in the database: %s"
            % ", ".join(failed_collections)
        )

    if not collections_in_db:
        return

    from django.db import connection

    # NOTE(review): in the future, consider implementing the Func
    # expressions API so custom db functions can be used with the ORM.
    # The context manager closes the cursor even if execute() raises.
    with connection.cursor() as cursor:
        cursor.execute(
            """
            select * from __arches_migrate_collections_to_clm(
                ARRAY[%s], %s, %s::boolean, %s
            );
            """,
            [collections_in_db, host, overwrite, preferred_sort_language],
        )
        result = cursor.fetchone()
    self.stdout.write(result[0])
Original file line number Diff line number Diff line change
Expand Up @@ -77,30 +77,30 @@ class Migration(migrations.Migration):

-- If overwrite flag is provided, completely recreate the list/items/values
if overwrite then
delete from controlled_list_item_values
where itemid in (
delete from arches_references_listitemvalue
where list_item_id in (
select id
from controlled_list_items
where listid in (
from arches_references_listitem
where list_id in (
select id
from controlled_lists
from arches_references_list
where name = any(collection_names)
)
);

delete from controlled_list_items
where listid in (
delete from arches_references_listitem
where list_id in (
select id
from controlled_lists
from arches_references_list
where name = any(collection_names)
);

delete from controlled_lists
delete from arches_references_list
where name = any(collection_names);
end if;

-- Migrate Collection -> Controlled List
insert into controlled_lists (
insert into arches_references_list (
id,
name,
dynamic,
Expand Down Expand Up @@ -171,52 +171,52 @@ class Migration(migrations.Migration):
alpha_sorted_list_item_hierarchy as (
select child as id,
row_number() over (partition by root_list order by depth, LOWER(value)) - 1 as sortorder,
root_list as listid,
root_list as list_id,
case when conceptidfrom = root_list then null -- list items at top of hierarchy have no parent list item
else conceptidfrom
end as parent_id,
depth
from ranked_prefLabels rpl
where language_rank = 1 and
root_list in (select id from controlled_lists where name = ANY(collection_names))
root_list in (select id from arches_references_list where name = ANY(collection_names))
)
insert into controlled_list_items(
insert into arches_references_listitem(
id,
uri,
sortorder,
guide,
listid,
list_id,
parent_id
)
select id,
host || id as uri,
sortorder,
false as guide,
listid,
list_id,
parent_id
from alpha_sorted_list_item_hierarchy;


-- Migrate concept values -> controlled list item values
insert into controlled_list_item_values (
insert into arches_references_listitemvalue (
id,
value,
itemid,
list_item_id,
languageid,
valuetype_id
)
select distinct (v.valueid) id,
value,
r.conceptidto as itemid,
r.conceptidto as list_item_id,
languageid,
valuetype as valuetype_id
from relations r
full join values v on r.conceptidto = v.conceptid
where relationtype = 'member' and
(valuetype = 'prefLabel' or valuetype = 'altLabel') and
r.conceptidto in (
select id from controlled_list_items where listid in (
select id from controlled_lists where name = ANY(collection_names)
select id from arches_references_listitem where list_id in (
select id from arches_references_list where name = ANY(collection_names)
)
);

Expand Down
57 changes: 55 additions & 2 deletions tests/cli_tests.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import io
import os

from django.conf import settings
from django.core import management
from django.urls import reverse
from django.test import TestCase
from django.test.client import Client
from django.test.utils import captured_stdout

from arches_references.models import List
from arches.app.utils.skos import SKOSReader

from .test_settings import PROJECT_TEST_ROOT

Expand Down Expand Up @@ -43,7 +45,7 @@ def test_export_controlled_list(self):
class ListImportPackageTests(TestCase):

def test_import_controlled_list(self):
input_file = os.path.join(PROJECT_TEST_ROOT, "data/controlled_lists.xlsx")
input_file = os.path.join(PROJECT_TEST_ROOT, "data", "controlled_lists.xlsx")
output = io.StringIO()
# packages command does not yet fully avoid print()
with captured_stdout():
Expand All @@ -57,3 +59,54 @@ def test_import_controlled_list(self):
self.assertTrue(List.objects.filter(pk=list_pk).exists())

### TODO Add test for creating new language if language code not in db but found in import file

johnatawnclementawn marked this conversation as resolved.
Show resolved Hide resolved

class RDMToControlledListsETLTests(TestCase):

@classmethod
def setUpTestData(cls):
    """
    Load the SKOS test fixture and turn its concept scheme into a collection.

    Reads ``data/concept_label_test_collection.xml``, saves its concepts, then
    requests the ``make_collection`` view for the scheme's concept id using a
    client logged in with the admin credentials.
    """
    skos = SKOSReader()
    rdf = skos.read_file(
        os.path.join(PROJECT_TEST_ROOT, "data", "concept_label_test_collection.xml")
    )
    # Return values are not needed; the calls are made for their side effects.
    skos.save_concepts_from_skos(rdf)

    client = Client()
    client.login(username="admin", password="admin")
    client.get(
        reverse(
            "make_collection",
            kwargs={"conceptid": "7c90899a-dbe9-4574-9175-e69481a80b3c"},
        )
    )

def test_migrate_collections_to_controlled_lists(self):
    """Migrating the fixture collection produces a list with three items."""
    captured = io.StringIO()
    command_options = {
        "operation": "migrate_collections_to_controlled_lists",
        "collections_to_migrate": ["Concept Label Import Test"],
        "host": "http://localhost:8000/plugins/controlled-list-manager/item/",
        "preferred_sort_language": "en",
        "overwrite": False,
        "stdout": captured,
    }
    management.call_command("controlled_lists", **command_options)

    migrated = List.objects.get(name="Concept Label Import Test")
    self.assertEqual(len(migrated.list_items.all()), 3)

def test_no_matching_collection_error(self):
    """An unknown collection name is reported on stderr."""
    captured_err = io.StringIO()
    command_options = {
        "operation": "migrate_collections_to_controlled_lists",
        "collections_to_migrate": ["Collection That Doesn't Exist"],
        "host": "http://localhost:8000/plugins/controlled-list-manager/item/",
        "preferred_sort_language": "en",
        "overwrite": False,
        "stderr": captured_err,
    }
    management.call_command("controlled_lists", **command_options)

    expected_output = "Failed to find the following collections in the database: Collection That Doesn't Exist"
    reported = captured_err.getvalue().strip()
    self.assertIn(expected_output, reported)
28 changes: 28 additions & 0 deletions tests/data/concept_label_test_collection.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/"
>
<skos:ConceptScheme rdf:about="http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c">
<skos:hasTopConcept>
<skos:Concept rdf:about="http://www.archesproject.org/0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8">
<skos:inScheme rdf:resource="http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c"/>
<skos:prefLabel xml:lang="en">{"id": "7949d7b5-6e57-469a-8f38-87aac08e1788", "value": "Test Concept 2"}</skos:prefLabel>
</skos:Concept>
</skos:hasTopConcept>
<skos:hasTopConcept>
<skos:Concept rdf:about="http://www.archesproject.org/6490ac87-ac80-41d7-a135-1119b4cd912d">
<skos:prefLabel xml:lang="en">{"id": "fad6f17d-f7c8-4fa1-b358-e8626571599e", "value": "Test Concept 3"}</skos:prefLabel>
<skos:inScheme rdf:resource="http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c"/>
</skos:Concept>
</skos:hasTopConcept>
<skos:hasTopConcept>
<skos:Concept rdf:about="http://www.archesproject.org/89ff530a-f350-44f0-ac88-bdd8904eb57e">
<skos:inScheme rdf:resource="http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c"/>
<skos:prefLabel xml:lang="en">{"id": "9fa56006-6828-480f-8395-ad5c5a84726b", "value": "Test Concept 1"}</skos:prefLabel>
</skos:Concept>
</skos:hasTopConcept>
<dcterms:title xml:lang="en">{"id": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", "value": "Concept Label Import Test"}</dcterms:title>
</skos:ConceptScheme>
</rdf:RDF>