Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve RDM to CLM migration #23 #26

Merged
merged 28 commits into main from jmc/23_improve_rdm_to_clm_migration
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ab25446
Avoid trying to create list items that don't belong in hierarchy when …
johnatawnclementawn Aug 13, 2024
a834ac0
clarification
johnatawnclementawn Aug 13, 2024
97c31c2
Reduce queries to existing RDM tables, use temp tbl to gather records…
johnatawnclementawn Aug 13, 2024
cc7a569
Improve filter on which tree is being built for concepts in multiple …
johnatawnclementawn Aug 14, 2024
552b9ca
Mint new item and itemvalue ids for concepts that participate in mult…
johnatawnclementawn Aug 14, 2024
1eecf83
Add descriptions for new logic #23
johnatawnclementawn Aug 14, 2024
3e5203b
Check for listitems that already exist in CLM, but participate in col…
johnatawnclementawn Aug 15, 2024
3ed8d95
Add note about apostrophes in collection names
johnatawnclementawn Aug 15, 2024
1cd0ae4
nit #23
johnatawnclementawn Aug 15, 2024
d9486d7
Simplify logic for minting new ids for items and values #23
johnatawnclementawn Aug 15, 2024
2adc1bb
Merge branch 'main' into jmc/23_improve_rdm_to_clm_migration
johnatawnclementawn Aug 19, 2024
bf3b9d4
Merge branch 'main' into jmc/23_improve_rdm_to_clm_migration
johnatawnclementawn Aug 29, 2024
3e2821c
Add note for how to handle apostrophes in collection names on python …
johnatawnclementawn Aug 29, 2024
764df7b
Merge branch 'main' into jmc/23_improve_rdm_to_clm_migration
johnatawnclementawn Sep 11, 2024
5726d1b
nit #23
johnatawnclementawn Sep 11, 2024
f8490d5
Avoid id clashes when migrating collections with the same concepts th…
johnatawnclementawn Sep 11, 2024
0c64d03
Avoid hardcoding prefLabel #23
johnatawnclementawn Sep 11, 2024
950106c
Move tests to use django native test fixtures #23
johnatawnclementawn Sep 11, 2024
34bb961
typo nit #23
johnatawnclementawn Sep 11, 2024
b50924b
nit #23
johnatawnclementawn Sep 12, 2024
eaabd91
Rearrange test fixtures #23
johnatawnclementawn Sep 12, 2024
7939a9f
Add more robust tests for RDM to CLM migration #23
johnatawnclementawn Sep 12, 2024
862903e
typo nits #23
johnatawnclementawn Sep 12, 2024
fae5c51
Harden against nonexistent psl options #23
johnatawnclementawn Sep 12, 2024
9fcdd84
Capture all possible value types #23
johnatawnclementawn Sep 12, 2024
b77e2ce
Makes fixtures more accessible
johnatawnclementawn Sep 12, 2024
6c90e60
Add test to ensure psl cmd error is functional #23
johnatawnclementawn Sep 12, 2024
f57a31b
Error nicely if list with same name as collection already exists
jacobtylerwalls Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions arches_references/management/commands/controlled_lists.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from arches.app.models.models import Value
from django.core.management.base import BaseCommand
from arches.app.models.models import Value, Language
from arches_references.models import List
from django.core.management.base import BaseCommand, CommandError


class Command(BaseCommand):
Expand Down Expand Up @@ -58,11 +59,28 @@ def add_arguments(self, parser):

def handle(self, *args, **options):
if options["operation"] == "migrate_collections_to_controlled_lists":
psl = options["preferred_sort_language"]
try:
Language.objects.get(code=psl)
except Language.DoesNotExist:
raise CommandError(
"The preferred sort language, {0}, does not exist in the database.".format(
psl
)
)

if not options["overwrite"]:
for collection_name in options["collections_to_migrate"]:
if List.objects.filter(name=collection_name).exists():
raise CommandError(
f"The collection '{collection_name}' already exists."
)

self.migrate_collections_to_controlled_lists(
collections_to_migrate=options["collections_to_migrate"],
host=options["host"],
overwrite=options["overwrite"],
preferred_sort_language=options["preferred_sort_language"],
preferred_sort_language=psl,
)

def migrate_collections_to_controlled_lists(
Expand All @@ -82,6 +100,9 @@ def migrate_collections_to_controlled_lists(
-ho 'http://localhost:8000/plugins/controlled-list-manager/item/'
-psl 'fr'
-ow

For collection names that contain an apostrophe, wrap the name in double quotes and double the apostrophe, e.g. "John''s list"

jacobtylerwalls marked this conversation as resolved.
Show resolved Hide resolved
"""

collections_in_db = list(
Expand Down
259 changes: 191 additions & 68 deletions arches_references/migrations/0002_etl_collections_to_controlled_lists.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions arches_references/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@
# TODO: remove when finalizing release
SILENCED_SYSTEM_CHECKS += ["arches.E002"]

# Location for test data fixtures
FIXTURE_DIRS = [os.path.join(APP_ROOT, "..", "tests", "fixtures", "data")]

try:
from .package_settings import *
except ImportError:
Expand Down
89 changes: 67 additions & 22 deletions tests/cli_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from django.test import TestCase
from django.test.client import Client
from django.test.utils import captured_stdout
from django.core.management.base import CommandError

from arches_references.models import List
from arches_references.models import List, ListItem, ListItemValue
from arches.app.utils.skos import SKOSReader

from .test_settings import PROJECT_TEST_ROOT
Expand Down Expand Up @@ -45,7 +46,9 @@ def test_export_controlled_list(self):
class ListImportPackageTests(TestCase):

def test_import_controlled_list(self):
input_file = os.path.join(PROJECT_TEST_ROOT, "data", "controlled_lists.xlsx")
input_file = os.path.join(
PROJECT_TEST_ROOT, "fixtures", "data", "controlled_lists.xlsx"
)
output = io.StringIO()
# packages command does not yet fully avoid print()
with captured_stdout():
Expand All @@ -62,41 +65,66 @@ def test_import_controlled_list(self):


class RDMToControlledListsETLTests(TestCase):

@classmethod
def setUpTestData(cls):

skos = SKOSReader()
rdf = skos.read_file(
os.path.join(PROJECT_TEST_ROOT, "data", "concept_label_test_collection.xml")
)
ret = skos.save_concepts_from_skos(rdf)

client = Client()
client.login(username="admin", password="admin")
response = client.get(
reverse(
"make_collection",
kwargs={"conceptid": "7c90899a-dbe9-4574-9175-e69481a80b3c"},
)
)
fixtures = ["polyhierarchical_collections"]

def test_migrate_collections_to_controlled_lists(self):
output = io.StringIO()
management.call_command(
"controlled_lists",
operation="migrate_collections_to_controlled_lists",
collections_to_migrate=["Concept Label Import Test"],
collections_to_migrate=[
"Polyhierarchical Collection Test",
"Polyhierarchy Collection 2",
],
host="http://localhost:8000/plugins/controlled-list-manager/item/",
preferred_sort_language="en",
johnatawnclementawn marked this conversation as resolved.
Show resolved Hide resolved
overwrite=False,
stdout=output,
)

imported_list = List.objects.get(name="Concept Label Import Test")
imported_list = List.objects.get(name="Polyhierarchical Collection Test")
imported_items = imported_list.list_items.all()
self.assertEqual(len(imported_items), 3)

imported_item_values = ListItemValue.objects.filter(
list_item__in=imported_items
)
self.assertQuerySetEqual(
imported_item_values.values_list("value", flat=True).order_by("value"),
[
"French Test Concept 1",
"French Test Concept 2",
"French Test Concept 3",
"Test Concept 1",
"Test Concept 2",
"Test Concept 3",
],
)

imported_list_2 = List.objects.get(name="Polyhierarchy Collection 2")
imported_items_2 = imported_list_2.list_items.all()
imported_item_values_2 = ListItemValue.objects.filter(
list_item__in=imported_items_2
)

# Check that new uuids were generated for polyhierarchical concepts
self.assertNotEqual(
imported_item_values.filter(value="Test Concept 1"),
imported_item_values_2.filter(value="Test Concept 1"),
)

# Check that items with multiple prefLabels in different languages have the same listitemid
self.assertEqual(
imported_item_values.get(value="Test Concept 1").list_item_id,
imported_item_values.get(value="French Test Concept 1").list_item_id,
)

# But that items with prefLabels in different languages have different listitemvalue ids
self.assertNotEqual(
imported_item_values.get(value="Test Concept 1").pk,
imported_item_values.get(value="French Test Concept 1").pk,
)

def test_no_matching_collection_error(self):
expected_output = "Failed to find the following collections in the database: Collection That Doesn't Exist"
output = io.StringIO()
Expand All @@ -110,3 +138,20 @@ def test_no_matching_collection_error(self):
stderr=output,
)
self.assertIn(expected_output, output.getvalue().strip())

def test_no_matching_language_error(self):
expected_output = (
"The preferred sort language, nonexistent, does not exist in the database."
)
output = io.StringIO()
with self.assertRaises(CommandError) as e:
management.call_command(
"controlled_lists",
operation="migrate_collections_to_controlled_lists",
collections_to_migrate=["Polyhierarchical Collection Test"],
host="http://localhost:8000/plugins/controlled-list-manager/item/",
preferred_sort_language="nonexistent",
overwrite=False,
stderr=output,
)
self.assertEqual(expected_output, str(e.exception))
28 changes: 0 additions & 28 deletions tests/data/concept_label_test_collection.xml

This file was deleted.

Loading