From dd4d81a1fe1dd5b4f62551bf6c70c2f5089ba12a Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Tue, 15 Oct 2024 17:46:50 -0700 Subject: [PATCH] feat: include restricted runs in algolia objects ENT-9505 --- enterprise_catalog/apps/api/tasks.py | 28 +++++++++++++++- .../apps/api/tests/test_tasks.py | 10 +++++- enterprise_catalog/apps/catalog/models.py | 33 ++++++++++++++++--- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/enterprise_catalog/apps/api/tasks.py b/enterprise_catalog/apps/api/tasks.py index 2fef38b2..8d640bf4 100644 --- a/enterprise_catalog/apps/api/tasks.py +++ b/enterprise_catalog/apps/api/tasks.py @@ -11,6 +11,7 @@ from celery import shared_task, states from celery.exceptions import Ignore from celery_utils.logged_task import LoggedTask +from django.conf import settings from django.db import IntegrityError from django.db.models import Prefetch, Q from django.db.utils import OperationalError @@ -517,7 +518,14 @@ def index_enterprise_catalog_in_algolia_task(self, force=False, dry_run=False): logger.info( f'{_reindex_algolia_prefix(dry_run)} invoking task with arguments force={force}, dry_run={dry_run}.' ) - courses_content_metadata = ContentMetadata.objects.filter(content_type=COURSE) + courses_content_metadata = ContentMetadata.objects.filter( + content_type=COURSE, + ) + # Make sure the courses we consider for indexing actually contain restricted runs so that + # "unicorn" courses (i.e. courses that contain only restricted runs) do not get discarded by + # partition_course_keys_for_indexing() for not having an advertised run. + if getattr(settings, 'SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS', False): + courses_content_metadata = courses_content_metadata.prefetch_restricted_overrides() indexable_course_keys, nonindexable_course_keys = partition_course_keys_for_indexing( courses_content_metadata, ) @@ -852,6 +860,15 @@ def _get_algolia_products_for_batch( ).prefetch_related( Prefetch('catalog_queries', queryset=all_catalog_queries), ) + if getattr(settings, 'SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS', False): + # Make the courses that we index actually contain restricted runs in the payload. + content_metadata_no_courseruns = content_metadata_no_courseruns.prefetch_restricted_overrides() + # Also just prefetch the rest of the restricted courses which will + # allow us to find all catalog_queries explicitly allowing a restricted + # run for each course. + content_metadata_no_courseruns = content_metadata_no_courseruns.prefetch_related( + 'restricted_courses__catalog_query' + ) # Perform filtering of non-indexable objects in-memory because the list may be too long to shove into a SQL query. content_metadata_no_courseruns = [ cm for cm in content_metadata_no_courseruns @@ -888,6 +905,15 @@ def _get_algolia_products_for_batch( # Course runs should contribute their UUIDs to the parent course. content_key = metadata.parent_content_key associated_catalog_queries = metadata.catalog_queries.all() + if metadata.content_type == COURSE and getattr(settings, 'SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS', False): + # "unicorn" courses (i.e. courses with only restricted runs) should only be indexed for + # catalog queries that explicitly allow runs in those courses. We can tell that a course + # has only restricted runs simply by checking that it normally doesn't have an + # advertised run. + # pylint: disable=protected-access + is_unrestricted_course_advertised = bool(metadata._json_metadata.get('advertised_course_run_uuid')) + if not is_unrestricted_course_advertised: + associated_catalog_queries = metadata.restricted_courses.values_list('catalog_query', flat=True) for video in videos: if (metadata.content_type == COURSE_RUN and video.parent_content_metadata.content_key == metadata.content_key): diff --git a/enterprise_catalog/apps/api/tests/test_tasks.py b/enterprise_catalog/apps/api/tests/test_tasks.py index 1505bfa3..b9f90d64 100644 --- a/enterprise_catalog/apps/api/tests/test_tasks.py +++ b/enterprise_catalog/apps/api/tests/test_tasks.py @@ -9,7 +9,7 @@ import ddt from celery import states -from django.test import TestCase +from django.test import TestCase, override_settings from django_celery_results.models import TaskResult from enterprise_catalog.apps.academy.tests.factories import AcademyFactory @@ -980,6 +980,7 @@ def test_get_algolia_objects_from_course_metadata(self): assert set(object_queries_titles) == {str(query.title) for query in catalog_queries} @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_program_common_uuids_only(self, mock_search_client): """ Assert that when a program contains multiple courses, that program only inherits the UUIDs common to all @@ -1076,6 +1077,7 @@ def mock_replace_all_objects(products_iterable): assert expected_program_call_args == actual_program_call_args @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_program_unindexable_content(self, mock_search_client): """ Assert that when a program contains ANY unindexable courses, that program is not indexed for any catalog @@ -1197,6 +1199,7 @@ def mock_get_algolia_products_for_batch( ] @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_published_course_to_program(self, mock_search_client): """ Assert that only only "indexable" objects are indexed, particularly when an unpublished course is associated @@ -1300,6 +1303,7 @@ def mock_replace_all_objects(products_iterable): assert expected_call_args == self._sort_tags_in_algolia_object_list(actual_call_args) @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_unpublished_course_to_program(self, mock_search_client): """ Assert that only only "indexable" objects are indexed, particularly when an unpublished course is associated @@ -1408,6 +1412,7 @@ def mock_replace_all_objects(products_iterable): assert expected_call_args == self._sort_tags_in_algolia_object_list(actual_call_args) @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_published_course_to_pathway(self, mock_search_client,): """ Assert that only only "indexable" objects are indexed, particularly when a published course is associated with a @@ -1507,6 +1512,7 @@ def mock_replace_all_objects(products_iterable): assert expected_call_args == self._sort_tags_in_algolia_object_list(actual_call_args) @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_unpublished_course_to_pathway(self, mock_search_client): """ Assert that only only "indexable" objects are indexed, particularly when an unpublished course is associated @@ -1610,6 +1616,7 @@ def mock_replace_all_objects(products_iterable): assert expected_call_args == self._sort_tags_in_algolia_object_list(actual_call_args) @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) def test_index_algolia_program_to_pathway(self, mock_search_client): """ Assert that only only "indexable" objects are indexed, particularly when a hidden, and non-hidden program is @@ -1746,6 +1753,7 @@ def mock_replace_all_objects(products_iterable): assert expected_call_args == self._sort_tags_in_algolia_object_list(actual_call_args) @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock()) + @override_settings(SHOULD_INDEX_COURSES_WITH_RESTRICTED_RUNS=True) # pylint: disable=too-many-statements def test_index_algolia_all_uuids(self, mock_search_client): """ diff --git a/enterprise_catalog/apps/catalog/models.py b/enterprise_catalog/apps/catalog/models.py index d5f1c4a8..44217d8f 100644 --- a/enterprise_catalog/apps/catalog/models.py +++ b/enterprise_catalog/apps/catalog/models.py @@ -293,12 +293,10 @@ def content_metadata_with_restricted(self): # Provide json_metadata overrides via dynamic attribute if any restricted runs are allowed. if self.catalog_query.restricted_runs_allowed: # FYI: prefetch causes a performance penalty by introducing a 2nd database query. - prefetch_qs = models.Prefetch( - 'restricted_courses', - queryset=RestrictedCourseMetadata.objects.filter(catalog_query=self.catalog_query), - to_attr='restricted_course_metadata_for_catalog_query', + related_contentmetadata = related_contentmetadata.prefetch_restricted_overrides( + catalog_query=self.catalog_query, ) - related_contentmetadata = related_contentmetadata.prefetch_related(prefetch_qs) + return related_contentmetadata.all() @cached_property @@ -615,6 +613,29 @@ def get_xapi_activity_id(self, content_resource, content_key): return xapi_activity_id +class ContentMetadataQuerySet(models.QuerySet): + """ + Customer queryset for ContentMetadata providing convenience methods to augment the results. + """ + + def prefetch_restricted_overrides(self, catalog_query=None): + """ + Augment this queryset by fetching "override" metadata if any exist for a given + CatalogQuery. The `json_metadata` attribute of courses returned by this new + queryset will be overridden if a related RestrictedCourseMetadata exists. + """ + # If catalog_query is None, look for the "canonical" RestrictedCourseMetadata + # object which has a NULL catalog_query. + catalog_query_filter = {'catalog_query': catalog_query} if catalog_query else {'catalog_query__isnull': True} + return self.prefetch_related( + models.Prefetch( + 'restricted_courses', + queryset=RestrictedCourseMetadata.objects.filter(**catalog_query_filter), + to_attr='restricted_course_metadata_for_catalog_query', + ) + ) + + class ContentMetadataManager(models.Manager): """ Customer manager for ContentMetadata that forces the `modified` field @@ -753,6 +774,8 @@ class Meta: history = HistoricalRecords() + objects = ContentMetadataManager().from_queryset(ContentMetadataQuerySet)() + @property def json_metadata(self): """