feat!: remove Content Libraries V2 index (#33888)

BREAKING CHANGE: Removes all code, tests, and settings related to indexing of V2 (blockstore-backed) content libraries in elasticsearch. This includes indexing of top-level library metadata as well as indexing of library block metadata. Operators who enabled the experimental Library Authoring MFE *and* the experimental ENABLE_CONTENT_LIBRARY_INDEX feature may notice that sorting, filtering, and searching of V2 libraries and their blocks may now be slower and/or less powerful. The ENABLE_CONTENT_LIBRARY_INDEX feature was already disabled by default, so most/all operators (including edx.org) should not notice any difference. Removed settings include: * FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] * ENABLE_ELASTICSEARCH_FOR_TESTS * TEST_ELASTICSEARCH_USE_SSL * TEST_ELASTICSEARCH_HOST * TEST_ELASTICSEARCH_PORT For rationale, see the updated "Status" section of: ./openedx/core/djangoapps/content_libraries/docs/decisions/0001-index-libraries-in-elasticsearch.rst
openedx · Dec 7, 2023 · 140f858 · 140f858
1 parent 5ca08fb
commit 140f858
Show file tree

Hide file tree

Showing 16 changed files with 193 additions and 1,060 deletions.
diff --git a/cms/djangoapps/contentstore/tests/test_libraries.py b/cms/djangoapps/contentstore/tests/test_libraries.py
@@ -440,10 +440,6 @@ def test_sync_if_source_library_changed(self):
         html_block_2 = modulestore().get_item(lc_block.children[0])
         self.assertEqual(html_block_2.data, data2)
 
-    @patch(
-        'openedx.core.djangoapps.content_libraries.tasks.SearchEngine.get_search_engine',
-        Mock(return_value=None, autospec=True),
-    )
     def test_sync_if_capa_type_changed(self):
         """ Tests that children are automatically refreshed if capa type field changes """
         name1, name2 = "Option Problem", "Multiple Choice Problem"

diff --git a/cms/envs/common.py b/cms/envs/common.py
@@ -297,9 +297,6 @@
     # Enable content libraries (modulestore) search functionality
     'ENABLE_LIBRARY_INDEX': False,
 
-    # Enable content libraries (blockstore) indexing
-    'ENABLE_CONTENT_LIBRARY_INDEX': False,
-
     # .. toggle_name: FEATURES['ALLOW_COURSE_RERUNS']
     # .. toggle_implementation: DjangoSetting
     # .. toggle_default: True

diff --git a/cms/envs/devstack.py b/cms/envs/devstack.py
@@ -147,7 +147,6 @@ def should_show_debug_toolbar(request):  # lint-amnesty, pylint: disable=missing
 ################################ SEARCH INDEX ################################
 FEATURES['ENABLE_COURSEWARE_INDEX'] = True
 FEATURES['ENABLE_LIBRARY_INDEX'] = False
-FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] = False
 SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"
 
 ELASTIC_SEARCH_CONFIG = [

diff --git a/cms/envs/production.py b/cms/envs/production.py
@@ -510,7 +510,7 @@ def get_env_setting(setting):
 # Example: {'CN': 'http://api.xuetangx.com/edx/video?s3_url='}
 VIDEO_CDN_URL = ENV_TOKENS.get('VIDEO_CDN_URL', {})
 
-if FEATURES['ENABLE_COURSEWARE_INDEX'] or FEATURES['ENABLE_LIBRARY_INDEX'] or FEATURES['ENABLE_CONTENT_LIBRARY_INDEX']:
+if FEATURES['ENABLE_COURSEWARE_INDEX'] or FEATURES['ENABLE_LIBRARY_INDEX']:
     # Use ElasticSearch for the search engine
     SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"
 

diff --git a/cms/envs/test.py b/cms/envs/test.py
@@ -259,21 +259,10 @@
 # Courseware Search Index
 FEATURES['ENABLE_COURSEWARE_INDEX'] = True
 FEATURES['ENABLE_LIBRARY_INDEX'] = True
-FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] = False
 SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine"
 
 FEATURES['ENABLE_ENROLLMENT_TRACK_USER_PARTITION'] = True
 
-####################### ELASTICSEARCH TESTS #######################
-# Enable this when testing elasticsearch-based code which couldn't be tested using the mock engine
-ENABLE_ELASTICSEARCH_FOR_TESTS = os.environ.get(
-    'EDXAPP_ENABLE_ELASTICSEARCH_FOR_TESTS', 'no').lower() in ('true', 'yes', '1')
-
-TEST_ELASTICSEARCH_USE_SSL = os.environ.get(
-    'EDXAPP_TEST_ELASTICSEARCH_USE_SSL', 'no').lower() in ('true', 'yes', '1')
-TEST_ELASTICSEARCH_HOST = os.environ.get('EDXAPP_TEST_ELASTICSEARCH_HOST', 'edx.devstack.elasticsearch710')
-TEST_ELASTICSEARCH_PORT = int(os.environ.get('EDXAPP_TEST_ELASTICSEARCH_PORT', '9200'))
-
 ########################## AUTHOR PERMISSION #######################
 FEATURES['ENABLE_CREATOR_GROUP'] = False
 

diff --git a/lms/envs/test.py b/lms/envs/test.py
@@ -418,16 +418,6 @@
 FACEBOOK_APP_ID = "Test"
 FACEBOOK_API_VERSION = "v2.8"
 
-####################### ELASTICSEARCH TESTS #######################
-# Enable this when testing elasticsearch-based code which couldn't be tested using the mock engine
-ENABLE_ELASTICSEARCH_FOR_TESTS = os.environ.get(
-    'EDXAPP_ENABLE_ELASTICSEARCH_FOR_TESTS', 'no').lower() in ('true', 'yes', '1')
-
-TEST_ELASTICSEARCH_USE_SSL = os.environ.get(
-    'EDXAPP_TEST_ELASTICSEARCH_USE_SSL', 'no').lower() in ('true', 'yes', '1')
-TEST_ELASTICSEARCH_HOST = os.environ.get('EDXAPP_TEST_ELASTICSEARCH_HOST', 'edx.devstack.elasticsearch710')
-TEST_ELASTICSEARCH_PORT = int(os.environ.get('EDXAPP_TEST_ELASTICSEARCH_PORT', '9200'))
-
 ######### custom courses #########
 INSTALLED_APPS += ['lms.djangoapps.ccx', 'openedx.core.djangoapps.ccxcon.apps.CCXConnectorConfig']
 FEATURES['CUSTOM_COURSES_EDX'] = True

diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py
@@ -67,7 +67,6 @@
 from django.core.validators import validate_unicode_slug
 from django.db import IntegrityError, transaction
 from django.utils.translation import gettext as _
-from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
 from lxml import etree
 from opaque_keys.edx.keys import LearningContextKey, UsageKey
 from opaque_keys.edx.locator import (
@@ -94,7 +93,6 @@
 from openedx.core.djangoapps.content_libraries import permissions
 from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME, COMPLEX
 from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle
-from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryBlockIndexer
 from openedx.core.djangoapps.content_libraries.models import (
     ContentLibrary,
     ContentLibraryPermission,
@@ -290,56 +288,35 @@ def get_libraries_for_user(user, org=None, library_type=None):
     return permissions.perms[permissions.CAN_VIEW_THIS_CONTENT_LIBRARY].filter(user, qs)
 
 
-def get_metadata_from_index(queryset, text_search=None):
+def get_metadata(queryset, text_search=None):
     """
-    Take a list of ContentLibrary objects and return metadata stored in
-    ContentLibraryIndex.
+    Take a list of ContentLibrary objects and return metadata from blockstore.
     """
-    metadata = None
-    if ContentLibraryIndexer.indexing_is_enabled():
-        try:
-            library_keys = [str(lib.library_key) for lib in queryset]
-            metadata = ContentLibraryIndexer.get_items(library_keys, text_search=text_search)
-            metadata_dict = {
-                item["id"]: item
-                for item in metadata
-            }
-            metadata = [
-                metadata_dict[key]
-                if key in metadata_dict
-                else None
-                for key in library_keys
-            ]
-        except ElasticConnectionError as e:
-            log.exception(e)
-
-    # If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata
-    if metadata is None:
-        uuids = [lib.bundle_uuid for lib in queryset]
-        bundles = get_bundles(uuids=uuids, text_search=text_search)
-
-        if text_search:
-            # Bundle APIs can't apply text_search on a bundle's org, so including those results here
-            queryset_org_search = queryset.filter(org__short_name__icontains=text_search)
-            if queryset_org_search.exists():
-                uuids_org_search = [lib.bundle_uuid for lib in queryset_org_search]
-                bundles += get_bundles(uuids=uuids_org_search)
-
-        bundle_dict = {
-            bundle.uuid: {
-                'uuid': bundle.uuid,
-                'title': bundle.title,
-                'description': bundle.description,
-                'version': bundle.latest_version,
-            }
-            for bundle in bundles
+    uuids = [lib.bundle_uuid for lib in queryset]
+    bundles = get_bundles(uuids=uuids, text_search=text_search)
+
+    if text_search:
+        # Bundle APIs can't apply text_search on a bundle's org, so including those results here
+        queryset_org_search = queryset.filter(org__short_name__icontains=text_search)
+        if queryset_org_search.exists():
+            uuids_org_search = [lib.bundle_uuid for lib in queryset_org_search]
+            bundles += get_bundles(uuids=uuids_org_search)
+
+    bundle_dict = {
+        bundle.uuid: {
+            'uuid': bundle.uuid,
+            'title': bundle.title,
+            'description': bundle.description,
+            'version': bundle.latest_version,
         }
-        metadata = [
-            bundle_dict[uuid]
-            if uuid in bundle_dict
-            else None
-            for uuid in uuids
-        ]
+        for bundle in bundles
+    }
+    metadata = [
+        bundle_dict[uuid]
+        if uuid in bundle_dict
+        else None
+        for uuid in uuids
+    ]
 
     libraries = [
         ContentLibraryMetadata(
@@ -648,50 +625,28 @@ def get_library_blocks(library_key, text_search=None, block_types=None) -> list[
 
     Returns a list of LibraryXBlockMetadata objects
     """
-    metadata = None
-    if LibraryBlockIndexer.indexing_is_enabled():
-        try:
-            filter_terms = {
-                'library_key': [str(library_key)],
-                'is_child': [False],
-            }
-            if block_types:
-                filter_terms['block_type'] = block_types
-            metadata = [
-                {
-                    **item,
-                    "id": LibraryUsageLocatorV2.from_string(item['id']),
-                }
-                for item in LibraryBlockIndexer.get_items(filter_terms=filter_terms, text_search=text_search)
-                if item is not None
-            ]
-        except ElasticConnectionError as e:
-            log.exception(e)
-
-    # If indexing is disabled, or connection to elastic failed
-    if metadata is None:
-        metadata = []
-        ref = ContentLibrary.objects.get_by_key(library_key)  # type: ignore[attr-defined]
-        lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME)
-        usages = lib_bundle.get_top_level_usages()
-
-        for usage_key in usages:
-            # For top-level definitions, we can go from definition key to usage key using the following, but this would
-            # not work for non-top-level blocks as they may have multiple usages. Top level blocks are guaranteed to
-            # have only a single usage in the library, which is part of the definition of top level block.
-            def_key = lib_bundle.definition_for_usage(usage_key)
-            display_name = get_block_display_name(def_key)
-            text_match = (text_search is None or
-                          text_search.lower() in display_name.lower() or
-                          text_search.lower() in str(usage_key).lower())
-            type_match = (block_types is None or usage_key.block_type in block_types)
-            if text_match and type_match:
-                metadata.append({
-                    "id": usage_key,
-                    "def_key": def_key,
-                    "display_name": display_name,
-                    "has_unpublished_changes": lib_bundle.does_definition_have_unpublished_changes(def_key),
-                })
+    metadata = []
+    ref = ContentLibrary.objects.get_by_key(library_key)  # type: ignore[attr-defined]
+    lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME)
+    usages = lib_bundle.get_top_level_usages()
+
+    for usage_key in usages:
+        # For top-level definitions, we can go from definition key to usage key using the following, but this would
+        # not work for non-top-level blocks as they may have multiple usages. Top level blocks are guaranteed to
+        # have only a single usage in the library, which is part of the definition of top level block.
+        def_key = lib_bundle.definition_for_usage(usage_key)
+        display_name = get_block_display_name(def_key)
+        text_match = (text_search is None or
+                      text_search.lower() in display_name.lower() or
+                      text_search.lower() in str(usage_key).lower())
+        type_match = (block_types is None or usage_key.block_type in block_types)
+        if text_match and type_match:
+            metadata.append({
+                "id": usage_key,
+                "def_key": def_key,
+                "display_name": display_name,
+                "has_unpublished_changes": lib_bundle.does_definition_have_unpublished_changes(def_key),
+            })
 
     return [
         LibraryXBlockMetadata(

diff --git a/...apps/content_libraries/docs/decisions/0001-index-libraries-in-elasticsearch.rst b/...apps/content_libraries/docs/decisions/0001-index-libraries-in-elasticsearch.rst
@@ -4,7 +4,30 @@
 Status
 ------
 
-Accepted
+**Revoked**
+
+In Dec 2023, we decided to remove the code supporting this decision, because:
+
+* The index is disabled on edx.org, which will initially be the only user
+  of Content Libraries V2.
+* As we migrate libraries from Modulestore to Blockstore and then from
+  Blockstore to Learning Core, the unused indexing code increases complexity
+  and decreases certainty.
+* With the decision to migrate from Blockstore-the-service to an in-process
+  storage backend (that is: Blockstore-the-app or Learning Core), it seems
+  that we will be able to simply use Django ORM in order to filter/sort/search
+  Content Library V2 metadata for the library listing page.
+* Searching Content Library V2 *block* content would still require indexing,
+  but we would rather implement that in Learning Core than use the current
+  implementation in the content_libraries app, which is untested, library-
+  specific, and doesn't take into account library versioning. It always uses
+  the latest draft, which is good for Library Authoring purposes, but not good for
+  Course Authoring purposes.
+
+It is possible that we will end up re-instating a modified version of this ADR
+future. If that happens, we may re-use and adapt the original library index
+code.
+
 
 Context
 -------