From 02cdd5e287f5df155020dc84ac81e0010c351950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 23:03:41 +0200 Subject: [PATCH] feat: meilisearch backend for notes search This is a very simple and basic backend. It is based on Django signals, just like the Elasticsearch backend. But it is much simpler, in the sense that there are just two signals: one for saving documents and one for deletion. This backend is limited, in the sense that it does not support highlighting -- but that's probably not such a big deal. To start using this backend, define the following settings: ES_DISABLED = True MEILISEARCH_ENABLED = True MEILISEARCH_URL = "http://meilisearch:7700" MEILISEARCH_API_KEY = "s3cr3t" MEILISEARCH_INDEX = "tutor_student_notes" --- notesapi/v1/views/__init__.py | 5 +- notesapi/v1/views/meilisearch.py | 177 ++++++++++++ requirements/base.in | 1 + requirements/base.txt | 14 +- requirements/ci.txt | 464 ++++++++++++++++++++++++++++++- requirements/test.txt | 21 ++ 6 files changed, 672 insertions(+), 10 deletions(-) create mode 100644 notesapi/v1/views/meilisearch.py diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py index eef7e9b4..f1794265 100644 --- a/notesapi/v1/views/__init__.py +++ b/notesapi/v1/views/__init__.py @@ -13,7 +13,10 @@ def get_views_module(): Import views from either mysql or elasticsearch backend """ if settings.ES_DISABLED: - from . import common as backend_module + if getattr(settings, "MEILISEARCH_ENABLED", False): + from . import meilisearch as backend_module + else: + from . import common as backend_module else: from . import elasticsearch as backend_module return backend_module diff --git a/notesapi/v1/views/meilisearch.py b/notesapi/v1/views/meilisearch.py new file mode 100644 index 00000000..853d1b64 --- /dev/null +++ b/notesapi/v1/views/meilisearch.py @@ -0,0 +1,177 @@ +""" +Meilisearch views to search for annotations. 
+ +To enable this backend, define the following settings: + +ES_DISABLED = True +MEILISEARCH_ENABLED = True + +Then check the Client class for more information about Meilisearch credential settings. + +When you start using this backend, you might want to re-index all your content. To do that, run: + + ./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()" +""" + +import traceback + +import meilisearch +from django.conf import settings +from django.core.paginator import Paginator +from django.db.models import signals +from django.dispatch import receiver + +from notesapi.v1.models import Note + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + + +class Client: + """ + Simple Meilisearch client class + + It depends on the following Django settings: + + - MEILISEARCH_URL + - MEILISEARCH_API_KEY + - MEILISEARCH_INDEX + """ + + _CLIENT = None + _INDEX = None + FILTERABLES = ["user_id", "course_id"] + + @property + def meilisearch_client(self) -> meilisearch.Client: + """ + Return a meilisearch client. + """ + if self._CLIENT is None: + self._CLIENT = meilisearch.Client( + getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"), + getattr(settings, "MEILISEARCH_API_KEY", ""), + ) + return self._CLIENT + + @property + def meilisearch_index(self) -> meilisearch.index.Index: + """ + Return the meilisearch index used to store annotations. + + If the index does not exist, it is created. And if it does not have the right + filterable fields, then it is updated. 
+ """ + if self._INDEX is None: + index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes") + try: + self._INDEX = self.meilisearch_client.get_index(index_name) + except meilisearch.errors.MeilisearchApiError: + task = self.meilisearch_client.create_index( + index_name, {"primaryKey": "id"} + ) + self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000) + self._INDEX = self.meilisearch_client.get_index(index_name) + + # Checking filterable attributes + existing_filterables = set(self._INDEX.get_filterable_attributes()) + if not set(self.FILTERABLES).issubset(existing_filterables): + all_filterables = list(existing_filterables.union(self.FILTERABLES)) + self._INDEX.update_filterable_attributes(all_filterables) + + return self._INDEX + + +class AnnotationSearchView(BaseAnnotationSearchView): + def get_queryset(self): + """ + Simple result filtering method based on text search. + + We simply include in the query only those that match the text search query. Note + that this backend does not support highlighting (yet). + """ + if not self.is_text_search: + return super().get_queryset() + + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + + # Define meilisearch params + filters = [ + f"user_id = '{self.params['user']}'", + f"course_id = '{self.params['course_id']}'", + ] + page_size = int(self.params["page_size"]) + offset = (int(self.params["page"]) - 1) * page_size + + # Perform search + search_results = Client().meilisearch_index.search( + self.params["text"], + {"offset": offset, "limit": page_size, "filter": filters}, + ) + + # Limit to these IDs + queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]]) + return queryset + + +@receiver(signals.post_save, sender=Note) +def on_note_save(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Create or update a document. 
+ """ + add_documents([instance]) + + +@receiver(signals.post_delete, sender=Note) +def on_note_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Delete a document. + """ + Client().meilisearch_index.delete_document(instance.id) + + +def reindex(): + """ + Re-index all notes, in batches of 100. + """ + paginator = Paginator(Note.objects.all(), 100) + for page_number in paginator.page_range: + page = paginator.page(page_number) + add_documents(page.object_list) + + +def add_documents(notes): + """ + Convert some Note objects and insert them in the index. + """ + documents_to_add = [ + { + "id": note.id, + "user_id": note.user_id, + "course_id": note.course_id, + "text": note.text, + } + for note in notes + ] + if documents_to_add: + Client().meilisearch_index.add_documents(documents_to_add) + + +def heartbeat(): + """ + Check that the meilisearch client is healthy. + """ + if not Client().meilisearch_client.is_healthy(): + raise SearchViewRuntimeError("meilisearch") + + +def selftest(): + """ + Check that we can access the meilisearch index. 
+ """ + try: + return {"meilisearch": Client().meilisearch_index.created_at} + except meilisearch.errors.MeilisearchError as e: + raise SearchViewRuntimeError( + {"meilisearch_error": traceback.format_exc()} + ) from e diff --git a/requirements/base.in b/requirements/base.in index dc2ab72b..0a9b7105 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -10,6 +10,7 @@ elasticsearch-dsl django-elasticsearch-dsl django-elasticsearch-dsl-drf django-cors-headers +meilisearch mysqlclient PyJWT gunicorn # MIT diff --git a/requirements/base.txt b/requirements/base.txt index 30bc5a40..f6470daa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,6 +4,8 @@ # # make upgrade # +annotated-types==0.7.0 + # via pydantic asgiref==3.8.1 # via # django @@ -12,6 +14,8 @@ attrs==24.2.0 # via # jsonschema # referencing +camel-converter[pydantic]==4.0.1 + # via meilisearch certifi==2024.8.30 # via # elasticsearch @@ -100,6 +104,8 @@ jsonschema==4.23.0 # via drf-spectacular jsonschema-specifications==2024.10.1 # via jsonschema +meilisearch==0.31.5 + # via -r requirements/base.in mysqlclient==2.2.5 # via -r requirements/base.in newrelic==10.2.0 @@ -120,6 +126,10 @@ psutil==6.1.0 # via edx-django-utils pycparser==2.22 # via cffi +pydantic==2.9.2 + # via camel-converter +pydantic-core==2.23.4 + # via pydantic pyjwt[crypto]==2.9.0 # via # -r requirements/base.in @@ -147,6 +157,7 @@ requests==2.32.3 # via # -r requirements/base.in # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # jsonschema @@ -169,7 +180,8 @@ stevedore==5.3.0 typing-extensions==4.12.2 # via # edx-opaque-keys - # elasticsearch-dsl + # pydantic + # pydantic-core uritemplate==4.1.1 # via drf-spectacular urllib3==1.26.20 diff --git a/requirements/ci.txt b/requirements/ci.txt index e128790a..be859b79 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -4,31 +4,479 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # 
pydantic +asgiref==3.8.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django + # django-cors-headers +astroid==3.3.5 + # via + # -r requirements/test.txt + # pylint + # pylint-celery +attrs==24.2.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # jsonschema + # referencing cachetools==5.5.0 - # via tox + # via + # -r requirements/test.txt + # tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # meilisearch +certifi==2024.8.30 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # elasticsearch + # requests +cffi==1.17.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # cryptography + # pynacl chardet==5.2.0 - # via tox + # via + # -r requirements/test.txt + # diff-cover + # tox +charset-normalizer==3.4.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # requests +click==8.1.7 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # click-log + # code-annotations + # edx-django-utils + # edx-lint +click-log==0.4.0 + # via edx-lint +code-annotations==1.8.0 + # via + # -r requirements/quality.in + # -r requirements/test.txt + # edx-lint colorama==0.4.6 - # via tox + # via + # -r requirements/test.txt + # tox +coverage[toml]==7.6.4 + # via + # -r requirements/test.txt + # pytest-cov +cryptography==43.0.3 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pyjwt +ddt==1.7.2 + # via -r requirements/test.txt +diff-cover==9.2.0 + # via -r requirements/test.txt +dill==0.3.9 + # via + # -r requirements/test.txt + # pylint distlib==0.3.9 - # via virtualenv + # via + # -r requirements/test.txt + # virtualenv +django==4.2.16 + # via + # -c https://raw.githubusercontent.com/openedx/edx-lint/master/edx_lint/files/common_constraints.txt + # -r requirements/base.txt + # -r requirements/test.txt + # django-cors-headers + # django-crum + # django-nine + # django-waffle + # djangorestframework + # 
drf-jwt + # drf-spectacular + # edx-django-release-util + # edx-django-utils + # edx-drf-extensions +django-cors-headers==4.6.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt +django-crum==0.7.9 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-django-utils +django-elasticsearch-dsl==7.4 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl-drf +django-elasticsearch-dsl-drf==0.22.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt +django-nine==0.2.7 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl-drf +django-waffle==4.1.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-django-utils + # edx-drf-extensions +djangorestframework==3.15.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl-drf + # drf-jwt + # drf-spectacular + # edx-drf-extensions +dnspython==2.7.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pymongo +drf-jwt==1.19.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-drf-extensions +drf-spectacular==0.27.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt +edx-django-release-util==1.4.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt +edx-django-utils==7.0.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-drf-extensions +edx-drf-extensions==10.5.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt +edx-lint==5.4.1 + # via -r requirements/quality.in +edx-opaque-keys==2.11.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-drf-extensions +elasticsearch==7.13.4 + # via + # -c https://raw.githubusercontent.com/openedx/edx-lint/master/edx_lint/files/common_constraints.txt + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl-drf + # elasticsearch-dsl 
+elasticsearch-dsl==7.4.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl + # django-elasticsearch-dsl-drf +factory-boy==3.3.1 + # via -r requirements/test.txt +faker==30.8.2 + # via + # -r requirements/test.txt + # factory-boy filelock==3.16.1 # via + # -r requirements/test.txt # tox # virtualenv +gunicorn==23.0.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt +idna==3.10 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # requests +inflection==0.5.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # drf-spectacular +iniconfig==2.0.0 + # via + # -r requirements/test.txt + # pytest +isort==5.13.2 + # via + # -r requirements/test.txt + # pylint +jinja2==3.1.4 + # via + # -r requirements/test.txt + # code-annotations + # diff-cover +jsonschema==4.23.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # drf-spectacular +jsonschema-specifications==2024.10.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # jsonschema +markupsafe==3.0.2 + # via + # -r requirements/test.txt + # jinja2 +mccabe==0.7.0 + # via + # -r requirements/test.txt + # pylint +meilisearch==0.31.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt +more-itertools==10.5.0 + # via -r requirements/test.txt +mysqlclient==2.2.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt +newrelic==10.2.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-django-utils packaging==24.2 # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-nine + # gunicorn # pyproject-api + # pytest # tox +path==17.0.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # path-py +path-py==12.5.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt +pbr==6.1.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # stevedore +pep8==1.7.1 + # via -r requirements/test.txt 
platformdirs==4.3.6 # via + # -r requirements/test.txt + # pylint # tox # virtualenv pluggy==1.5.0 - # via tox + # via + # -r requirements/test.txt + # diff-cover + # pytest + # tox +psutil==6.1.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-django-utils +pycodestyle==2.12.1 + # via -r requirements/quality.in +pycparser==2.22 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic +pygments==2.18.0 + # via + # -r requirements/test.txt + # diff-cover +pyjwt[crypto]==2.9.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # drf-jwt + # edx-drf-extensions +pylint==3.3.1 + # via + # -r requirements/quality.in + # -r requirements/test.txt + # edx-lint + # pylint-celery + # pylint-django + # pylint-plugin-utils +pylint-celery==0.3 + # via edx-lint +pylint-django==2.6.1 + # via edx-lint +pylint-plugin-utils==0.8.2 + # via + # pylint-celery + # pylint-django +pymongo==4.10.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-django-utils pyproject-api==1.8.0 - # via tox + # via + # -r requirements/test.txt + # tox +pytest==8.3.3 + # via + # -r requirements/test.txt + # pytest-cov + # pytest-django +pytest-cov==6.0.0 + # via -r requirements/test.txt +pytest-django==4.9.0 + # via -r requirements/test.txt +python-dateutil==2.9.0.post0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # elasticsearch-dsl + # faker +python-slugify==8.0.4 + # via + # -r requirements/test.txt + # code-annotations +pytz==2024.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt +pyyaml==6.0.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # code-annotations 
+ # drf-spectacular + # edx-django-release-util +referencing==0.35.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # jsonschema + # jsonschema-specifications +requests==2.32.3 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-drf-extensions + # meilisearch +rpds-py==0.21.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # jsonschema + # referencing +semantic-version==2.10.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-drf-extensions +six==1.16.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django-elasticsearch-dsl + # django-elasticsearch-dsl-drf + # edx-django-release-util + # edx-lint + # elasticsearch-dsl + # python-dateutil +sqlparse==0.5.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # django +stevedore==5.3.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # code-annotations + # edx-django-utils + # edx-opaque-keys +text-unidecode==1.3 + # via + # -r requirements/test.txt + # python-slugify +tomlkit==0.13.2 + # via + # -r requirements/test.txt + # pylint tox==4.23.2 - # via -r requirements/ci.in + # via -r requirements/test.txt +typing-extensions==4.12.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # edx-opaque-keys + # faker + # pydantic + # pydantic-core +uritemplate==4.1.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # drf-spectacular +urllib3==1.26.20 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # elasticsearch + # requests virtualenv==20.27.1 - # via tox + # via + # -r requirements/test.txt + # tox + +# The following packages are considered to be unsafe in a requirements file: +setuptools==75.3.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt diff --git a/requirements/test.txt b/requirements/test.txt index d8412d3f..6251f94c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,6 +4,10 @@ # # 
make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -20,6 +24,10 @@ attrs==24.2.0 # referencing cachetools==5.5.0 # via tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -175,6 +183,8 @@ markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via pylint +meilisearch==0.31.5 + # via -r requirements/base.txt more-itertools==10.5.0 # via -r requirements/test.in mysqlclient==2.2.5 @@ -221,6 +231,14 @@ pycparser==2.22 # via # -r requirements/base.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # pydantic pygments==2.18.0 # via diff-cover pyjwt[crypto]==2.9.0 @@ -273,6 +291,7 @@ requests==2.32.3 # via # -r requirements/base.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -311,6 +330,8 @@ typing-extensions==4.12.2 # -r requirements/base.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt