Skip to content

Commit

Permalink
feat: meilisearch backend for notes search
Browse files Browse the repository at this point in the history
This is a very simple and basic backend. It is based on Django signals,
just like the Elasticsearch backend. But it is much simpler, in the
sense that there are just two signals: one for saving documents and one
for deletion.

This backend is limited, in the sense that it does not support
highlighting -- but that's probably not such a big deal.

To start using this backend, define the following settings:

	ES_DISABLED = True
	MEILISEARCH_ENABLED = True
	MEILISEARCH_URL = "http://meilisearch:7700"
	MEILISEARCH_API_KEY = "s3cr3t"
	MEILISEARCH_INDEX = "tutor_student_notes"
  • Loading branch information
regisb committed Nov 12, 2024
1 parent 9d7e118 commit 02cdd5e
Show file tree
Hide file tree
Showing 6 changed files with 672 additions and 10 deletions.
5 changes: 4 additions & 1 deletion notesapi/v1/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ def get_views_module():
Import views from either mysql or elasticsearch backend
"""
if settings.ES_DISABLED:
from . import common as backend_module
if getattr(settings, "MEILISEARCH_ENABLED", False):
from . import meilisearch as backend_module
else:
from . import common as backend_module
else:
from . import elasticsearch as backend_module
return backend_module
177 changes: 177 additions & 0 deletions notesapi/v1/views/meilisearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""
Meilisearch views to search for annotations.
To enable this backend, define the following settings:
ES_DISABLED = True
MEILISEARCH_ENABLED = True
Then check the Client class for more information about Meilisearch credential settings.
When you start using this backend, you might want to re-index all your content. To do that, run:
./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()"
"""

import traceback

import meilisearch
from django.conf import settings
from django.core.paginator import Paginator
from django.db.models import signals
from django.dispatch import receiver

from notesapi.v1.models import Note

from .common import AnnotationSearchView as BaseAnnotationSearchView
from .exceptions import SearchViewRuntimeError


class Client:
    """
    Simple Meilisearch client class.

    It depends on the following Django settings:

    - MEILISEARCH_URL (default: "http://meilisearch:7700")
    - MEILISEARCH_API_KEY (default: "")
    - MEILISEARCH_INDEX (default: "student_notes")

    The underlying client and index objects are cached on the class, so that the
    many short-lived ``Client()`` instances created by this module share them
    instead of re-creating them on every call.
    """

    # Class-level caches, shared by every instance.
    _CLIENT = None
    _INDEX = None
    # Attributes that must be filterable in the index.
    FILTERABLES = ["user_id", "course_id"]

    @property
    def meilisearch_client(self) -> meilisearch.Client:
        """
        Return a meilisearch client, creating it on first access.
        """
        # Assign on the class, not on `self`: an instance attribute would be
        # lost as soon as this (usually temporary) instance is discarded.
        if Client._CLIENT is None:
            Client._CLIENT = meilisearch.Client(
                getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"),
                getattr(settings, "MEILISEARCH_API_KEY", ""),
            )
        return Client._CLIENT

    @property
    def meilisearch_index(self) -> meilisearch.index.Index:
        """
        Return the meilisearch index used to store annotations.

        If the index does not exist, it is created. And if it does not have the right
        filterable fields, then it is updated.
        """
        if Client._INDEX is None:
            index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes")
            try:
                index = self.meilisearch_client.get_index(index_name)
            except meilisearch.errors.MeilisearchApiError:
                # Any API error from get_index is handled by attempting to
                # create the index, then fetching it again.
                task = self.meilisearch_client.create_index(
                    index_name, {"primaryKey": "id"}
                )
                self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000)
                index = self.meilisearch_client.get_index(index_name)

            # Checking filterable attributes
            existing_filterables = set(index.get_filterable_attributes())
            if not set(self.FILTERABLES).issubset(existing_filterables):
                all_filterables = list(existing_filterables.union(self.FILTERABLES))
                index.update_filterable_attributes(all_filterables)

            # Cache the index only once the filterable check went through, so a
            # failure above is retried on the next access.
            Client._INDEX = index

        return Client._INDEX


class AnnotationSearchView(BaseAnnotationSearchView):
    """
    Annotation search view that delegates text search to Meilisearch.
    """

    @staticmethod
    def _quote_filter_value(value):
        """
        Return `value` as a single-quoted string literal for a Meilisearch filter.

        Backslashes and single quotes are backslash-escaped, such that the value
        cannot terminate the quoted literal early and alter the filter expression.
        Values without these characters are rendered exactly as `'value'`.
        """
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def get_queryset(self):
        """
        Simple result filtering method based on text search.

        We simply include in the query only those that match the text search query. Note
        that this backend does not support highlighting (yet).

        When the request is not a text search, the parent queryset is returned unchanged.
        """
        if not self.is_text_search:
            return super().get_queryset()

        queryset = Note.objects.filter(**self.query_params).order_by("-updated")

        # Define meilisearch params
        # Values are escaped before being embedded in the filter expressions.
        filters = [
            f"user_id = {self._quote_filter_value(self.params['user'])}",
            f"course_id = {self._quote_filter_value(self.params['course_id'])}",
        ]
        page_size = int(self.params["page_size"])
        offset = (int(self.params["page"]) - 1) * page_size

        # Perform search
        # NOTE(review): the hits are already restricted to the requested page;
        # confirm that the base view does not paginate the returned queryset a
        # second time.
        search_results = Client().meilisearch_index.search(
            self.params["text"],
            {"offset": offset, "limit": page_size, "filter": filters},
        )

        # Limit to these IDs
        queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]])
        return queryset


@receiver(signals.post_save, sender=Note)
def on_note_save(sender, instance, **kwargs):  # pylint: disable=unused-argument
    """
    Index the note that was just saved (creation and update alike).
    """
    saved_notes = [instance]
    add_documents(saved_notes)


@receiver(signals.post_delete, sender=Note)
def on_note_delete(sender, instance, **kwargs):  # pylint: disable=unused-argument
    """
    Remove from the index the document matching the deleted note.
    """
    index = Client().meilisearch_index
    index.delete_document(instance.id)


def reindex(batch_size=100):
    """
    Re-index all notes, in batches.

    Args:
        batch_size: number of notes sent to the index per batch (default: 100).
    """
    # Paginate over an explicitly ordered queryset: without a stable ordering,
    # consecutive pages are not guaranteed to be consistent, and some notes
    # could be indexed twice or skipped.
    paginator = Paginator(Note.objects.order_by("pk"), batch_size)
    for page_number in paginator.page_range:
        page = paginator.page(page_number)
        add_documents(page.object_list)


def add_documents(notes):
    """
    Serialize Note objects to documents and insert them in the index.

    Nothing is sent to the index when `notes` yields no item.
    """

    def as_document(item):
        # Only these four fields are stored in the index.
        return {
            "id": item.id,
            "user_id": item.user_id,
            "course_id": item.course_id,
            "text": item.text,
        }

    payload = [as_document(item) for item in notes]
    if not payload:
        return
    Client().meilisearch_index.add_documents(payload)


def heartbeat():
    """
    Raise SearchViewRuntimeError when the meilisearch client reports being unhealthy.
    """
    client = Client().meilisearch_client
    if client.is_healthy():
        return
    raise SearchViewRuntimeError("meilisearch")


def selftest():
    """
    Verify that the meilisearch index is reachable.

    Returns a dict holding the index `created_at` value under the "meilisearch" key.
    Any meilisearch error is re-raised as a SearchViewRuntimeError carrying the
    formatted traceback.
    """
    try:
        index = Client().meilisearch_index
        created_at = index.created_at
    except meilisearch.errors.MeilisearchError as exc:
        details = {"meilisearch_error": traceback.format_exc()}
        raise SearchViewRuntimeError(details) from exc
    return {"meilisearch": created_at}
1 change: 1 addition & 0 deletions requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ elasticsearch-dsl
django-elasticsearch-dsl
django-elasticsearch-dsl-drf
django-cors-headers
meilisearch
mysqlclient
PyJWT
gunicorn # MIT
Expand Down
14 changes: 13 additions & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#
# make upgrade
#
annotated-types==0.7.0
# via pydantic
asgiref==3.8.1
# via
# django
Expand All @@ -12,6 +14,8 @@ attrs==24.2.0
# via
# jsonschema
# referencing
camel-converter[pydantic]==4.0.1
# via meilisearch
certifi==2024.8.30
# via
# elasticsearch
Expand Down Expand Up @@ -100,6 +104,8 @@ jsonschema==4.23.0
# via drf-spectacular
jsonschema-specifications==2024.10.1
# via jsonschema
meilisearch==0.31.5
# via -r requirements/base.in
mysqlclient==2.2.5
# via -r requirements/base.in
newrelic==10.2.0
Expand All @@ -120,6 +126,10 @@ psutil==6.1.0
# via edx-django-utils
pycparser==2.22
# via cffi
pydantic==2.9.2
# via camel-converter
pydantic-core==2.23.4
# via pydantic
pyjwt[crypto]==2.9.0
# via
# -r requirements/base.in
Expand Down Expand Up @@ -147,6 +157,7 @@ requests==2.32.3
# via
# -r requirements/base.in
# edx-drf-extensions
# meilisearch
rpds-py==0.21.0
# via
# jsonschema
Expand All @@ -169,7 +180,8 @@ stevedore==5.3.0
typing-extensions==4.12.2
# via
# edx-opaque-keys
# elasticsearch-dsl
# pydantic
# pydantic-core
uritemplate==4.1.1
# via drf-spectacular
urllib3==1.26.20
Expand Down
Loading

0 comments on commit 02cdd5e

Please sign in to comment.