From f97a4e6e7aa07cb59db9774ef6bfe9b54a5bb8a7 Mon Sep 17 00:00:00 2001
From: Ben Silverman
Date: Wed, 31 Jan 2024 17:12:01 -0500
Subject: [PATCH] Index user annotations in elasticsearch (#956)

---
 apps/readux/documents.py | 67 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 apps/readux/documents.py

diff --git a/apps/readux/documents.py b/apps/readux/documents.py
new file mode 100644
index 00000000..62b40eb3
--- /dev/null
+++ b/apps/readux/documents.py
@@ -0,0 +1,67 @@
+"""Elasticsearch indexing rules for UserAnnotations"""
+
+from html import unescape
+from django_elasticsearch_dsl import Document, fields
+from django_elasticsearch_dsl.registries import registry
+from django.utils.html import strip_tags
+
+from apps.readux.models import UserAnnotation
+from apps.iiif.manifests.documents import stemmer
+
+@registry.register_document
+class UserAnnotationDocument(Document):
+    """Elasticsearch Document class for Readux UserAnnotation
+
+    Each ``prepare_<field>`` method supplies the value indexed for the
+    explicitly mapped field of the same name; ``pid`` has no prepare method.
+    """
+
+    # fields to map explicitly in Elasticsearch
+    canvas_index = fields.IntegerField()
+    canvas_pid = fields.KeywordField()
+    content = fields.TextField(analyzer=stemmer)
+    manifest_pid = fields.KeywordField()
+    owner_username = fields.KeywordField()
+    pid = fields.KeywordField()
+
+    class Index:
+        """Settings for Elasticsearch"""
+
+        name = "annotations"
+
+    class Django:
+        """Settings for automatically pulling data from Django"""
+
+        model = UserAnnotation
+
+    def prepare_content(self, instance):
+        """Return the annotation content as plain text.
+
+        HTML tags are stripped and entities unescaped. Missing content is
+        indexed as an empty string: ``strip_tags`` coerces its argument
+        with ``str()``, so ``None`` would otherwise be indexed as "None".
+        """
+        return unescape(strip_tags(instance.content or ""))
+
+    # NOTE(review): the guards below assume ``canvas``/``owner`` may be unset
+    # on a UserAnnotation -- confirm nullability against the model.
+    def prepare_canvas_index(self, instance):
+        """Return the canvas position, or None without a canvas."""
+        canvas = instance.canvas
+        return None if canvas is None else canvas.position
+
+    def prepare_canvas_pid(self, instance):
+        """Return the canvas pid, or None without a canvas."""
+        canvas = instance.canvas
+        return None if canvas is None else canvas.pid
+
+    def prepare_manifest_pid(self, instance):
+        """Return the manifest pid, or None without a canvas or manifest."""
+        canvas = instance.canvas
+        manifest = None if canvas is None else canvas.manifest
+        return None if manifest is None else manifest.pid
+
+    def prepare_owner_username(self, instance):
+        """Return the owner's username, or None without an owner."""
+        owner = instance.owner
+        return None if owner is None else owner.username