Skip to content

Commit

Permalink
Merge pull request #969 from ecds/revert-955-feature/951-exact-search
Browse files Browse the repository at this point in the history
Revert "Search with exact matches using double quotes (#951)"
  • Loading branch information
jayvarner authored Nov 15, 2023
2 parents 2830b51 + 888ee62 commit 00f1bed
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 132 deletions.
15 changes: 15 additions & 0 deletions apps/cms/templatetags/readux_templatetags.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,18 @@
def order_by(queryset, args):
    """Template filter that orders ``queryset`` by a comma-separated field list.

    Example use::

        {{ myqueryset|order_by:"field_one, -field_two" }}

    :param queryset: object exposing an ``order_by(*fields)`` method
    :param args: comma-separated field names; whitespace around each name
        is stripped and blank entries are ignored
    :returns: the result of ``queryset.order_by(*fields)``, or ``queryset``
        itself when ``args`` contains no field names
    """
    # Skip blank entries (trailing or doubled commas) instead of passing
    # empty field names through to ``order_by``.
    fields = [name.strip() for name in args.split(",") if name.strip()]
    if not fields:
        # Nothing to order by; leave any existing ordering untouched.
        return queryset
    return queryset.order_by(*fields)


@register.filter
def dict_item(dictionary, key):
    """Template filter to allow accessing a dictionary value by variable key.

    Example use::

        {{ mydict|dict_item:keyvar }}

    :param dictionary: mapping (or other subscriptable object) to look in
    :param key: key to look up
    :returns: ``dictionary[key]``, or ``None`` when the key is missing or
        ``dictionary`` cannot be subscripted with ``key``
    """
    # adapted from Princeton-CDH/geniza project https://github.com/Princeton-CDH/geniza/
    try:
        return dictionary[key]
    except (AttributeError, LookupError, TypeError):
        # fail silently if something other than a dict is passed (subscripting
        # e.g. None raises TypeError, not AttributeError) or the key is absent
        return None
58 changes: 0 additions & 58 deletions apps/readux/templatetags/readux_extras.py

This file was deleted.

90 changes: 24 additions & 66 deletions apps/readux/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Django Views for the Readux app"""
import re
from os import path
from urllib.parse import urlencode
from django.http import HttpResponse
Expand Down Expand Up @@ -341,9 +340,6 @@ class VolumeSearchView(ListView, FormMixin):
"sort": "label_alphabetical"
}

# regex to match terms in doublequotes
re_exact_match = re.compile(r'\B(".+?")\B')

def get_form_kwargs(self):
# adapted from Princeton-CDH/geniza project https://github.com/Princeton-CDH/geniza/
kwargs = super().get_form_kwargs()
Expand Down Expand Up @@ -414,75 +410,37 @@ def get_queryset(self):
search_query = form_data.get("q") or ""
scope = form_data.get("scope") or "all"
if search_query:
# find exact match queries (words or phrases in double quotes)
exact_queries = self.re_exact_match.findall(search_query)
# remove exact queries from the original search query to search separately
search_query = re.sub(self.re_exact_match , "", search_query).strip()

es_queries = []
es_queries_exact = []
queries = []
if scope in ["all", "metadata"]:
# query for root level fields
if search_query:
multimatch_query = Q(
"multi_match", query=search_query, fields=self.query_search_fields
)
es_queries.append(multimatch_query)
for exq in exact_queries:
# separate exact searches so we can put them in "must" boolean query
multimatch_exact = Q(
"multi_match",
query=exq.replace('"', "").strip(), # strip double quotes
fields=self.query_search_fields,
type="phrase", # type = "phrase" for exact phrase matches
)
es_queries_exact.append({"bool": {"should": [multimatch_exact]}})

multimatch_query = Q(
"multi_match", query=search_query, fields=self.query_search_fields
)
queries.append(multimatch_query)

if scope in ["all", "text"]:
# query for nested fields (i.e. canvas position and text)
nested_kwargs = {
"path": "canvas_set",
nested_query = Q(
"nested",
path="canvas_set",
query=Q(
"multi_match",
query=search_query,
fields=["canvas_set.result"],
),
inner_hits={
"name": "canvases",
"size": 3, # max number of pages shown in full-text results
"highlight": {"fields": {"canvas_set.result": {}}},
},
# sum scores if in full text only search, so vols with most hits show up first.
# if also searching metadata, use avg (default) instead, to not over-inflate.
"score_mode": "sum" if scope == "text" else "avg",
}
inner_hits_dict = {
"size": 3, # max number of pages shown in full-text results
"highlight": {"fields": {"canvas_set.result": {}}},
}
if search_query:
nested_query = Q(
"nested",
query=Q(
"multi_match",
query=search_query,
fields=["canvas_set.result"],
),
inner_hits={ **inner_hits_dict, "name": "canvases" },
**nested_kwargs,
)
es_queries.append(nested_query)
for i, exq in enumerate(exact_queries):
# separate exact searches so we can put them in "must" boolean query
nested_exact = Q(
"nested",
query=Q(
"multi_match",
query=exq.replace('"', "").strip(),
fields=["canvas_set.result"],
type="phrase",
),
# each inner_hits set needs to have a different name in elasticsearch
inner_hits={ **inner_hits_dict, "name": f"canvases_{i}" },
**nested_kwargs,
)
if scope == "all":
es_queries_exact[i]["bool"]["should"].append(nested_exact)
else:
es_queries_exact.append({"bool": {"should": [nested_exact]}})
score_mode="sum" if scope == "text" else "avg",
)
queries.append(nested_query)

# combine them with bool: { should, must }
q = Q("bool", should=es_queries, must=es_queries_exact)
# combine them with bool: { should }
q = Q("bool", should=queries)
volumes = volumes.query(q)

# highlight
Expand Down
3 changes: 1 addition & 2 deletions apps/templates/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ <h1 class="uk-heading-medium uk-text-center">Search</h1>
</fieldset>
<span class="uk-text-small">
Search for individual whole keywords. Multiple words will be searched as
'or' (e.g. Rome London = Rome or London). Surround a word or phrase in
double quotes (e.g. "Roman painter") to require exact matches in results.
'or' (e.g. Rome London = Rome or London).
</span>
<fieldset class="uk-margin uk-width-1-1">
<div class="uk-form-label">{{ form.sort.label }}</div>
Expand Down
12 changes: 6 additions & 6 deletions apps/templates/snippets/volume_result.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% load readux_extras %}
{% load readux_templatetags %}

<li class="uk-width-1-1@m uk-margin-small">
<h4>
Expand Down Expand Up @@ -73,15 +73,15 @@ <h4>
{% endif %}
</dd>
{% endif %}
{% if volume|has_inner_hits %}
<dt>Full Text</dt>
{% for canvas in volume.meta.inner_hits|group_by_canvas %}
{% if 'inner_hits' in volume.meta and volume.meta.inner_hits.canvases.hits.total.value %}
<dt>Full Text</dt>
{% for canvas in volume.meta.inner_hits.canvases %}
<dd class="result-page">
<a href="{% url 'page' volume=volume.pid page=canvas.pid %}">
<span class="page-number">p. {{ canvas.position|add:1 }}</span>
{% if canvas.highlights|length %}
{% if canvas.meta.highlight %}
<ul class="highlights">
{% for fragment in canvas.highlights %}
{% for fragment in canvas.meta.highlight|dict_item:"canvas_set.result" %}
<li>{{ fragment|safe }}</li>
{% endfor %}
</ul>
Expand Down

0 comments on commit 00f1bed

Please sign in to comment.