Skip to content
This repository has been archived by the owner on Jan 28, 2020. It is now read-only.

Commit

Permalink
Added sorting by relevance (_score)
Browse files Browse the repository at this point in the history
  • Loading branch information
Giovanni Di Milia committed Jan 22, 2016
1 parent 0024fe3 commit e668808
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 30 deletions.
1 change: 1 addition & 0 deletions rest/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,4 @@ class RepositorySearchSerializer(Serializer):
description = CharField()
description_path = CharField()
preview_url = CharField()
score = FloatField()
39 changes: 28 additions & 11 deletions rest/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from search.utils import recreate_index
from taxonomy.models import Term, Vocabulary, make_vocab_key

# pylint: disable=too-many-statements


class TestSearch(RESTTestCase):
"""
Expand All @@ -29,6 +31,9 @@ class TestSearch(RESTTestCase):
def assert_result_equal(self, result, resource):
"""Helper method to assert result == resource."""

# remove the score from the result because
# it is meaningless for the tests
del result['score']
self.assertEqual(
{
'course': resource.course.course_number,
Expand Down Expand Up @@ -166,30 +171,42 @@ def test_sortby(self):
resource1.xa_avg_grade = 2.0
resource1.xa_nr_attempts = 4
resource1.xa_nr_views = 1000
resource1.title = '22222'
resource1.title = '22222 aaaa bbbb'
resource1.save()

resource2.xa_avg_grade = 4.0
resource2.xa_nr_attempts = 1
resource2.xa_nr_views = 100
resource2.title = '11111'
resource2.title = '11111 aaaa'
resource2.save()

resource3.xa_avg_grade = 2.0
resource3.xa_nr_attempts = 4
resource3.xa_nr_views = 1000
resource3.title = '00000'
resource3.title = '00000 bbbb'
resource3.save()

# Default sorting should be by nr_views, descending, then id ascending.
# Default sorting should be by relevance and score
# is null without an actual search
default_results = self.get_results()['results']
self.assertEqual(default_results[0]['id'], resource1.id)
self.assertEqual(default_results[1]['id'], resource3.id)
self.assertEqual(default_results[2]['id'], resource2.id)

nr_views_results = self.get_results(
sortby=LoreSortingFields.SORT_BY_NR_VIEWS[0])['results']
self.assertEqual(default_results, nr_views_results)
for default_result in default_results:
self.assertIsNone(default_result['score'])

# same thing if the sort by relevance is specified
relevance_results = self.get_results(
sortby=LoreSortingFields.SORT_BY_RELEVANCE[0])['results']
for relevance_result in relevance_results:
self.assertIsNone(relevance_result['score'])

# make a specific search
relevance_results = self.get_results('aaaa')['results']
for relevance_result in relevance_results:
self.assertIsNotNone(relevance_result['score'])
self.assertEqual(len(relevance_results), 2)
self.assertGreaterEqual(
relevance_results[0]['score'],
relevance_results[1]['score']
)

avg_grade_results = self.get_results(
sortby=LoreSortingFields.SORT_BY_AVG_GRADE[0])['results']
Expand Down
6 changes: 4 additions & 2 deletions search/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ class LoreSortingFields(object):
SORT_BY_NR_ATTEMPTS = ('nr_attempts', 'Number of Attempts (desc)', '-')
SORT_BY_AVG_GRADE = ('avg_grade', 'Average Grade (desc)', '-')
SORT_BY_TITLE = ('titlesort', 'Title (asc)', '')
SORT_BY_RELEVANCE = ('_score', 'Relevance', '')

DEFAULT_SORTING_FIELD = SORT_BY_NR_VIEWS[0]
DEFAULT_SORTING_FIELD = SORT_BY_RELEVANCE[0]

# base sorting field in case the applied sorting is working on equal values
BASE_SORTING_FIELD = 'id'
Expand All @@ -29,7 +30,8 @@ def all_sorting_options(cls):
cls.SORT_BY_NR_VIEWS,
cls.SORT_BY_NR_ATTEMPTS,
cls.SORT_BY_AVG_GRADE,
cls.SORT_BY_TITLE
cls.SORT_BY_TITLE,
cls.SORT_BY_RELEVANCE,
]

@classmethod
Expand Down
26 changes: 25 additions & 1 deletion search/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_all_sorting_options(self):
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_TITLE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)

Expand All @@ -33,6 +34,7 @@ def test_all_sorting_fields(self):
LoreSortingFields.SORT_BY_NR_ATTEMPTS[0],
LoreSortingFields.SORT_BY_AVG_GRADE[0],
LoreSortingFields.SORT_BY_TITLE[0],
LoreSortingFields.SORT_BY_RELEVANCE[0],
]
)

Expand Down Expand Up @@ -62,9 +64,15 @@ def test_get_sorting_option(self):
),
LoreSortingFields.SORT_BY_TITLE
)
self.assertEqual(
LoreSortingFields.get_sorting_option(
LoreSortingFields.SORT_BY_RELEVANCE[0]
),
LoreSortingFields.SORT_BY_RELEVANCE
)
self.assertEqual(
LoreSortingFields.get_sorting_option('foo_field'),
LoreSortingFields.SORT_BY_NR_VIEWS
LoreSortingFields.SORT_BY_RELEVANCE
)

def test_all_sorting_options_but(self):
Expand All @@ -77,6 +85,7 @@ def test_all_sorting_options_but(self):
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_TITLE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)
self.assertEqual(
Expand All @@ -87,6 +96,7 @@ def test_all_sorting_options_but(self):
LoreSortingFields.SORT_BY_NR_VIEWS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_TITLE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)
self.assertEqual(
Expand All @@ -97,6 +107,7 @@ def test_all_sorting_options_but(self):
LoreSortingFields.SORT_BY_NR_VIEWS,
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_TITLE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)
self.assertEqual(
Expand All @@ -107,6 +118,18 @@ def test_all_sorting_options_but(self):
LoreSortingFields.SORT_BY_NR_VIEWS,
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)
self.assertEqual(
LoreSortingFields.all_sorting_options_but(
LoreSortingFields.SORT_BY_RELEVANCE[0]
),
[
LoreSortingFields.SORT_BY_NR_VIEWS,
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_TITLE,
]
)
self.assertEqual(
Expand All @@ -116,5 +139,6 @@ def test_all_sorting_options_but(self):
LoreSortingFields.SORT_BY_NR_ATTEMPTS,
LoreSortingFields.SORT_BY_AVG_GRADE,
LoreSortingFields.SORT_BY_TITLE,
LoreSortingFields.SORT_BY_RELEVANCE,
]
)
48 changes: 32 additions & 16 deletions search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from collections import defaultdict
import logging
from itertools import islice
from itertools import islice # pylint: disable=no-name-in-module

from lxml import etree

Expand All @@ -22,6 +22,7 @@
from rest.serializers import RepositorySearchSerializer
from search.exceptions import ReindexException
from search.search_indexes import get_course_metadata
from search.sorting import LoreSortingFields
from search.tasks import refresh_index as _refresh_index
from taxonomy.models import Vocabulary, Term, make_vocab_key

Expand All @@ -33,6 +34,7 @@
_CONN = None
_CONN_VERIFIED = False
PAGE_LENGTH = 10
META_FIELDS_IN_RESULT = ('score',)


def get_vocab_ids(repo_slug=None):
Expand Down Expand Up @@ -168,21 +170,33 @@ def search_index(tokens=None, repo_slug=None, sort_by=None, terms=None):
search = search.query("match", repository=repo_slug)
if sort_by is None:
# Always sort by ID to preserve ordering.
search = search.sort("id")
search = search.sort(LoreSortingFields.BASE_SORTING_FIELD)
# Temporary workaround; the values in sorting.py should be updated,
# but for now Haystack is still using them. Also, the hyphen is
# required because we sort the numeric values high to low.
elif sort_by in (
LoreSortingFields.SORT_BY_RELEVANCE[0],
LoreSortingFields.SORT_BY_TITLE[0]
):
# special case when the sorting is by score with an empty search:
# in this case the score does not make any sense
if (sort_by == LoreSortingFields.SORT_BY_RELEVANCE[0] and
tokens is None):
search = search.sort(LoreSortingFields.BASE_SORTING_FIELD)
else:
search = search.sort(
sort_by, LoreSortingFields.BASE_SORTING_FIELD)
else:
# Temporary workaround; the values in sorting.py should be updated,
# but for now Haystack is still using them. Also, the hyphen is
# required because we sort the numeric values high to low.
if "title" not in sort_by:
reverse = sort_by.startswith("-")
if reverse:
sort_by = sort_by[1:]
if "xa" not in sort_by:
sort_by = "xa_{0}".format(sort_by)
if reverse:
sort_by = "-{0}".format(sort_by)

reverse = sort_by.startswith("-")
if reverse:
sort_by = sort_by[1:]
if "xa" not in sort_by:
sort_by = "xa_{0}".format(sort_by)
if reverse:
sort_by = "-{0}".format(sort_by)
# Always sort by ID to preserve ordering.
search = search.sort(sort_by, "id")
search = search.sort(sort_by, LoreSortingFields.BASE_SORTING_FIELD)

vocab_ids = set(get_vocab_ids(repo_slug=repo_slug))
for vocab_id in vocab_ids:
Expand All @@ -199,7 +213,6 @@ def search_index(tokens=None, repo_slug=None, sort_by=None, terms=None):
search.aggs.bucket(
'{key}_builtins'.format(key=key), "terms", field=key
)

return SearchResults(search)


Expand Down Expand Up @@ -413,7 +426,10 @@ def __getitem__(self, i):

for hit in hits:
for field_name in _get_field_names():
setattr(hit, field_name, getattr(hit, field_name)[0])
if field_name not in META_FIELDS_IN_RESULT:
setattr(hit, field_name, getattr(hit, field_name)[0])
else:
setattr(hit, field_name, getattr(hit.meta, field_name))

if isinstance(i, slice):
return hits
Expand Down

0 comments on commit e668808

Please sign in to comment.