diff --git a/rest/serializers.py b/rest/serializers.py index fadfb03a..5b43982c 100644 --- a/rest/serializers.py +++ b/rest/serializers.py @@ -335,3 +335,4 @@ class RepositorySearchSerializer(Serializer): description = CharField() description_path = CharField() preview_url = CharField() + score = FloatField() diff --git a/rest/tests/test_search.py b/rest/tests/test_search.py index 3d777b9d..4dad0796 100644 --- a/rest/tests/test_search.py +++ b/rest/tests/test_search.py @@ -20,6 +20,8 @@ from search.utils import recreate_index from taxonomy.models import Term, Vocabulary, make_vocab_key +# pylint: disable=too-many-statements + class TestSearch(RESTTestCase): """ @@ -29,6 +31,9 @@ class TestSearch(RESTTestCase): def assert_result_equal(self, result, resource): """Helper method to assert result == resource.""" + # remove the score from the result because + # it is meaningless for the tests + del result['score'] self.assertEqual( { 'course': resource.course.course_number, @@ -166,30 +171,42 @@ def test_sortby(self): resource1.xa_avg_grade = 2.0 resource1.xa_nr_attempts = 4 resource1.xa_nr_views = 1000 - resource1.title = '22222' + resource1.title = '22222 aaaa bbbb' resource1.save() resource2.xa_avg_grade = 4.0 resource2.xa_nr_attempts = 1 resource2.xa_nr_views = 100 - resource2.title = '11111' + resource2.title = '11111 aaaa' resource2.save() resource3.xa_avg_grade = 2.0 resource3.xa_nr_attempts = 4 resource3.xa_nr_views = 1000 - resource3.title = '00000' + resource3.title = '00000 bbbb' resource3.save() - # Default sorting should be by nr_views, descending, then id ascending. + # Default sorting should be by relevance and score + # is null without an actual search default_results = self.get_results()['results'] - self.assertEqual(default_results[0]['id'], resource1.id) - self.assertEqual(default_results[1]['id'], resource3.id) - self.assertEqual(default_results[2]['id'], resource2.id) - - nr_views_results = self.get_results( - sortby=LoreSortingFields.SORT_BY_NR_VIEWS[0])['results'] - self.assertEqual(default_results, nr_views_results) + for default_result in default_results: + self.assertIsNone(default_result['score']) + + # same thing if the sort by relevance is specified + relevance_results = self.get_results( + sortby=LoreSortingFields.SORT_BY_RELEVANCE[0])['results'] + for relevance_result in relevance_results: + self.assertIsNone(relevance_result['score']) + + # make a specific search + relevance_results = self.get_results('aaaa')['results'] + for relevance_result in relevance_results: + self.assertIsNotNone(relevance_result['score']) + self.assertEqual(len(relevance_results), 2) + self.assertGreaterEqual( + relevance_results[0]['score'], + relevance_results[1]['score'] + ) avg_grade_results = self.get_results( sortby=LoreSortingFields.SORT_BY_AVG_GRADE[0])['results'] diff --git a/search/sorting.py b/search/sorting.py index d117ab4c..3daba259 100644 --- a/search/sorting.py +++ b/search/sorting.py @@ -14,8 +14,9 @@ class LoreSortingFields(object): SORT_BY_NR_ATTEMPTS = ('nr_attempts', 'Number of Attempts (desc)', '-') SORT_BY_AVG_GRADE = ('avg_grade', 'Average Grade (desc)', '-') SORT_BY_TITLE = ('titlesort', 'Title (asc)', '') + SORT_BY_RELEVANCE = ('_score', 'Relevance', '') - DEFAULT_SORTING_FIELD = SORT_BY_NR_VIEWS[0] + DEFAULT_SORTING_FIELD = SORT_BY_RELEVANCE[0] # base sorting field in case the applied sorting is working on equal values BASE_SORTING_FIELD = 'id' @@ -29,7 +30,8 @@ def all_sorting_options(cls): cls.SORT_BY_NR_VIEWS, cls.SORT_BY_NR_ATTEMPTS, cls.SORT_BY_AVG_GRADE, - cls.SORT_BY_TITLE + cls.SORT_BY_TITLE, + cls.SORT_BY_RELEVANCE, ] @classmethod diff --git a/search/tests/test_sorting.py b/search/tests/test_sorting.py index 87f89bb8..363de69b 100644 --- a/search/tests/test_sorting.py +++ b/search/tests/test_sorting.py @@ -21,6 +21,7 @@ def test_all_sorting_options(self): LoreSortingFields.SORT_BY_NR_ATTEMPTS, LoreSortingFields.SORT_BY_AVG_GRADE, LoreSortingFields.SORT_BY_TITLE, + LoreSortingFields.SORT_BY_RELEVANCE, ] ) @@ -33,6 +34,7 @@ def test_all_sorting_fields(self): LoreSortingFields.SORT_BY_NR_ATTEMPTS[0], LoreSortingFields.SORT_BY_AVG_GRADE[0], LoreSortingFields.SORT_BY_TITLE[0], + LoreSortingFields.SORT_BY_RELEVANCE[0], ] ) @@ -62,9 +64,15 @@ def test_get_sorting_option(self): ), LoreSortingFields.SORT_BY_TITLE ) + self.assertEqual( + LoreSortingFields.get_sorting_option( + LoreSortingFields.SORT_BY_RELEVANCE[0] + ), + LoreSortingFields.SORT_BY_RELEVANCE + ) self.assertEqual( LoreSortingFields.get_sorting_option('foo_field'), - LoreSortingFields.SORT_BY_NR_VIEWS + LoreSortingFields.SORT_BY_RELEVANCE ) def test_all_sorting_options_but(self): @@ -77,6 +85,7 @@ def test_all_sorting_options_but(self): LoreSortingFields.SORT_BY_NR_ATTEMPTS, LoreSortingFields.SORT_BY_AVG_GRADE, LoreSortingFields.SORT_BY_TITLE, + LoreSortingFields.SORT_BY_RELEVANCE, ] ) self.assertEqual( @@ -87,6 +96,7 @@ def test_all_sorting_options_but(self): LoreSortingFields.SORT_BY_NR_VIEWS, LoreSortingFields.SORT_BY_AVG_GRADE, LoreSortingFields.SORT_BY_TITLE, + LoreSortingFields.SORT_BY_RELEVANCE, ] ) self.assertEqual( @@ -97,6 +107,7 @@ def test_all_sorting_options_but(self): LoreSortingFields.SORT_BY_NR_VIEWS, LoreSortingFields.SORT_BY_NR_ATTEMPTS, LoreSortingFields.SORT_BY_TITLE, + LoreSortingFields.SORT_BY_RELEVANCE, ] ) self.assertEqual( @@ -107,6 +118,18 @@ def test_all_sorting_options_but(self): LoreSortingFields.SORT_BY_NR_VIEWS, LoreSortingFields.SORT_BY_NR_ATTEMPTS, LoreSortingFields.SORT_BY_AVG_GRADE, + LoreSortingFields.SORT_BY_RELEVANCE, + ] + ) + self.assertEqual( + LoreSortingFields.all_sorting_options_but( + LoreSortingFields.SORT_BY_RELEVANCE[0] + ), + [ + LoreSortingFields.SORT_BY_NR_VIEWS, + LoreSortingFields.SORT_BY_NR_ATTEMPTS, + LoreSortingFields.SORT_BY_AVG_GRADE, + LoreSortingFields.SORT_BY_TITLE, ] ) self.assertEqual( @@ -116,5 +139,6 @@ def test_all_sorting_options_but(self): LoreSortingFields.SORT_BY_NR_ATTEMPTS, LoreSortingFields.SORT_BY_AVG_GRADE, LoreSortingFields.SORT_BY_TITLE, + LoreSortingFields.SORT_BY_RELEVANCE, ] ) diff --git a/search/utils.py b/search/utils.py index cbc684ea..c3d2c9c4 100644 --- a/search/utils.py +++ b/search/utils.py @@ -6,7 +6,7 @@ from collections import defaultdict import logging -from itertools import islice +from itertools import islice # pylint: disable=no-name-in-module from lxml import etree @@ -22,6 +22,7 @@ from rest.serializers import RepositorySearchSerializer from search.exceptions import ReindexException from search.search_indexes import get_course_metadata +from search.sorting import LoreSortingFields from search.tasks import refresh_index as _refresh_index from taxonomy.models import Vocabulary, Term, make_vocab_key @@ -33,6 +34,7 @@ _CONN = None _CONN_VERIFIED = False PAGE_LENGTH = 10 +META_FIELDS_IN_RESULT = ('score',) def get_vocab_ids(repo_slug=None): @@ -168,21 +170,33 @@ def search_index(tokens=None, repo_slug=None, sort_by=None, terms=None): search = search.query("match", repository=repo_slug) if sort_by is None: # Always sort by ID to preserve ordering. - search = search.sort("id") + search = search.sort(LoreSortingFields.BASE_SORTING_FIELD) + # Temporary workaround; the values in sorting.py should be updated, + # but for now Haystack is still using them. Also, the hyphen is + # required because we sort the numeric values high to low. + elif sort_by in ( + LoreSortingFields.SORT_BY_RELEVANCE[0], + LoreSortingFields.SORT_BY_TITLE[0] + ): + # special case when the sorting is by score with an empty search: + # in this case the score does not make any sense + if (sort_by == LoreSortingFields.SORT_BY_RELEVANCE[0] and + tokens is None): + search = search.sort(LoreSortingFields.BASE_SORTING_FIELD) + else: + search = search.sort( + sort_by, LoreSortingFields.BASE_SORTING_FIELD) else: - # Temporary workaround; the values in sorting.py should be updated, - # but for now Haystack is still using them. Also, the hyphen is - # required because we sort the numeric values high to low. - if "title" not in sort_by: - reverse = sort_by.startswith("-") - if reverse: - sort_by = sort_by[1:] - if "xa" not in sort_by: - sort_by = "xa_{0}".format(sort_by) - if reverse: - sort_by = "-{0}".format(sort_by) + + reverse = sort_by.startswith("-") + if reverse: + sort_by = sort_by[1:] + if "xa" not in sort_by: + sort_by = "xa_{0}".format(sort_by) + if reverse: + sort_by = "-{0}".format(sort_by) # Always sort by ID to preserve ordering. - search = search.sort(sort_by, "id") + search = search.sort(sort_by, LoreSortingFields.BASE_SORTING_FIELD) vocab_ids = set(get_vocab_ids(repo_slug=repo_slug)) for vocab_id in vocab_ids: @@ -199,7 +213,6 @@ def search_index(tokens=None, repo_slug=None, sort_by=None, terms=None): search.aggs.bucket( '{key}_builtins'.format(key=key), "terms", field=key ) - return SearchResults(search) @@ -413,7 +426,10 @@ def __getitem__(self, i): for hit in hits: for field_name in _get_field_names(): - setattr(hit, field_name, getattr(hit, field_name)[0]) + if field_name not in META_FIELDS_IN_RESULT: + setattr(hit, field_name, getattr(hit, field_name)[0]) + else: + setattr(hit, field_name, getattr(hit.meta, field_name)) if isinstance(i, slice): return hits