diff --git a/CHANGES.rst b/CHANGES.rst index 1f178f5c200..b3e69ef489c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -112,6 +112,10 @@ Bugs fixed * #12425: Use Docutils' SVG processing in the HTML builder and remove Sphinx's custom logic. Patch by Tunç Başar Köse. +* #12391: Adjust scoring of matches during HTML search so that document main + titles tend to rank higher than subsection titles. In addition, boost matches + on the name of programming domain objects relative to title/subtitle matches. + Patch by James Addison and Will Lachance. Testing ------- diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index eaed90953f4..b08d58c9b9b 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -328,13 +328,14 @@ const Search = { for (const [title, foundTitles] of Object.entries(allTitles)) { if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? 
"#" + id : "", null, - score, + score + boost, filenames[file], ]); } diff --git a/tests/js/fixtures/titles/searchindex.js b/tests/js/fixtures/titles/searchindex.js new file mode 100644 index 00000000000..56855ca9a1b --- /dev/null +++ b/tests/js/fixtures/titles/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"Main Page": [[0, null]], "Relevance": [[0, "relevance"], [1, null]]}, "docnames": ["index", "relevance"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst", "relevance.rst"], "indexentries": {"example (class in relevance)": [[0, "relevance.Example", false]], "module": [[0, "module-relevance", false]], "relevance": [[0, "module-relevance", false]], "relevance (relevance.example attribute)": [[0, "relevance.Example.relevance", false]]}, "objects": {"": [[0, 0, 0, "-", "relevance"]], "relevance": [[0, 1, 1, "", "Example"]], "relevance.Example": [[0, 2, 1, "", "relevance"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute"}, "terms": {"": [0, 1], "A": 1, "For": 1, "In": [0, 1], "against": 0, "also": 1, "an": 0, "answer": 0, "appear": 1, "ar": 1, "area": 0, "ask": 0, "attribut": 0, "built": 1, "can": [0, 1], "class": 0, "code": [0, 1], "consid": 1, "contain": 0, "context": 0, "corpu": 1, "could": 1, "demonstr": 0, "describ": 1, "detail": 1, "determin": 1, "docstr": 0, "document": [0, 1], "domain": 1, "engin": 0, "exampl": [0, 1], "extract": 0, "find": 0, "found": 0, "from": 0, "function": 1, "ha": 1, "handl": 0, "happen": 1, "head": 0, "help": 0, "highli": 1, "how": 0, "i": [0, 1], "improv": 0, "inform": 0, "intend": 0, 
"issu": 1, "itself": 1, "knowledg": 0, "languag": 1, "less": 1, "like": [0, 1], "match": 0, "mention": 1, "name": [0, 1], "object": 0, "one": 1, "onli": 1, "other": 0, "page": 1, "part": 1, "particular": 0, "printf": 1, "program": 1, "project": 0, "queri": [0, 1], "question": 0, "re": 0, "rel": 0, "research": 0, "result": 1, "sai": 0, "same": 1, "score": 0, "search": [0, 1], "seem": 0, "softwar": 1, "some": 1, "sphinx": 0, "straightforward": 1, "subject": 0, "subsect": 0, "term": [0, 1], "test": 0, "text": 0, "than": 1, "thei": 0, "them": 0, "thi": 0, "titl": 0, "user": [0, 1], "we": [0, 1], "when": 0, "whether": 1, "within": 0, "would": 1}, "titles": ["Main Page", "Relevance"], "titleterms": {"main": 0, "page": 0, "relev": [0, 1]}}) \ No newline at end of file diff --git a/tests/js/roots/titles/conf.py b/tests/js/roots/titles/conf.py new file mode 100644 index 00000000000..e5f6bb97a20 --- /dev/null +++ b/tests/js/roots/titles/conf.py @@ -0,0 +1,6 @@ +import os +import sys + +sys.path.insert(0, os.path.abspath('.')) + +extensions = ['sphinx.ext.autodoc'] diff --git a/tests/js/roots/titles/index.rst b/tests/js/roots/titles/index.rst new file mode 100644 index 00000000000..464cd954b5c --- /dev/null +++ b/tests/js/roots/titles/index.rst @@ -0,0 +1,20 @@ +Main Page +========= + +This is the main page of the ``titles`` test project. + +In particular, this test project is intended to demonstrate how Sphinx +can handle scoring of query matches against document titles and subsection +heading titles relative to other document matches such as terms found within +document text and object names extracted from code. + +Relevance +--------- + +In the context of search engines, we can say that a document is **relevant** +to a user's query when it contains information that seems likely to help them +find an answer to a question they're asking, or to improve their knowledge of +the subject area they're researching. + +.. 
automodule:: relevance + :members: diff --git a/tests/js/roots/titles/relevance.py b/tests/js/roots/titles/relevance.py new file mode 100644 index 00000000000..c4d0eec557f --- /dev/null +++ b/tests/js/roots/titles/relevance.py @@ -0,0 +1,7 @@ +class Example: + """Example class""" + num_attribute = 5 + text_attribute = "string" + + relevance = "testing" + """attribute docstring""" diff --git a/tests/js/roots/titles/relevance.rst b/tests/js/roots/titles/relevance.rst new file mode 100644 index 00000000000..18f494fe109 --- /dev/null +++ b/tests/js/roots/titles/relevance.rst @@ -0,0 +1,13 @@ +Relevance +========= + +In some domains, it can be straightforward to determine whether a search result +is relevant to the user's query. + +For example, if we are in a software programming language domain, and a user +has issued a query for the term ``printf``, then we could consider a document +in the corpus that describes a built-in language function with the same name +as (highly) relevant. A document that only happens to mention the ``printf`` +function name as part of some example code that appears on the page would +also be relevant, but likely less relevant than the one that describes the +function itself in detail. 
diff --git a/tests/js/searchtools.js b/tests/js/searchtools.js
index d020e40d904..a71047dae9f 100644
--- a/tests/js/searchtools.js
+++ b/tests/js/searchtools.js
@@ -7,6 +7,35 @@ describe('Basic html theme search', function() {
       return req.responseText;
   }
 
+  function checkRanking(expectedRanking, results) {
+    /*
+     * Assert that each [page, title, target] entry of expectedRanking is
+     * found, in order, while walking results from last to first. Results
+     * that match no expected entry are skipped.
+     */
+    let [nextExpected, ...remainingItems] = expectedRanking;
+
+    /* copy before reversing: Array.prototype.reverse mutates in place */
+    for (const result of [...results].reverse()) {
+      if (!nextExpected) break;
+
+      const [expectedPage, expectedTitle, expectedTarget] = nextExpected;
+      const [page, title, target] = result;
+
+      if (page === expectedPage && title === expectedTitle && target === expectedTarget) {
+        [nextExpected, ...remainingItems] = remainingItems;
+      }
+    }
+
+    /*
+     * remainingItems becomes empty as soon as the second-to-last entry has
+     * matched, so nextExpected must be checked too; otherwise the final
+     * expected entry is never verified.
+     */
+    expect(remainingItems.length).toEqual(0);
+    expect(nextExpected).toEqual(undefined);
+  }
+
   describe('terms search', function() {
 
     it('should find "C++" when in index', function() {
@@ -76,7 +105,7 @@ describe('Basic html theme search', function() {
           'Main Page',
           '',
           null,
-          100,
+          16,
           'index.rst'
         ]
       ];
@@ -85,6 +114,66 @@ describe('Basic html theme search', function() {
 
   });
 
+  describe('search result ranking', function() {
+
+    /*
+     * These tests should not prescribe precise expected ordering of search
+     * results; instead each test case should describe a single relevance rule
+     * that helps users to locate relevant information efficiently.
+     *
+     * If you think that one of the rules seems to be poorly-defined or is
+     * limiting the potential for search algorithm improvements, please check
+     * for existing discussion/bugreports related to it on GitHub[1] before
+     * creating one yourself. Suggestions for possible improvements are also
+     * welcome.
+     *
+     * [1] - https://github.com/sphinx-doc/sphinx.git/
+     */
+
+    it('should score a code module match above a page-title match', function() {
+      eval(loadFixture("titles/searchindex.js"));
+
+      expectedRanking = [
+        ['index', 'relevance', '#module-relevance'],  /* py:module documentation */
+        ['relevance', 'Relevance', ''],  /* main title */
+      ];
+
+      searchParameters = Search._parseQuery('relevance');
+      results = Search._performSearch(...searchParameters);
+
+      checkRanking(expectedRanking, results);
+    });
+
+    it('should score a main-title match above an object member match', function() {
+      eval(loadFixture("titles/searchindex.js"));
+
+      expectedRanking = [
+        ['relevance', 'Relevance', ''],  /* main title */
+        ['index', 'relevance.Example.relevance', '#relevance.Example.relevance'],  /* py:class attribute */
+      ];
+
+      searchParameters = Search._parseQuery('relevance');
+      results = Search._performSearch(...searchParameters);
+
+      checkRanking(expectedRanking, results);
+    });
+
+    it('should score a main-title match above a subheading-title match', function() {
+      eval(loadFixture("titles/searchindex.js"));
+
+      expectedRanking = [
+        ['relevance', 'Relevance', ''],  /* main title */
+        ['index', 'Main Page > Relevance', '#relevance'],  /* subsection heading title */
+      ];
+
+      searchParameters = Search._parseQuery('relevance');
+      results = Search._performSearch(...searchParameters);
+
+      checkRanking(expectedRanking, results);
+    });
+
+  });
+
 });
 
 describe("htmlToText", function() {