Skip to content

Commit

Permalink
Work-in-progress: initial draft JavaScript implementation of ngram-based substring matching.
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison committed Jul 16, 2024
1 parent b7df2b8 commit ea5b2da
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ const Search = {
performTermsSearch: (searchTerms, excludedTerms) => {
// prepare search
const terms = Search._index.terms;
const termsNgrams = Search._index.terms_ngrams;
const titleTerms = Search._index.titleterms;
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
Expand All @@ -510,9 +511,40 @@ const Search = {
if (word.length > 2) {
const escapedWord = _escapeRegExp(word);
if (!terms.hasOwnProperty(word)) {
Object.keys(terms).forEach((term) => {
if (term.match(escapedWord))
arr.push({ files: terms[term], score: Scorer.partialTerm });
// Utility function to look up the entry stored for an ngram in the
// `termsNgrams` index, descending one level per character of the ngram.
//
// Returns whatever is stored at the end of that path (the caller wraps it
// in a Set, so it is expected to be iterable), or an empty array when the
// path does not exist.  An empty array is also returned when the index
// itself is missing, so that a search index without ngram data yields
// "no partial matches" instead of throwing a TypeError.
let lookup = function (ngram) {
  let node = termsNgrams;
  for (const step of ngram) {
    // a missing index or a missing branch means nothing matches this ngram
    if (!node) return [];
    node = node[step];
  }
  return node || [];
};

// retrieve the initial candidate superstrings
let start = 0;
let ngram = word.substring(start, start + 3);
const candidateTerms = new Set(lookup(ngram));

// iterate over each ngram in the word
while (candidateTerms.size && start + 3 <= word.length) {
ngram = word.substring(start, start + 3);

// remove candidate terms that aren't found for the ngram
const ngramTerms = new Set(lookup(ngram));
for (const candidateTerm of candidateTerms) {
if (!ngramTerms.has(candidateTerm)) {
candidateTerms.delete(candidateTerm);
}
}

// continue to the next ngram
start++;
}

// include matches in the results
candidateTerms.forEach((term) => {
arr.push({ files: terms[term], score: Scorer.partialTerm });
});
}
if (!titleTerms.hasOwnProperty(word)) {
Expand Down

0 comments on commit ea5b2da

Please sign in to comment.