From 05534bdd83e1413b7ad99f384bd34f9cffea0319 Mon Sep 17 00:00:00 2001 From: Paul Tran-Van Date: Mon, 28 Oct 2024 17:06:11 +0100 Subject: [PATCH] fix: Limit was set on matching fields Flexsearch returns results grouped by matching field, for instance: ``` { "path": [doc1, doc2, ...], "name": [doc2, doc4, ...] } ``` It enforces the limit on each field, which means we might end up in a situation where a file is skipped from the `name` array but appears in the `path` array. As our sort order ranks results with a match on the `name` ahead of those with a match on the `path`, this can lead to an inconsistent display of results. Therefore, we enforce our own limit after sorting. --- src/search/SearchEngine.ts | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/search/SearchEngine.ts b/src/search/SearchEngine.ts index be9efaf..2c38e1c 100644 --- a/src/search/SearchEngine.ts +++ b/src/search/SearchEngine.ts @@ -264,11 +264,12 @@ class SearchEngine { } const allResults = this.searchOnIndexes(query) - const results = this.deduplicateAndFlatten(allResults) - const sortedResults = this.sortSearchResults(results) + const dedupResults = this.deduplicateAndFlatten(allResults) + const sortedResults = this.sortSearchResults(dedupResults) + const results = this.limitSearchResults(sortedResults) const normResults: SearchResult[] = [] - for (const res of sortedResults) { + for (const res of results) { const normalizedRes = await normalizeSearchResult(this.client, res, query) normResults.push(normalizedRes) } @@ -286,11 +287,18 @@ class SearchEngine { } // TODO: do not use flexsearch store and rely on pouch storage? 
// It's better for memory, but might slow down search queries + // // XXX - The limit is specified twice because of a flexsearch inconstency // that does not enforce the limit if only given in second argument, and // does not return the correct type is only given in third options - const indexResults = index.index.search(query, LIMIT_DOCTYPE_SEARCH, { - limit: LIMIT_DOCTYPE_SEARCH, + // + // XXX - The given limit here is arbitrary because flexsearch enforces it per matching + // field, which can cause issues related to the sort: if we search on name+path for files, + // and limit to 100, the 101st result on name will be skipped, but might appear on path, + // which will make it appear in the search results, but in an incorrect order. + const FLEXSEARCH_LIMIT = 10000 + const indexResults = index.index.search(query, FLEXSEARCH_LIMIT, { + limit: FLEXSEARCH_LIMIT, enrich: true }) const newResults = indexResults.map(res => ({ @@ -373,6 +381,10 @@ class SearchEngine { // Then name return this.compareStrings(aRes.doc.name, bRes.doc.name) } + + limitSearchResults(searchResults: RawSearchResult[]): RawSearchResult[] { + return searchResults.slice(0, LIMIT_DOCTYPE_SEARCH) + } } export default SearchEngine