From 393af6d7896d882480b49a67c997cfc87963554f Mon Sep 17 00:00:00 2001 From: Paul Tran-Van Date: Thu, 21 Nov 2024 10:20:59 +0100 Subject: [PATCH] feat: Use forward and reverse tokenization We enable the "reverse" tokenization mode, which allows searching in both forward and backward directions on tokens. The forward mode allows searching from left to right, while the reverse mode allows the opposite. For example, with the word "example", you can search for "exam" in forward mode, and "ample" in reverse. We measured a 15-20% memory increase when enabling the reverse tokenization, compared to the forward mode. The "full" mode, which allows searching on all combinations, including in the middle of a word, comes with a ~70% memory increase. So, we decided not to enable it for now, as the cost/benefit ratio of such a feature is unclear. The memory cost of enabling the reverse mode, however, seems reasonable. --- packages/cozy-dataproxy-lib/src/search/SearchEngine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts b/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts index 788b789470..0164a5ecf7 100644 --- a/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts +++ b/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts @@ -180,7 +180,7 @@ export class SearchEngine { const fieldsToIndex = SEARCH_SCHEMA[doctype] const flexsearchIndex = new FlexSearch.Document({ - tokenize: 'forward', + tokenize: 'reverse', // See https://github.com/nextapps-de/flexsearch?tab=readme-ov-file#tokenizer encode: getSearchEncoder(), // @ts-expect-error minlength is not described by Flexsearch types but exists minlength: 2,