Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAT-70] feat: Do not use flexsearch store #2623

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 92 additions & 20 deletions packages/cozy-dataproxy-lib/src/search/SearchEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
shouldKeepFile
} from './helpers/normalizeFile'
import { normalizeSearchResult } from './helpers/normalizeSearchResult'
import { queryAllDocs, queryFilesForSearch } from './queries'
import { queryAllDocs, queryFilesForSearch, queryDocsByIds } from './queries'
import {
CozyDoc,
RawSearchResult,
Expand All @@ -32,13 +32,14 @@ import {
SearchIndex,
SearchIndexes,
SearchResult,
isSearchedDoctype
isSearchedDoctype,
EnrichedSearchResult
} from './types'

const log = Minilog('🗂️ [Indexing]')

interface FlexSearchResultWithDoctype
extends FlexSearch.EnrichedDocumentSearchResultSetUnit<CozyDoc> {
extends FlexSearch.SimpleDocumentSearchResultSetUnit {
doctype: SearchedDoctype
}

Expand Down Expand Up @@ -179,14 +180,13 @@ export class SearchEngine {
const fieldsToIndex = SEARCH_SCHEMA[doctype]

const flexsearchIndex = new FlexSearch.Document<CozyDoc, true>({
tokenize: 'forward',
tokenize: 'full',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this change is not documented in the commit message and is outside the "do not use flexsearch store" scope. Could you either mention it in the commit message or extract it into a separate commit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh my, it's a mistake, thank you!

encode: getSearchEncoder(),
// @ts-expect-error minlength is not described by Flexsearch types but exists
minlength: 2,
document: {
id: '_id',
index: fieldsToIndex,
store: true
index: fieldsToIndex
}
})

Expand Down Expand Up @@ -316,7 +316,7 @@ export class SearchEngine {
return this.incrementalIndexation(doctype, searchIndex)
}

search(query: string): SearchResult[] {
async search(query: string): Promise<SearchResult[]> {
if (!this.searchIndexes) {
// TODO: What if the indexing is running but not finished yet?
log.warn('[SEARCH] No search index available')
Expand All @@ -325,7 +325,8 @@ export class SearchEngine {

const allResults = this.searchOnIndexes(query)
const dedupResults = this.deduplicateAndFlatten(allResults)
const sortedResults = this.sortSearchResults(dedupResults)
const enrichedResults = await this.enrichResults(dedupResults)
const sortedResults = this.sortSearchResults(enrichedResults)
const results = this.limitSearchResults(sortedResults)

const normResults: SearchResult[] = []
Expand Down Expand Up @@ -359,8 +360,25 @@ export class SearchEngine {
const FLEXSEARCH_LIMIT = 10000
const indexResults = index.index.search(query, FLEXSEARCH_LIMIT, {
limit: FLEXSEARCH_LIMIT,
enrich: true
enrich: false
})
/*
Search result example:
[
{
"field": "displayName",
"result": [
"604627c6bafee013ec5f27f7f72029f6"
]
},
{
"field": "fullname",
"result": [
"604627c6bafee013ec5f27f7f72029f6", "604627c6bafee013ec5f27f3f714568"
]
}
]
*/
Comment on lines +365 to +381
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was that comment expected to be committed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it was. It may not look like good practice, but I honestly felt it was helpful for quickly grasping the expected result.


const newResults = indexResults.map(res => ({
...res,
Expand All @@ -376,30 +394,82 @@ export class SearchEngine {
searchResults: FlexSearchResultWithDoctype[]
): RawSearchResult[] {
const combinedResults = searchResults.flatMap(item =>
item.result.map(r => ({ ...r, field: item.field, doctype: item.doctype }))
item.result.map(id => ({
id: id.toString(), // Because of flexsearch Id typing
doctype: item.doctype,
field: item.field
}))
)

type MapItem = Omit<(typeof combinedResults)[number], 'field'> & {
fields: string[]
}
const resultMap = new Map<FlexSearch.Id[], MapItem>()
const resultMap = new Map<string, RawSearchResult>()

combinedResults.forEach(({ id, field, ...rest }) => {
combinedResults.forEach(({ id, field, doctype }) => {
if (resultMap.has(id)) {
resultMap.get(id)?.fields.push(field)
} else {
resultMap.set(id, { id, fields: [field], ...rest })
resultMap.set(id, { id, fields: [field], doctype })
}
})

return [...resultMap.values()]
}

/**
 * Attach the full document to each raw search result.
 *
 * Result ids are grouped by doctype, the matching documents are fetched
 * through `queryDocsByIds` (store first, PouchDB as a fallback), and each
 * result receives its document in `doc`.
 *
 * Results whose document cannot be found are logged and returned without a
 * `doc`.
 * NOTE(review): such entries do not honour `EnrichedSearchResult`, where `doc`
 * is required. Confirm that sorting and normalization tolerate a missing doc.
 *
 * @param results - Search results carrying an id, a doctype and matched fields
 * @returns The same result objects, in the same order, with `doc` set when found
 */
async enrichResults(
  results: RawSearchResult[]
): Promise<EnrichedSearchResult[]> {
  const enrichedResults = [...results] as EnrichedSearchResult[]

  // Group ids by doctype so that each doctype is queried only once
  const idsByDoctype = new Map<string, string[]>()
  for (const { id, doctype } of results) {
    const ids = idsByDoctype.get(doctype)
    if (ids) {
      ids.push(id)
    } else {
      idsByDoctype.set(doctype, [id])
    }
  }

  // Index fetched docs by id: one lookup per result instead of scanning the
  // whole doc list for each of them
  const docsById = new Map<string, CozyDoc>()
  for (const [doctype, ids] of idsByDoctype) {
    const startQuery = performance.now()
    // Query docs directly from the store first, as it is much faster
    let queryDocs = await queryDocsByIds(this.client, doctype, ids, {
      fromStore: true
    })
    if (queryDocs.length < 1) {
      log.warn('Ids not found on store: query PouchDB')
      // This should not happen, but fall back to PouchDB in case the store
      // returned nothing. This is not the default path, as querying PouchDB
      // is much slower.
      queryDocs = await queryDocsByIds(this.client, doctype, ids, {
        fromStore: false
      })
    }
    const endQuery = performance.now()
    for (const doc of queryDocs) {
      // Keep the first doc seen for a given id
      if (typeof doc._id === 'string' && !docsById.has(doc._id)) {
        docsById.set(doc._id, doc)
      }
    }
    log.debug(`Query took ${(endQuery - startQuery).toFixed(2)} ms`)
  }

  for (const res of enrichedResults) {
    const id = res.id
    const doc = docsById.get(id)
    if (!doc) {
      log.error(`${id} is found in search but not in local data`)
    } else {
      res.doc = doc
    }
  }
  return enrichedResults
}

/**
 * Compare two strings with the default locale, using numeric collation.
 *
 * @returns A negative, zero or positive number, as `String.prototype.localeCompare` does
 */
compareStrings(str1: string, str2: string): number {
  const collationOptions = { numeric: true }
  return str1.localeCompare(str2, undefined, collationOptions)
}

sortSearchResults(searchResults: RawSearchResult[]): RawSearchResult[] {
sortSearchResults(
searchResults: EnrichedSearchResult[]
): EnrichedSearchResult[] {
return searchResults.sort((a, b) => {
const doctypeComparison =
DOCTYPE_ORDER[a.doctype] - DOCTYPE_ORDER[b.doctype]
Expand Down Expand Up @@ -428,7 +498,7 @@ export class SearchEngine {
})
}

sortFiles(aRes: RawSearchResult, bRes: RawSearchResult): number {
sortFiles(aRes: EnrichedSearchResult, bRes: EnrichedSearchResult): number {
if (!isIOCozyFile(aRes.doc) || !isIOCozyFile(bRes.doc)) {
return 0
}
Expand All @@ -444,7 +514,9 @@ export class SearchEngine {
return this.compareStrings(aRes.doc.name, bRes.doc.name)
}

limitSearchResults(searchResults: RawSearchResult[]): RawSearchResult[] {
limitSearchResults(
searchResults: EnrichedSearchResult[]
): EnrichedSearchResult[] {
return searchResults.slice(0, LIMIT_DOCTYPE_SEARCH)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { IOCozyContact, IOCozyFile } from 'cozy-client/types/types'

import { cleanFilePath, normalizeSearchResult } from './normalizeSearchResult'
import { FILES_DOCTYPE } from '../consts'
import { RawSearchResult } from '../types'
import { EnrichedSearchResult } from '../types'

const fakeFlatDomainClient = {
getStackClient: () => ({
Expand All @@ -27,7 +27,7 @@ describe('Should normalize files results', () => {
const searchResult = {
doctype: 'io.cozy.files',
doc: doc
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -61,7 +61,7 @@ describe('Should normalize files results', () => {
const searchResult = {
doctype: 'io.cozy.files',
doc: doc
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -91,7 +91,7 @@ describe('Should normalize files results', () => {
const searchResult = {
doctype: 'io.cozy.files',
doc: doc
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -122,7 +122,7 @@ describe('Should normalize contacts results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'jobTitle']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -151,7 +151,7 @@ describe('Should normalize contacts results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'jobTitle']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -179,7 +179,7 @@ describe('Should normalize contacts results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'jobTitle']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -208,7 +208,7 @@ describe('Should normalize contacts results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: []
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -241,7 +241,7 @@ describe('Should normalize contacts results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'email[]:address']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -277,7 +277,7 @@ describe('Should normalize apps results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'email[]:address']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -306,7 +306,7 @@ describe('Should normalize apps results', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'email[]:address']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down Expand Up @@ -337,7 +337,7 @@ describe('Should normalize unknown doctypes', () => {
doctype: 'io.cozy.files',
doc: doc,
fields: ['displayName', 'email[]:address']
} as unknown as RawSearchResult
} as unknown as EnrichedSearchResult

const result = normalizeSearchResult(
fakeFlatDomainClient,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { IOCozyContact } from 'cozy-client/types/types'
import { APPS_DOCTYPE, TYPE_DIRECTORY } from '../consts'
import {
CozyDoc,
RawSearchResult,
EnrichedSearchResult,
isIOCozyApp,
isIOCozyContact,
isIOCozyFile,
Expand All @@ -13,7 +13,7 @@ import {

export const normalizeSearchResult = (
client: CozyClient,
searchResults: RawSearchResult,
searchResults: EnrichedSearchResult,
query: string
): SearchResult => {
const doc = cleanFilePath(searchResults.doc)
Expand Down
23 changes: 23 additions & 0 deletions packages/cozy-dataproxy-lib/src/search/queries/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ interface QueryResponseSingleDoc {
data: CozyDoc
}

/**
 * Shape of a query response carrying several documents in `data`.
 * `queryDocsByIds` casts the response of its `getByIds` query to this type.
 */
interface QueryResponseMultipleDoc {
data: CozyDoc[]
}

export const queryFilesForSearch = async (
client: CozyClient
): Promise<CozyDoc[]> => {
Expand Down Expand Up @@ -58,3 +62,22 @@ export const queryDocById = async (
})) as QueryResponseSingleDoc
return resp.data
}

/**
 * Fetch the documents of a doctype matching the given ids.
 *
 * @param client - Cozy client used to read the store or to run the query
 * @param doctype - Doctype of the requested documents
 * @param ids - Ids of the documents to fetch
 * @param options - `fromStore` (default `true`): read the documents from the
 *   client state instead of running a `getByIds` query
 * @returns The documents found for the given ids
 */
export const queryDocsByIds = async (
  client: CozyClient,
  doctype: string,
  ids: string[],
  { fromStore = true } = {}
): Promise<CozyDoc[]> => {
  if (fromStore) {
    // Reading from the store is much more efficient than querying PouchDB.
    // NOTE(review): getCollectionFromState presumably yields a nullish value
    // when the doctype is absent from the store; treat that as "no document"
    // rather than throwing on `.filter`. Verify against cozy-client.
    const allDocs = client.getCollectionFromState(doctype) ?? []
    // Set membership avoids rescanning `ids` for every document of the store
    const wantedIds = new Set(ids)
    const docs = allDocs.filter(doc => !!doc._id && wantedIds.has(doc._id))
    return docs as CozyDoc[]
  }

  const resp = (await client.query(
    Q(doctype).getByIds(ids)
  )) as QueryResponseMultipleDoc
  return resp.data
}
8 changes: 6 additions & 2 deletions packages/cozy-dataproxy-lib/src/search/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,14 @@ export const isSearchedDoctype = (
return searchedDoctypes.includes(doctype)
}

export interface RawSearchResult
extends FlexSearch.EnrichedDocumentSearchResultSetUnitResultUnit<CozyDoc> {
export interface RawSearchResult {
fields: string[]
doctype: SearchedDoctype
id: string
}

/**
 * A raw search result (id, doctype, matched fields) completed with the
 * document it refers to.
 */
export interface EnrichedSearchResult extends RawSearchResult {
doc: CozyDoc
}

export interface SearchResult {
Expand Down
Loading