Skip to content

Commit

Permalink
feat: Improve indexing time
Browse files Browse the repository at this point in the history
We noticed a huge performance drop at indexing time: indexing 44K files
was taking 20000+ ms.
The issue was in the path computation, which was hugely sub-optimal for
no reason.

Now, the indexing takes ~400ms for 44K files.
  • Loading branch information
paultranvan committed Nov 20, 2024
1 parent f0d4b50 commit 99e2a1b
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 71 deletions.
13 changes: 10 additions & 3 deletions packages/cozy-dataproxy-lib/src/search/SearchEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ export class SearchEngine {
if (!this.client) {
return
}
let startReplicationTime = 0,
endReplicationTime = 0
if (!this.isLocalSearch) {
// In case of non-local search, force the indexing for all doctypes
// For local search, this will be done automatically after initial replication
Expand All @@ -88,9 +90,16 @@ export class SearchEngine {
})
this.client.on('pouchlink:sync:start', () => {
log.debug('Started pouch replication')
startReplicationTime = performance.now()
})
this.client.on('pouchlink:sync:end', () => {
log.debug('Ended pouch replication')
endReplicationTime = performance.now()
log.debug(
`Replication took ${(
endReplicationTime - startReplicationTime
).toFixed(2)}`
)
})
}

Expand Down Expand Up @@ -182,9 +191,7 @@ export class SearchEngine {
})

// There is no persisted path for files: we must add it
const completedDocs = this.isLocalSearch
? addFilePaths(this.client, docs)
: docs
const completedDocs = this.isLocalSearch ? addFilePaths(docs) : docs
for (const doc of completedDocs) {
void this.addDocToIndex(flexsearchIndex, doc)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,52 +103,42 @@ describe('normalizeFileWithFolders', () => {
})

describe('addFilePaths', () => {
test(`should add parent folder's path to files`, () => {
const client = {
getCollectionFromState: jest.fn().mockReturnValue([
{
type: 'directory',
_type: 'io.cozy.files',
_id: 'SOME_DIR_ID',
path: 'SOME/PARENT/PATH'
}
])
} as unknown as CozyClient

it(`should add parent folder's path to files`, () => {
const docs = [
{
_id: 'SOME_FILE_ID',
_type: 'io.cozy.files',
type: 'file',
dir_id: 'SOME_DIR_ID'
dir_id: 'SOME_DIR_ID',
name: 'myfile.txt'
},
{
_id: 'SOME_DIR_ID',
_type: 'io.cozy.files',
type: 'directory',
path: '/mydir',
dir_id: 'ROOT_ID'
}
] as CozyDoc[]

const result = addFilePaths(client, docs)
const result = addFilePaths(docs)

expect(result).toStrictEqual([
{
_id: 'SOME_FILE_ID',
_type: 'io.cozy.files',
type: 'file',
dir_id: 'SOME_DIR_ID',
path: 'SOME/PARENT/PATH'
}
])
expect(result[0]).toStrictEqual({
_id: 'SOME_FILE_ID',
_type: 'io.cozy.files',
type: 'file',
dir_id: 'SOME_DIR_ID',
name: 'myfile.txt',
path: '/mydir/myfile.txt'
})
})

test(`should handle no files in results`, () => {
const client = {} as unknown as CozyClient
const result = addFilePaths(client, [])

it(`should handle no files in results`, () => {
const result = addFilePaths([])
expect(result).toStrictEqual([])
})

test(`should handle when no parent dir is found`, () => {
const client = {
getCollectionFromState: jest.fn().mockReturnValue([])
} as unknown as CozyClient

it(`should handle when no parent dir is found`, () => {
const docs = [
{
_id: 'SOME_FILE_ID',
Expand All @@ -158,14 +148,15 @@ describe('addFilePaths', () => {
}
] as CozyDoc[]

const result = addFilePaths(client, docs)
const result = addFilePaths(docs)

expect(result).toStrictEqual([
{
_id: 'SOME_FILE_ID',
_type: 'io.cozy.files',
type: 'file',
dir_id: 'SOME_DIR_ID'
dir_id: 'SOME_DIR_ID',
path: ''
}
])
})
Expand Down
56 changes: 22 additions & 34 deletions packages/cozy-dataproxy-lib/src/search/helpers/normalizeFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,44 +37,33 @@ export const normalizeFileWithFolders = (
return { ...file, _type: 'io.cozy.files', path }
}

export const addFilePaths = (
client: CozyClient,
docs: CozyDoc[]
): CozyDoc[] => {
export const addFilePaths = (docs: CozyDoc[]): CozyDoc[] => {
const completedDocs = [...docs]
const filesOrDirs = completedDocs.filter(doc => isIOCozyFile(doc))
const files = filesOrDirs.filter(file => file.type === TYPE_FILE)
const filesAndDirs = completedDocs.filter(doc => isIOCozyFile(doc))

if (files.length > 0) {
const dirIds = files.map(file => file.dir_id)
const parentDirs = getDirsFromStore(client, dirIds)
if (parentDirs.length < 1) {
return completedDocs
}
for (const file of files) {
const dir = parentDirs.find(dir => dir._id === file.dir_id)
if (dir) {
const idx = completedDocs.findIndex(doc => doc._id === file._id)
// @ts-expect-error We know that we are manipulating an IOCozyFile here so path exists
completedDocs[idx].path = dir.path
if (filesAndDirs.length > 0) {
const directoryPaths = new Map<string, string>()

filesAndDirs.forEach(file => {
if (file.type === TYPE_DIRECTORY) {
// Get all directory paths
directoryPaths.set(file._id, file.path || '')
}
}
}
return completedDocs
}
})

const getDirsFromStore = (
client: CozyClient,
dirIds: string[]
): IOCozyFile[] => {
// XXX querying from store is surprisingly slow: 100+ ms for 50 docs, while
// this approach takes 2-3ms... It should be investigated in cozy-client
const allFiles = client.getCollectionFromState(FILES_DOCTYPE) as IOCozyFile[]
if (allFiles) {
const dirs = allFiles.filter(file => file.type === TYPE_DIRECTORY)
return dirs.filter(dir => dirIds.includes(dir._id))
return filesAndDirs.map(file => {
if (file.type === TYPE_FILE) {
const parentPath = directoryPaths.get(file.dir_id) || ''
// Add path to all files based on their parent path
return {
...file,
path: parentPath ? `${parentPath}/${file.name}` : ''
}
}
return file
})
}
return []
return completedDocs
}

export const shouldKeepFile = (file: IOCozyFile): boolean => {
Expand Down Expand Up @@ -105,7 +94,6 @@ export const computeFileFullpath = async (
return { ...file, path: newPath }
}
// If there is no path at all, let's compute it from the parent path

const fileWithPath = { ...file }
const parentDir = (await queryDocById(
client,
Expand Down

0 comments on commit 99e2a1b

Please sign in to comment.