From d623bcecae5076dff869f629ac52cbf6231b7ce5 Mon Sep 17 00:00:00 2001 From: Martin Lingstuyl Date: Sun, 11 Feb 2024 22:54:57 +0100 Subject: [PATCH] Fixes paging in SharePoint search using allResults. Closes #5710 --- docs/docs/cmd/spo/spo-search.mdx | 8 ++- src/m365/spo/commands/spo-search.spec.ts | 60 ++++++++++++++-- src/m365/spo/commands/spo-search.ts | 87 +++++++++++++++++------- 3 files changed, 125 insertions(+), 30 deletions(-) diff --git a/docs/docs/cmd/spo/spo-search.mdx b/docs/docs/cmd/spo/spo-search.mdx index 95db55009cb..2768a795038 100644 --- a/docs/docs/cmd/spo/spo-search.mdx +++ b/docs/docs/cmd/spo/spo-search.mdx @@ -25,7 +25,7 @@ m365 spo search [options] : The web against which we want to execute the query. If the parameter is not defined, the query is executed against the web that's used when logging in to the SPO environment. `--allResults` -: Set, to get all results of the search query, instead of the number specified by the `rowlimit` (default: 10) +: Set, to get all results of the search query in batches of 500. `--rowLimit [rowLimit]` : The number of rows to be returned. When the `allResults` option is used, the specified value will define the size of retrieved batches @@ -55,7 +55,7 @@ m365 spo search [options] : The ID of the ranking model to use for the query. `--startRow [startRow]` -: The first row that is included in the search results that are returned. You use this parameter when you want to implement paging for search results. +: The first row that is included in the search results that are returned. You use this parameter when you want to implement manual paging for search results. `--properties [properties]` : Additional properties for the query. @@ -90,6 +90,10 @@ m365 spo search [options] +## Remarks + +When using the `--allResults` option, you cannot use the `--startRow` option. Only use `--startRow` for manual paging purposes. + ## Examples Execute search query to retrieve all Document Sets (ContentTypeId = _0x0120D520_) for the English locale diff --git a/src/m365/spo/commands/spo-search.spec.ts b/src/m365/spo/commands/spo-search.spec.ts index 1d24fda9913..d8c03ebedb2 100644 --- a/src/m365/spo/commands/spo-search.spec.ts +++ b/src/m365/spo/commands/spo-search.spec.ts @@ -23,6 +23,10 @@ enum TestID { QueryDocuments_WithStartRow1Test, QueryDocuments_NoStartRowTest, QueryDocuments_NoParameterTest, + QueryDocuments_WithDocId0Test, + QueryDocuments_WithDocId1Test, + QueryDocuments_WithDocId2Test, + QueryDocuments_WithDocIdAllTest, QueryAll_WithRowLimitTest, QueryAll_WithSourceIdTest, QueryAll_WithTrimDuplicatesTest, @@ -154,7 +158,7 @@ describe(commands.SEARCH, () => { "ResultTitleUrl": null, "RowCount": rows.length, "Table": { - "Rows": fakeRows + "Rows": rows }, "TotalRows": returnArrayLength, "TotalRowsIncludingDuplicates": returnArrayLength @@ -195,6 +199,27 @@ describe(commands.SEARCH, () => { executedTest = TestID.QueryDocuments_NoParameterTest; return getQueryResult(rows); } + if (urlContains(opts, `QUERYTEXT=\'ISDOCUMENT:1 INDEXDOCID>0\'`)) { + const rows = filterRows(fakeRows, 'ISDOCUMENT', 'TRUE'); + + if (urlContains(opts, 'ROWLIMIT=500')) { + executedTest = TestID.QueryDocuments_WithDocIdAllTest; + return getQueryResult(rows, 4); + } + else { + executedTest = TestID.QueryDocuments_WithDocId0Test; + return getQueryResult([rows[0]], 2); + } + } + if (urlContains(opts, `QUERYTEXT=\'ISDOCUMENT:1 INDEXDOCID>1\'`)) { + const rows = filterRows(fakeRows, 'ISDOCUMENT', 'TRUE'); + executedTest = TestID.QueryDocuments_WithDocId1Test; + return getQueryResult([rows[1]], 1); + } + if (urlContains(opts, `QUERYTEXT=\'ISDOCUMENT:1 INDEXDOCID>2\'`)) { + executedTest = TestID.QueryDocuments_WithDocId2Test; + return getQueryResult([], 0); + } if (urlContains(opts, 'QUERYTEXT=\'*\'')) { let rows = fakeRows; if (urlContains(opts, 'ROWLIMIT=1')) { @@ -381,8 +406,7 @@ describe(commands.SEARCH, () => { rowLimit: 1 } }); - assert.strictEqual(returnArrayLength, 2); - assert.strictEqual(executedTest, TestID.QueryDocuments_WithStartRow1Test); + assert.strictEqual(executedTest, TestID.QueryDocuments_WithDocId2Test); }); it('executes search request with trimDuplicates', async () => { @@ -463,11 +487,26 @@ describe(commands.SEARCH, () => { output: 'json', queryText: 'IsDocument:1', allResults: true, + verbose: true, rowLimit: 1 } }); - assert.strictEqual(returnArrayLength, 2); - assert.strictEqual(executedTest, TestID.QueryDocuments_WithStartRow1Test); + assert.strictEqual(executedTest, TestID.QueryDocuments_WithDocId2Test); + }); + + it('executes search request with \'allResults\' and no rowlimit', async () => { + sinon.stub(request, 'get').callsFake(getFakes); + + await command.action(logger, { + options: { + output: 'json', + queryText: 'IsDocument:1', + allResults: true, + verbose: true + } + }); + assert.strictEqual(returnArrayLength, 4); + assert.strictEqual(executedTest, TestID.QueryDocuments_WithDocIdAllTest); }); it('executes search request with selectProperties', async () => { @@ -827,6 +866,17 @@ describe(commands.SEARCH, () => { assert.notStrictEqual(actual, true); }); + it('fails validation if startRow is set together with allResults', async () => { + const actual = await command.validate({ + options: { + startRow: 1, + allResults: true, + queryText: '*' + } + }, commandInfo); + assert.notStrictEqual(actual, true); + }); + it('fails validation if the culture is not a valid number', async () => { const actual = await command.validate({ options: { diff --git a/src/m365/spo/commands/spo-search.ts b/src/m365/spo/commands/spo-search.ts index 5b90ebfc241..698f4896bbf 100644 --- a/src/m365/spo/commands/spo-search.ts +++ b/src/m365/spo/commands/spo-search.ts @@ -180,6 +180,7 @@ class SpoSearchCommand extends SpoCommand { if (args.options.sortList && !/^([a-z0-9_]+:(ascending|descending))(,([a-z0-9_]+:(ascending|descending)))*$/gi.test(args.options.sortList)) { return `sortlist parameter value '${args.options.sortList}' does not match the required pattern (=comma-separated list of ':(ascending|descending)'-pattern)`; } + if (args.options.rowLimit && !isNumber(args.options.rowLimit)) { return `${args.options.rowLimit} is not a valid number`; } @@ -188,6 +189,10 @@ class SpoSearchCommand extends SpoCommand { return `${args.options.startRow} is not a valid number`; } + if (args.options.startRow && args.options.allResults) { + return 'You cannot specify startRow when allResults is set'; + } + if (args.options.culture && !isNumber(args.options.culture)) { return `${args.options.culture} is not a valid number`; } @@ -212,9 +217,7 @@ class SpoSearchCommand extends SpoCommand { await logger.logToStderr(`Executing search query '${args.options.queryText}' on site at ${webUrl}...`); } - const startRow = args.options.startRow ? args.options.startRow : 0; - - const results: SearchResult[] = await this.executeSearchQuery(logger, args, webUrl, [], startRow); + const results: SearchResult[] = await this.executeSearchQuery(logger, args, webUrl, []); this.printResults(logger, args, results); } catch (err: any) { @@ -222,8 +225,8 @@ class SpoSearchCommand extends SpoCommand { } } - private async executeSearchQuery(logger: Logger, args: CommandArgs, webUrl: string, resultSet: SearchResult[], startRow: number): Promise { - const requestUrl: string = await this.getRequestUrl(webUrl, logger, args, startRow); + private async executeSearchQuery(logger: Logger, args: CommandArgs, webUrl: string, resultSet: SearchResult[], lastDocId: string = '0'): Promise { + const requestUrl: string = await this.getRequestUrl(webUrl, logger, args, lastDocId); const requestOptions: any = { url: requestUrl, headers: { @@ -235,29 +238,43 @@ class SpoSearchCommand extends SpoCommand { const searchResult: SearchResult = await request.get(requestOptions); resultSet.push(searchResult); - if (args.options.allResults && startRow + searchResult.PrimaryQueryResult.RelevantResults.RowCount < searchResult.PrimaryQueryResult.RelevantResults.TotalRows) { - const nextStartRow = startRow + searchResult.PrimaryQueryResult.RelevantResults.RowCount; - return this.executeSearchQuery(logger, args, webUrl, resultSet, nextStartRow); + const rowLimit = args.options.rowLimit ? args.options.rowLimit : 500; + + if (args.options.allResults && searchResult.PrimaryQueryResult.RelevantResults.RowCount === rowLimit) { + if (this.verbose) { + await logger.logToStderr(`Processing search query, retrieved ${resultSet.length * searchResult.PrimaryQueryResult.RelevantResults.RowCount} of ${resultSet[0].PrimaryQueryResult.RelevantResults.TotalRows} items...`); + } + + // When running in allResults mode, paging is done using the DocId property + // This is a more stable way than using the StartRow property. + // Explanation: https://learn.microsoft.com/sharepoint/dev/general-development/pagination-for-large-result-sets + const lastRow = searchResult.PrimaryQueryResult.RelevantResults.Table.Rows[searchResult.PrimaryQueryResult.RelevantResults.RowCount - 1]; + const newLastDocId = lastRow.Cells.filter(cell => cell.Key === 'DocId')[0].Value; + + return this.executeSearchQuery(logger, args, webUrl, resultSet, newLastDocId); } return resultSet; } - private async getRequestUrl(webUrl: string, logger: Logger, args: CommandArgs, startRow: number): Promise { + private async getRequestUrl(webUrl: string, logger: Logger, args: CommandArgs, lastDocId: string): Promise { // get the list of selected properties const selectPropertiesArray: string[] = this.getSelectPropertiesArray(args); + // get the sort list + const sortList = this.getSortList(args); + // transform arg data to query string parameters - const propertySelectRequestString: string = `&selectproperties='${formatting.encodeQueryParameter(selectPropertiesArray.join(","))}'`; - const startRowRequestString: string = `&startrow=${startRow ? startRow : 0}`; - const rowLimitRequestString: string = args.options.rowLimit ? `&rowlimit=${args.options.rowLimit}` : ``; + const propertySelectRequestString: string = `&selectproperties='${formatting.encodeQueryParameter(selectPropertiesArray.join(','))}'`; + const startRowRequestString: string = `&startrow=${args.options.startRow ? args.options.startRow : 0}`; + const rowLimitRequestString: string = args.options.rowLimit ? `&rowlimit=${args.options.rowLimit}` : (args.options.allResults ? `&rowlimit=500` : ''); const sourceIdRequestString: string = args.options.sourceId ? `&sourceid='${args.options.sourceId}'` : ``; - const trimDuplicatesRequestString: string = `&trimduplicates=${args.options.trimDuplicates ? args.options.trimDuplicates : "false"}`; - const enableStemmingRequestString: string = `&enablestemming=${typeof (args.options.enableStemming) === 'undefined' ? "true" : args.options.enableStemming}`; + const trimDuplicatesRequestString: string = `&trimduplicates=${args.options.trimDuplicates ? args.options.trimDuplicates : 'false'}`; + const enableStemmingRequestString: string = `&enablestemming=${typeof (args.options.enableStemming) === 'undefined' ? 'true' : args.options.enableStemming}`; const cultureRequestString: string = args.options.culture ? `&culture=${args.options.culture}` : ``; const refinementFiltersRequestString: string = args.options.refinementFilters ? `&refinementfilters='${args.options.refinementFilters}'` : ``; const queryTemplateRequestString: string = args.options.queryTemplate ? `&querytemplate='${args.options.queryTemplate}'` : ``; - const sortListRequestString: string = args.options.sortList ? `&sortList='${formatting.encodeQueryParameter(args.options.sortList)}'` : ``; + const sortListRequestString: string = sortList ? `&sortList='${sortList}'` : ``; const rankingModelIdRequestString: string = args.options.rankingModelId ? `&rankingmodelid='${args.options.rankingModelId}'` : ``; const propertiesRequestString: string = this.getPropertiesRequestString(args); const refinersRequestString: string = args.options.refiners ? `&refiners='${args.options.refiners}'` : ``; @@ -267,9 +284,10 @@ class SpoSearchCommand extends SpoCommand { const processBestBetsRequestString: string = typeof (args.options.processBestBets) === 'undefined' ? `` : `&processbestbets=${args.options.processBestBets}`; const enableQueryRulesRequestString: string = typeof (args.options.enableQueryRules) === 'undefined' ? `` : `&enablequeryrules=${args.options.enableQueryRules}`; const processPersonalFavoritesRequestString: string = typeof (args.options.processPersonalFavorites) === 'undefined' ? `` : `&processpersonalfavorites=${args.options.processPersonalFavorites}`; + const indexDocIdQueryText = args.options.allResults ? ` IndexDocId>${lastDocId}` : ''; // construct single requestUrl - const requestUrl = `${webUrl}/_api/search/query?querytext='${args.options.queryText}'`.concat( + const requestUrl = `${webUrl}/_api/search/query?querytext='${args.options.queryText}${indexDocIdQueryText}'`.concat( propertySelectRequestString, startRowRequestString, rowLimitRequestString, @@ -302,7 +320,7 @@ class SpoSearchCommand extends SpoCommand { let properties = args.options.properties ? args.options.properties : ''; if (args.options.sourceName) { - if (properties && !properties.endsWith(",")) { + if (properties && !properties.endsWith(',')) { properties += `,`; } @@ -313,9 +331,32 @@ class SpoSearchCommand extends SpoCommand { } private getSelectPropertiesArray(args: CommandArgs): string[] { - return args.options.selectProperties - ? args.options.selectProperties.split(",") - : ["Title", "OriginalPath"]; + const selectProperties = args.options.selectProperties + ? args.options.selectProperties.split(',') + : ['Title', 'OriginalPath']; + + if (args.options.allResults) { + selectProperties.filter(p => p.toLowerCase() !== 'docid').push('DocId'); + } + + return selectProperties; + } + + private getSortList(args: CommandArgs): string { + const sortList = []; + if (args.options.allResults) { + sortList.push(formatting.encodeQueryParameter('[DocId]:ascending')); + } + + if (args.options.sortList) { + const sortListArray = args.options.sortList.split(','); + + sortListArray.forEach(sortItem => { + sortList.push(formatting.encodeQueryParameter(sortItem)); + }); + } + + return sortList.join(','); } private async printResults(logger: Logger, args: CommandArgs, results: SearchResult[]): Promise { @@ -327,9 +368,9 @@ class SpoSearchCommand extends SpoCommand { } if (!args.options.output || cli.shouldTrimOutput(args.options.output)) { - await logger.log("# Rows: " + results[results.length - 1].PrimaryQueryResult.RelevantResults.TotalRows); - await logger.log("# Rows (Including duplicates): " + results[results.length - 1].PrimaryQueryResult.RelevantResults.TotalRowsIncludingDuplicates); - await logger.log("Elapsed Time: " + this.getElapsedTime(results)); + await logger.log('# Rows: ' + results[results.length - 1].PrimaryQueryResult.RelevantResults.TotalRows); + await logger.log('# Rows (Including duplicates): ' + results[results.length - 1].PrimaryQueryResult.RelevantResults.TotalRowsIncludingDuplicates); + await logger.log('Elapsed Time: ' + this.getElapsedTime(results)); } }