diff --git a/docs/reference/es_compatible_api.md b/docs/reference/es_compatible_api.md index 760ccbbf04c..66927a5a72d 100644 --- a/docs/reference/es_compatible_api.md +++ b/docs/reference/es_compatible_api.md @@ -133,7 +133,7 @@ If a parameter appears both as a query string parameter and in the JSON payload, | `from` | `Integer` | The rank of the first hit to return. This is useful for pagination. | 0 | | `q` | `String` | The search query. | (Optional) | | `size` | `Integer` | Number of hits to return. | 10 | -| `sort` | `String` | Describes how documents should be ranked. See [Sort order](#sort-order) | `[]` | (Optional) | +| `sort` | `String` | Describes how documents should be ranked. See [Sort order](#sort-order) | (Optional) | | `scroll` | `Duration` | Creates a scroll context for "time to live". See [Scroll](#_scroll--scroll-api). | (Optional) | #### Supported Request Body parameters @@ -145,7 +145,8 @@ If a parameter appears both as a query string parameter and in the JSON payload, | `query` | `Json object` | Describe the search query. See [Query DSL](#query-dsl) | (Optional) | | `size` | `Integer` | Number of hits to return. | 10 | | `sort` | `JsonObject[]` | Describes how documents should be ranked. See [Sort order](#sort-order) | `[]` | -| `aggs` | `Json object` | Aggregation definition. See [Aggregations](aggregation.md). | `{}` | ` | +| `search_after` | `Any[]` | Ignore documents with a SortingValue preceding or equal to the parameter | (Optional) | +| `aggs` | `Json object` | Aggregation definition. See [Aggregations](aggregation.md). | `{}` | #### Sort order @@ -168,7 +169,7 @@ following syntax. { // ... "sort" : [ - { "timestamp" : {"order" : "asc"}}, + { "timestamp" : {"format": "epoch_millis_as_int","order" : "asc"}}, { "serial_number" : "desc" } ] // ... 
@@ -185,6 +186,44 @@ It is also possible to not supply an order and rely on the default order using t } ``` +If no format is provided for timestamps, timestamps are returned with nanosecond precision. Beware +this means the resulting json may contain high numbers for which there is loss of precision when +using languages where all numbers are floats, such as JavaScript. + +#### Search after + +When sorting results, the answer looks like the following + +```json +{ + // ... + "hits": { + // ... + "hits": [ + // ... + { + // ... + "sort": [ + 1701962929199000000 + ] + } + ] + } +} +``` + +You can pass the `sort` value of the last hit in a subsequent request where other fields are kept unchanged: +```json +{ + // keep all fields from the original request + "search_after": [ + 1701962929199000000 + ] +} +``` + +This allows you to paginate your results. + ### `_msearch`   Multi search API ``` @@ -219,7 +258,7 @@ GET api/v1/_elastic/_search/scroll | Variable | Type | Description | Default value | | ----------- | ------------------------------------------- | ----------- | ------------- | -| `scroll_id` | Scroll id (obtained from a search response) | Required | +| `scroll_id` | Scroll id (obtained from a search response) | Required | | The `_search/scroll` endpoint, in combination with the `_search` API makes it possible to request successive pages of search results. @@ -632,4 +671,4 @@ GET api/v1/_elastic/stackoverflow*/_search } } } -``` \ No newline at end of file +``` diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index f63d7bc6aeb..5a2c6ec4b5e 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -38,6 +38,12 @@ pub struct LeafSearchCache { // match, the merged_time_range is strictly smaller, and every hit we had fits in the new // timebound, we can reply from cache, saying we hit only result.partial_hits.len() res.
It always // undercount, and necessarily returns the right hits. +// TODO if we stored a result for X hits, but a subsequent request asks for Y < X hits, we can +// modify the answer and serve from cache. +// TODO mix of 1 and 3. +// TODO this means given a request for X documents, we could search for k*X docs in each split, +// truncate to X while merging, and get free results from cache for at least the next k subsequent +// queries which vary only by search_after. impl LeafSearchCache { pub fn new(capacity: usize) -> LeafSearchCache { diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 72f0f1e34c6..d4e23cc2396 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -353,7 +353,7 @@ fn validate_sort_by_field( has_timestamp_format: bool, schema: &Schema, ) -> crate::Result<()> { - if field_name == "_score" { + if ["_score", "_shard_doc", "_doc"].contains(&field_name) { return Ok(()); } let dynamic_field_opt = schema.get_field(DYNAMIC_FIELD_NAME).ok(); @@ -1728,6 +1728,27 @@ mod tests { .unwrap(); } + #[test] + fn test_validate_sort_by_docid() { + let sort_fields = vec![ + SortField { + field_name: "_doc".to_string(), + sort_order: 0, + sort_datetime_format: None, + }, + SortField { + field_name: "_shard_doc".to_string(), + sort_order: 0, + sort_datetime_format: None, + }, + ]; + let mut schema_builder = Schema::builder(); + schema_builder.add_date_field("timestamp", FAST); + schema_builder.add_u64_field("id", FAST); + let schema = schema_builder.build(); + validate_sort_by_fields_and_search_after(&sort_fields, &None, &schema).unwrap(); + } + #[test] fn test_validate_sort_by_fields_and_search_after_invalid_1() { // 2 sort fields + search after with only one sort value is invalid. 
diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0008-sort_by.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0008-sort_by.yaml index 25b81b5e7c0..a140beb8f6a 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0008-sort_by.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0008-sort_by.yaml @@ -63,3 +63,19 @@ expected: - _source: actor: id: 5688 +--- +json: + size: 1 + query: + match_all: {} + sort: + _doc: {} +expected: + hits: + total: + value: 100 + relation: eq + hits: + - _source: + actor: + id: 1762355