diff --git a/docs/changelog/111932.yaml b/docs/changelog/111932.yaml new file mode 100644 index 0000000000000..ce840ecebcff0 --- /dev/null +++ b/docs/changelog/111932.yaml @@ -0,0 +1,6 @@ +pr: 111932 +summary: Fix union-types where one index is missing the field +area: ES|QL +type: bug +issues: + - 111912 diff --git a/docs/changelog/112151.yaml b/docs/changelog/112151.yaml new file mode 100644 index 0000000000000..f5cbfd8da07c2 --- /dev/null +++ b/docs/changelog/112151.yaml @@ -0,0 +1,5 @@ +pr: 112151 +summary: Store original source for keywords using a normalizer +area: Logs +type: enhancement +issues: [] diff --git a/docs/changelog/112270.yaml b/docs/changelog/112270.yaml new file mode 100644 index 0000000000000..1e6b9c7fc9290 --- /dev/null +++ b/docs/changelog/112270.yaml @@ -0,0 +1,5 @@ +pr: 112270 +summary: Support sparse embedding models in the elasticsearch inference service +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/112273.yaml b/docs/changelog/112273.yaml new file mode 100644 index 0000000000000..3182a1884a145 --- /dev/null +++ b/docs/changelog/112273.yaml @@ -0,0 +1,5 @@ +pr: 111181 +summary: "[Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API" +area: Machine Learning +type: enhancement +issues: [ ] diff --git a/docs/changelog/112341.yaml b/docs/changelog/112341.yaml new file mode 100644 index 0000000000000..8f44b53ad9998 --- /dev/null +++ b/docs/changelog/112341.yaml @@ -0,0 +1,5 @@ +pr: 112341 +summary: Fix DLS using runtime fields and synthetic source +area: Authorization +type: bug +issues: [] diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index 33db148755d8e..8fdf8aecc2ae5 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -39,6 +39,7 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] 
include::put-inference.asciidoc[] +include::service-alibabacloud-ai-search.asciidoc[] include::service-amazon-bedrock.asciidoc[] include::service-anthropic.asciidoc[] include::service-azure-ai-studio.asciidoc[] diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 57485e0720cca..ba26a563541fc 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -39,6 +39,7 @@ The create {infer} API enables you to create an {infer} endpoint and configure a The following services are available through the {infer} API, click the links to review the configuration details of the services: +* <> * <> * <> * <> diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc new file mode 100644 index 0000000000000..23a3d532635ac --- /dev/null +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -0,0 +1,184 @@ +[[infer-service-alibabacloud-ai-search]] +=== AlibabaCloud AI Search {infer} service + +Creates an {infer} endpoint to perform an {infer} task with the `alibabacloud-ai-search` service. + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-alibabacloud-ai-search-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `text_embedding`, +* `sparse_embedding`, +* `rerank`. +-- + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) The type of service supported for the specified task type. +In this case, +`alibabacloud-ai-search`. 
+ +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `alibabacloud-ai-search` service. +-- + +`api_key`::: +(Required, string) +A valid API key for the AlibabaCloud AI Search API. + +`service_id`::: +(Required, string) +The name of the model service to use for the {infer} task. ++ +-- +Available service_ids for the `text_embedding` task: + +* `ops-text-embedding-001` +* `ops-text-embedding-zh-001` +* `ops-text-embedding-en-001` +* `ops-text-embedding-002` + +For the supported `text_embedding` service_ids, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[documentation]. + +Available service_id for the `sparse_embedding` task: + +* `ops-text-sparse-embedding-001` + +For the supported `sparse_embedding` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-sparse-embedding-api-details[documentation]. + +Available service_id for the `rerank` task: + +* `ops-bge-reranker-larger` + +For the supported `rerank` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/ranker-api-details[documentation]. +-- + +`host`::: +(Required, string) +The name of the host address used for the {infer} task. You can find the host address at https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[the API keys section] of the documentation. + +`workspace`::: +(Required, string) +The name of the workspace used for the {infer} task. + +`rate_limit`::: +(Optional, object) +By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. +This helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. 
+To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. +===== ++ +.`task_settings` for the `sparse_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. + +`return_token`::: +(Optional, boolean) +If `true`, the token name will be returned in the response. Defaults to `false` which means only the token ID will be returned in the response. +===== + +[discrete] +[[inference-example-alibabacloud-ai-search]] +==== AlibabaCloud AI Search service examples + +The following example shows how to create an {infer} endpoint called `alibabacloud_ai_search_embeddings` to perform a `text_embedding` task type. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The following example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_sparse` to perform a `sparse_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-sparse-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_rerank` to perform a `rerank` task type. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/alibabacloud_ai_search_rerank +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-bge-reranker-larger", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc index 4ffa368613a0e..dbffd5c26fbcc 100644 --- a/docs/reference/inference/service-amazon-bedrock.asciidoc +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -122,14 +122,6 @@ Only available for `anthropic`, `cohere`, and `mistral` providers. Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. Should not be used if `temperature` is specified. -===== -+ -.`task_settings` for the `text_embedding` task type -[%collapsible%closed] -===== - -There are no `task_settings` available for the `text_embedding` task type. - ===== [discrete] diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 99fd41ee2db65..572cad591fba6 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -31,6 +31,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `rerank`, +* `sparse_embedding`, * `text_embedding`. 
-- @@ -182,4 +183,4 @@ PUT _inference/text_embedding/my-e5-model } } ------------------------------------------------------------ -// TEST[skip:TBD] \ No newline at end of file +// TEST[skip:TBD] diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 3fc23b44994a7..cd9c126e7b1fd 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,42 +1,70 @@ [[elasticsearch-intro]] == What is {es}? -_**You know, for search (and analysis)**_ - -{es} is the distributed search and analytics engine at the heart of -the {stack}. {ls} and {beats} facilitate collecting, aggregating, and -enriching your data and storing it in {es}. {kib} enables you to -interactively explore, visualize, and share insights into your data and manage -and monitor the stack. {es} is where the indexing, search, and analysis -magic happens. - -{es} provides near real-time search and analytics for all types of data. Whether you -have structured or unstructured text, numerical data, or geospatial data, -{es} can efficiently store and index it in a way that supports fast searches. -You can go far beyond simple data retrieval and aggregate information to discover -trends and patterns in your data. And as your data and query volume grows, the -distributed nature of {es} enables your deployment to grow seamlessly right -along with it. 
- -While not _every_ problem is a search problem, {es} offers speed and flexibility -to handle data in a wide variety of use cases: - -* Add a search box to an app or website -* Store and analyze logs, metrics, and security event data -* Use machine learning to automatically model the behavior of your data in real - time -* Use {es} as a vector database to create, store, and search vector embeddings -* Automate business workflows using {es} as a storage engine -* Manage, integrate, and analyze spatial information using {es} as a geographic - information system (GIS) -* Store and process genetic data using {es} as a bioinformatics research tool - -We’re continually amazed by the novel ways people use search. But whether -your use case is similar to one of these, or you're using {es} to tackle a new -problem, the way you work with your data, documents, and indices in {es} is -the same. + +{es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. +It's optimized for speed and relevance on production-scale workloads. +Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. + +[TIP] +==== +{es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. +==== + +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack] and powers the Elastic https://www.elastic.co/enterprise-search[Search], https://www.elastic.co/observability[Observability] and https://www.elastic.co/security[Security] solutions. + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +* *Monitor log and event data*. Store logs, metrics, and event data for observability and security information and event management (SIEM). +* *Build search applications*. Add search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. 
+* *Vector database*. Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Retrieval augmented generation (RAG)*. Use {es} as a retrieval engine to augment Generative AI models. +* *Application and security monitoring*. Monitor and analyze application performance and security data effectively. +* *Machine learning*. Use {ml} to automatically model the behavior of your data in real-time. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to our https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. +// Link to demos, search labs chatbots + +[discrete] +[[elasticsearch-intro-elastic-stack]] +.What is the Elastic Stack? +******************************* +{es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data. +https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current/stack-components.html[Learn more about the Elastic Stack]. +******************************* +// TODO: Remove once we've moved Stack Overview to a subpage? + +[discrete] +[[elasticsearch-intro-deploy]] +=== Deployment options + +To use {es}, you need a running instance of the {es} service. +You can deploy {es} in various ways: + +* <>. Get started quickly with a minimal local Docker setup. +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud*]. {es} is available as part of our hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14 day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]. Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14 day free trial]. 
+ +**Advanced deployment options** + +* <>. Install, configure, and run {es} on your own premises. +* {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]. Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. +* {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]. Deploy Elastic Cloud on Kubernetes. + +[discrete] +[[elasticsearch-next-steps]] +=== Learn more + +Here are some resources to help you get started: + +* <>. A beginner's guide to deploying your first {es} instance, indexing data, and running queries. +* https://elastic.co/webinars/getting-started-elasticsearch[Webinar: Introduction to {es}]. Register for our live webinars to learn directly from {es} experts. +* https://www.elastic.co/search-labs[Elastic Search Labs]. Tutorials and blogs that explore AI-powered search using the latest {es} features. +** Follow our tutorial https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[to build a hybrid search solution in Python]. +** Check out the https://github.com/elastic/elasticsearch-labs?tab=readme-ov-file#elasticsearch-examples--apps[`elasticsearch-labs` repository] for a range of Python notebooks and apps for various use cases. [[documents-indices]] -=== Data in: documents and indices +=== Documents and indices {es} is a distributed document store. Instead of storing information as rows of columnar data, {es} stores complex data structures that have been serialized @@ -65,8 +93,7 @@ behavior makes it easy to index and explore your data--just start indexing documents and {es} will detect and map booleans, floating point and integer values, dates, and strings to the appropriate {es} data types. -Ultimately, however, you know more about your data and how you want to use it -than {es} can. 
You can define rules to control dynamic mapping and explicitly +You can define rules to control dynamic mapping and explicitly define mappings to take full control of how fields are stored and indexed. Defining your own mappings enables you to: @@ -89,7 +116,7 @@ used at search time. When you query a full-text field, the query text undergoes the same analysis before the terms are looked up in the index. [[search-analyze]] -=== Information out: search and analyze +=== Search and analyze While you can use {es} as a document store and retrieve documents and their metadata, the real power comes from being able to easily access the full suite @@ -160,27 +187,8 @@ size 70 needles, you’re displaying a count of the size 70 needles that match your users' search criteria--for example, all size 70 _non-stick embroidery_ needles. -[discrete] -[[more-features]] -===== But wait, there’s more - -Want to automate the analysis of your time series data? You can use -{ml-docs}/ml-ad-overview.html[machine learning] features to create accurate -baselines of normal behavior in your data and identify anomalous patterns. With -machine learning, you can detect: - -* Anomalies related to temporal deviations in values, counts, or frequencies -* Statistical rarity -* Unusual behaviors for a member of a population - -And the best part? You can do this without having to specify algorithms, models, -or other data science-related configurations. - [[scalability]] -=== Scalability and resilience: clusters, nodes, and shards -++++ -Scalability and resilience -++++ +=== Scalability and resilience {es} is built to be always available and to scale with your needs. It does this by being distributed by nature. You can add servers (nodes) to a cluster to @@ -209,7 +217,7 @@ interrupting indexing or query operations. [discrete] [[it-depends]] -==== It depends... 
+==== Shard size and number of shards There are a number of performance considerations and trade offs with respect to shard size and the number of primary shards configured for an index. The more @@ -237,7 +245,7 @@ testing with your own data and queries]. [discrete] [[disaster-ccr]] -==== In case of disaster +==== Disaster recovery A cluster's nodes need good, reliable connections to each other. To provide better connections, you typically co-locate the nodes in the same data center or @@ -257,7 +265,7 @@ secondary clusters are read-only followers. [discrete] [[admin]] -==== Care and feeding +==== Security, management, and monitoring As with any enterprise system, you need tools to secure, manage, and monitor your {es} clusters. Security, monitoring, and administrative features @@ -265,3 +273,5 @@ that are integrated into {es} enable you to use {kibana-ref}/introduction.html[{ as a control center for managing a cluster. Features like <> and <> help you intelligently manage your data over time. + +Refer to <> for more information. \ No newline at end of file diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 522a0c54c8aad..a006f288dc66d 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -7,8 +7,8 @@ beta[] -The `semantic_text` field type automatically generates embeddings for text -content using an inference endpoint. +The `semantic_text` field type automatically generates embeddings for text content using an inference endpoint. +Long passages are <> to smaller sections to enable the processing of larger corpuses of text. The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings. You can create the inference endpoint by using the <>. 
diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 89c8a78eccbc6..21f4ae2317e6a 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -35,313 +35,30 @@ starting from the beginning of the cluster state update. Refer to [[cluster-fault-detection-troubleshooting]] ==== Troubleshooting an unstable cluster -//tag::troubleshooting[] -Normally, a node will only leave a cluster if deliberately shut down. If a node -leaves the cluster unexpectedly, it's important to address the cause. A cluster -in which nodes leave unexpectedly is unstable and can create several issues. -For instance: -* The cluster health may be yellow or red. - -* Some shards will be initializing and other shards may be failing. - -* Search, indexing, and monitoring operations may fail and report exceptions in -logs. - -* The `.security` index may be unavailable, blocking access to the cluster. - -* The master may appear busy due to frequent cluster state updates. - -To troubleshoot a cluster in this state, first ensure the cluster has a -<>. Next, focus on the nodes -unexpectedly leaving the cluster ahead of all other issues. It will not be -possible to solve other issues until the cluster has a stable master node and -stable node membership. - -Diagnostics and statistics are usually not useful in an unstable cluster. These -tools only offer a view of the state of the cluster at a single point in time. -Instead, look at the cluster logs to see the pattern of behaviour over time. -Focus particularly on logs from the elected master. 
When a node leaves the -cluster, logs for the elected master include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] - node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [disconnected] ----- - -This message says that the `NodeLeftExecutor` on the elected master -(`instance-0000000000`) processed a `node-left` task, identifying the node that -was removed and the reason for its removal. When the node joins the cluster -again, logs for the elected master will include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] - node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [joining after restart, removed [24s] ago with reason [disconnected]] ----- - -This message says that the `NodeJoinExecutor` on the elected master -(`instance-0000000000`) processed a `node-join` task, identifying the node that -was added to the cluster and the reason for the task. - -Other nodes may log similar messages, but report fewer details: - -[source,text] ----- -[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] - [instance-0000000001] removed { - {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} - {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} - }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} ----- - -These messages are not especially useful for troubleshooting, so focus on the -ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted -on the elected master and which contain more details. 
If you don't see the -messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: - -* You're looking at the logs for the elected master node. - -* The logs cover the correct time period. - -* Logging is enabled at `INFO` level. - -Nodes will also log a message containing `master node changed` whenever they -start or stop following the elected master. You can use these messages to -determine each node's view of the state of the master over time. - -If a node restarts, it will leave the cluster and then join the cluster again. -When it rejoins, the `NodeJoinExecutor` will log that it processed a -`node-join` task indicating that the node is `joining after restart`. If a node -is unexpectedly restarting, look at the node's logs to see why it is shutting -down. - -The <> API on the affected node will also provide some useful -information about the situation. - -If the node did not restart then you should look at the reason for its -departure more closely. Each reason has different troubleshooting steps, -described below. There are three possible reasons: - -* `disconnected`: The connection from the master node to the removed node was -closed. - -* `lagging`: The master published a cluster state update, but the removed node -did not apply it within the permitted timeout. By default, this timeout is 2 -minutes. Refer to <> for information about the -settings which control this mechanism. - -* `followers check retry count exceeded`: The master sent a number of -consecutive health checks to the removed node. These checks were rejected or -timed out. By default, each health check times out after 10 seconds and {es} -removes the node removed after three consecutively failed health checks. Refer -to <> for information about the settings which -control this mechanism. +See <>. 
[discrete] ===== Diagnosing `disconnected` nodes -Nodes typically leave the cluster with reason `disconnected` when they shut -down, but if they rejoin the cluster without restarting then there is some -other problem. - -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -The connections from the elected master node to every other node in the cluster -are particularly important. The elected master never spontaneously closes its -outbound connections to other nodes. Similarly, once an inbound connection is -fully established, a node never spontaneously it unless the node is shutting -down. - -If you see a node unexpectedly leave the cluster with the `disconnected` -reason, something other than {es} likely caused the connection to close. A -common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. +See <>. 
[discrete] ===== Diagnosing `lagging` nodes -{es} needs every node to process cluster state updates reasonably quickly. If a -node takes too long to process a cluster state update, it can be harmful to the -cluster. The master will remove these nodes with the `lagging` reason. Refer to -<> for information about the settings which control -this mechanism. - -Lagging is typically caused by performance issues on the removed node. However, -a node may also lag due to severe network delays. To rule out network delays, -ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more -information about the root cause. - -If you're an advanced user, you can get more detailed information about what -the node was doing when it was removed by configuring the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API on the faulty node and report the results in -the logs on the elected master. The results are compressed, encoded, and split -into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... 
-[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. [discrete] ===== Diagnosing `follower check retry count exceeded` nodes -Nodes sometimes leave the cluster with reason `follower check retry count -exceeded` when they shut down, but if they rejoin the cluster without -restarting then there is some other problem. - -{es} needs every node to respond to network messages successfully and -reasonably quickly. If a node rejects requests or does not respond at all then -it can be harmful to the cluster. If enough consecutive checks fail then the -master will remove the node with reason `follower check retry count exceeded` -and will indicate in the `node-left` message how many of the consecutive -unsuccessful checks failed and how many of them timed out. Refer to -<> for information about the settings which control -this mechanism. - -Timeouts and failures may be due to network delays or performance problems on -the affected nodes. Ensure that `net.ipv4.tcp_retries2` is -<> to eliminate network delays as -a possible cause for this kind of instability. Log messages containing -`warn threshold` may give further clues about the cause of the instability. - -If the last check failed with an exception then the exception is reported, and -typically indicates the problem that needs to be addressed. 
If any of the -checks timed out then narrow down the problem as follows. - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] - -By default the follower checks will time out after 30s, so if node departures -are unpredictable then capture stack dumps every 15s to be sure that at least -one stack dump was taken at the right time. +See <>. [discrete] ===== Diagnosing `ShardLockObtainFailedException` failures -If a node leaves and rejoins the cluster then {es} will usually shut down and -re-initialize its shards. If the shards do not shut down quickly enough then -{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. - -To gather more information about the reason for shards shutting down slowly, -configure the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.env.NodeEnvironment: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API whenever it encounters a -`ShardLockObtainFailedException`. The results are compressed, encoded, and -split into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... 
-[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- -//end::troubleshooting[] +See <>. [discrete] ===== Diagnosing other network disconnections -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -{es} nodes will only actively close an outbound connection to another node if -the other node leaves the cluster. See -<> for further information about -identifying and troubleshooting this situation. If an outbound connection -closes for some other reason, nodes will log a message such as the following: - -[source,text] ----- -[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote ----- - -Similarly, once an inbound connection is fully established, a node never -spontaneously closes it unless the node is shutting down. - -Therefore if you see a node report that a connection to another node closed -unexpectedly, something other than {es} likely caused the connection to close. -A common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. 
If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. +See <>. diff --git a/docs/reference/search/search-your-data/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc index 46a996c237c38..47618ecd9fd7a 100644 --- a/docs/reference/search/search-your-data/near-real-time.asciidoc +++ b/docs/reference/search/search-your-data/near-real-time.asciidoc @@ -2,7 +2,7 @@ [[near-real-time]] === Near real-time search -The overview of <> indicates that when a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? +When a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? Lucene, the Java libraries on which {es} is based, introduced the concept of per-segment search. A _segment_ is similar to an inverted index, but the word _index_ in Lucene means "a collection of segments plus a commit point". After a commit, a new segment is added to the commit point and the buffer is cleared. 
diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index f74bc65e31bf0..719aeb070fc7c 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -17,6 +17,7 @@ Azure based examples use models available through https://ai.azure.com/explore/m or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models]. +AlibabaCloud AI Search examples use the `ops-text-embedding-zh-001` model from https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[the AlibabaCloud AI Search base models]. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index 997dbbe8a20e6..3a686e27cf580 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-ingest-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index 6adf3d2ebbf46..6678b60fabc40 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -216,3 +216,29 @@ PUT _ingest/pipeline/amazon_bedrock_embeddings and the `output_field` that will contain the {infer} results. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/alibabacloud_ai_search_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "alibabacloud_ai_search_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 4e3a453a7bbea..66b790bdd57a5 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-mapping-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index abeeb87f03e75..c86538ceb9c87 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -270,3 +270,35 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT alibabacloud-ai-search-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float" + }, + "content": { <4> + "type": "text" <5> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. +<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[AlibabaCloud AI Search embedding model] documentation. +<4> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<5> The field type which is text in this example. 
+ +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 45cb9fc51b9f1..86f52fee2063c 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-reindex-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index d961ec8bd39bd..25d4023c650c0 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -200,3 +200,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "alibabacloud-ai-search-embeddings", + "pipeline": "alibabacloud_ai_search_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index c867b39b88e3b..fb686a2d8be12 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-requirements-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 603cd85a8f93d..c9e7ca8b80ba6 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -52,3 +52,9 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m * A pair of access and secret keys used to access Amazon Bedrock // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] +* An AlibabaCloud Account with https://console.aliyun.com[AlibabaCloud] access +* An API key generated for your account from the https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section] + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index fa4a11c59a158..996148d80a4bd 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-search-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index f23ed1dfef05d..fe1f58b6bd1a9 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -531,3 +531,68 @@ query from the `amazon-bedrock-embeddings` index sorted by their proximity to th // NOTCONSOLE // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +GET alibabacloud-ai-search-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "alibabacloud_ai_search_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `alibabacloud-ai-search-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. 
It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index f12be341d866d..1dfa6077553fe 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-task-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc index b186b2c58ccc5..2b4aa1a200102 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc @@ -223,3 +223,32 @@ PUT _inference/text_embedding/amazon_bedrock_embeddings <1> <6> The model ID or ARN of the model to use. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings <1> +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", <2> + "service_id": "", <3> + "host": "", <4> + "workspace": "" <5> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `alibabacloud_ai_search_embeddings`. +<2> The API key for accessing the AlibabaCloud AI Search API. You can find your API keys in +your AlibabaCloud account under the +https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section]. You need to provide +your API key only once. The <> does not return your API +key. +<3> The AlibabaCloud AI Search embeddings model name, for example `ops-text-embedding-zh-001`. +<4> The name of your AlibabaCloud AI Search host address. +<5> The name of your AlibabaCloud AI Search workspace.
+ +// end::alibabacloud-ai-search[] + diff --git a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc index 387ebcdcd43c0..cbb35f7731034 100644 --- a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc +++ b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc @@ -1,4 +1,316 @@ [[troubleshooting-unstable-cluster]] == Troubleshooting an unstable cluster -include::../modules/discovery/fault-detection.asciidoc[tag=troubleshooting,leveloffset=-2] \ No newline at end of file +Normally, a node will only leave a cluster if deliberately shut down. If a node +leaves the cluster unexpectedly, it's important to address the cause. A cluster +in which nodes leave unexpectedly is unstable and can create several issues. +For instance: + +* The cluster health may be yellow or red. + +* Some shards will be initializing and other shards may be failing. + +* Search, indexing, and monitoring operations may fail and report exceptions in +logs. + +* The `.security` index may be unavailable, blocking access to the cluster. + +* The master may appear busy due to frequent cluster state updates. + +To troubleshoot a cluster in this state, first ensure the cluster has a +<>. Next, focus on the nodes +unexpectedly leaving the cluster ahead of all other issues. It will not be +possible to solve other issues until the cluster has a stable master node and +stable node membership. + +Diagnostics and statistics are usually not useful in an unstable cluster. These +tools only offer a view of the state of the cluster at a single point in time. +Instead, look at the cluster logs to see the pattern of behaviour over time. +Focus particularly on logs from the elected master. 
When a node leaves the +cluster, logs for the elected master include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] + node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [disconnected] +---- + +This message says that the `NodeLeftExecutor` on the elected master +(`instance-0000000000`) processed a `node-left` task, identifying the node that +was removed and the reason for its removal. When the node joins the cluster +again, logs for the elected master will include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] + node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [joining after restart, removed [24s] ago with reason [disconnected]] +---- + +This message says that the `NodeJoinExecutor` on the elected master +(`instance-0000000000`) processed a `node-join` task, identifying the node that +was added to the cluster and the reason for the task. + +Other nodes may log similar messages, but report fewer details: + +[source,text] +---- +[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] + [instance-0000000001] removed { + {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} + {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} + }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} +---- + +These messages are not especially useful for troubleshooting, so focus on the +ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted +on the elected master and which contain more details. 
If you don't see the +messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: + +* You're looking at the logs for the elected master node. + +* The logs cover the correct time period. + +* Logging is enabled at `INFO` level. + +Nodes will also log a message containing `master node changed` whenever they +start or stop following the elected master. You can use these messages to +determine each node's view of the state of the master over time. + +If a node restarts, it will leave the cluster and then join the cluster again. +When it rejoins, the `NodeJoinExecutor` will log that it processed a +`node-join` task indicating that the node is `joining after restart`. If a node +is unexpectedly restarting, look at the node's logs to see why it is shutting +down. + +The <> API on the affected node will also provide some useful +information about the situation. + +If the node did not restart then you should look at the reason for its +departure more closely. Each reason has different troubleshooting steps, +described below. There are three possible reasons: + +* `disconnected`: The connection from the master node to the removed node was +closed. + +* `lagging`: The master published a cluster state update, but the removed node +did not apply it within the permitted timeout. By default, this timeout is 2 +minutes. Refer to <> for information about the +settings which control this mechanism. + +* `followers check retry count exceeded`: The master sent a number of +consecutive health checks to the removed node. These checks were rejected or +timed out. By default, each health check times out after 10 seconds and {es} +removes the node after three consecutively failed health checks. Refer +to <> for information about the settings which +control this mechanism.
+ +[discrete] +[[troubleshooting-unstable-cluster-disconnected]] +=== Diagnosing `disconnected` nodes + +Nodes typically leave the cluster with reason `disconnected` when they shut +down, but if they rejoin the cluster without restarting then there is some +other problem. + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. + +The connections from the elected master node to every other node in the cluster +are particularly important. The elected master never spontaneously closes its +outbound connections to other nodes. Similarly, once an inbound connection is +fully established, a node never spontaneously closes it unless the node is shutting +down. + +If you see a node unexpectedly leave the cluster with the `disconnected` +reason, something other than {es} likely caused the connection to close. A +common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion.
If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. + +[discrete] +[[troubleshooting-unstable-cluster-lagging]] +=== Diagnosing `lagging` nodes + +{es} needs every node to process cluster state updates reasonably quickly. If a +node takes too long to process a cluster state update, it can be harmful to the +cluster. The master will remove these nodes with the `lagging` reason. Refer to +<> for information about the settings which control +this mechanism. + +Lagging is typically caused by performance issues on the removed node. However, +a node may also lag due to severe network delays. To rule out network delays, +ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more +information about the root cause. + +If you're an advanced user, you can get more detailed information about what +the node was doing when it was removed by configuring the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API on the faulty node and report the results in +the logs on the elected master. 
The results are compressed, encoded, and split +into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-follower-check]] +=== Diagnosing `follower check retry count exceeded` nodes + +Nodes sometimes leave the cluster with reason `follower check retry count +exceeded` when they shut down, but if they rejoin the cluster without +restarting then there is some other problem. + +{es} needs every node to respond to network messages successfully and +reasonably quickly. If a node rejects requests or does not respond at all then +it can be harmful to the cluster. 
If enough consecutive checks fail then the +master will remove the node with reason `follower check retry count exceeded` +and will indicate in the `node-left` message how many of the consecutive +unsuccessful checks failed and how many of them timed out. Refer to +<> for information about the settings which control +this mechanism. + +Timeouts and failures may be due to network delays or performance problems on +the affected nodes. Ensure that `net.ipv4.tcp_retries2` is +<> to eliminate network delays as +a possible cause for this kind of instability. Log messages containing +`warn threshold` may give further clues about the cause of the instability. + +If the last check failed with an exception then the exception is reported, and +typically indicates the problem that needs to be addressed. If any of the +checks timed out then narrow down the problem as follows. + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] + +By default the follower checks will time out after 30s, so if node departures +are unpredictable then capture stack dumps every 15s to be sure that at least +one stack dump was taken at the right time. + +[discrete] +[[troubleshooting-unstable-cluster-shardlockobtainfailedexception]] +=== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. 
+ +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-network]] +=== Diagnosing other network disconnections + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. 
+ +{es} nodes will only actively close an outbound connection to another node if +the other node leaves the cluster. See +<> for further information about +identifying and troubleshooting this situation. If an outbound connection +closes for some other reason, nodes will log a message such as the following: + +[source,text] +---- +[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote +---- + +Similarly, once an inbound connection is fully established, a node never +spontaneously closes it unless the node is shutting down. + +Therefore if you see a node report that a connection to another node closed +unexpectedly, something other than {es} likely caused the connection to close. +A common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. diff --git a/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java new file mode 100644 index 0000000000000..0fe816bd3721d --- /dev/null +++ b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.core; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to identify a block of code (a whole class, a method, or a field) that needs to be reviewed (for cleanup, remove or change) + * before releasing 10.0 + */ +@Retention(RetentionPolicy.SOURCE) +@Target({ ElementType.LOCAL_VARIABLE, ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE }) +public @interface UpdateForV10 { +} diff --git a/muted-tests.yml b/muted-tests.yml index 7feefa1255f48..e80a39040a4ef 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -122,12 +122,6 @@ tests: - class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT method: testSnapshotRestore {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111799 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval hourly: #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111901 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval: monthly #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111902 - class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT method: testScaledFloat issue: https://github.com/elastic/elasticsearch/issues/112003 @@ -137,9 +131,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAfterMissingIndex issue: https://github.com/elastic/elasticsearch/issues/112088 -- class: 
org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT - method: testLimitedPrivilege - issue: https://github.com/elastic/elasticsearch/issues/112110 - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {stats.ByTwoCalculatedSecondOverwrites SYNC} issue: https://github.com/elastic/elasticsearch/issues/112117 @@ -160,18 +151,17 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsAvailable - issue: https://github.com/elastic/elasticsearch/issues/112278 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsNotAvailable - issue: https://github.com/elastic/elasticsearch/issues/112290 - class: org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests method: testEqualsAndHashcode issue: https://github.com/elastic/elasticsearch/issues/112312 - class: org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests method: testGetMultiThreaded issue: https://github.com/elastic/elasticsearch/issues/112314 +- class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT + method: testRankDocsRetrieverWithCollapse + issue: https://github.com/elastic/elasticsearch/issues/112254 +- class: org.elasticsearch.search.ccs.CCSUsageTelemetryIT + issue: https://github.com/elastic/elasticsearch/issues/112324 # Examples: # diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index 2935c0c1c41b5..ff17a92ed0fcc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,94 @@ keyword: docs.1._source: kwd: bar +--- +keyword with normalizer: 
+ - requires: + cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ] + reason: support for normalizer on keyword fields + - do: + indices.create: + index: test-keyword-with-normalizer + body: + settings: + analysis: + normalizer: + lowercase: + type: custom + filter: + - lowercase + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + normalizer: lowercase + keyword_with_ignore_above: + type: keyword + normalizer: lowercase + ignore_above: 10 + keyword_without_doc_values: + type: keyword + normalizer: lowercase + doc_values: false + + - do: + index: + index: test-keyword-with-normalizer + id: 1 + body: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - do: + index: + index: test-keyword-with-normalizer + id: 2 + body: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - do: + index: + index: test-keyword-with-normalizer + id: 3 + body: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + + - do: + mget: + index: test-keyword-with-normalizer + body: + ids: [ 1, 2, 3 ] + - match: { docs.0._index: "test-keyword-with-normalizer" } + - match: { docs.0._id: "1" } + - match: + docs.0._source: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - match: { docs.1._index: "test-keyword-with-normalizer" } + - match: { docs.1._id: "2" } + - match: + docs.1._source: + 
keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - match: { docs.2._index: "test-keyword-with-normalizer" } + - match: { docs.2._id: "3" } + - match: + docs.2._source: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + --- stored text: - requires: diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index 8a46daa45e73b..948199fbe74f4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.admin.indices.shrink.ResizeType; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; @@ -43,8 +42,6 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ private final Set aliases = new HashSet<>(); - private final Set blocks = new HashSet<>(); - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; private boolean performReroute = true; @@ -125,10 +122,6 @@ public Set aliases() { return aliases; } - public Set blocks() { - return blocks; - } - public Index recoverFrom() { return 
recoverFrom; } @@ -229,8 +222,6 @@ public String toString() { + settings + ", aliases=" + aliases - + ", blocks=" - + blocks + ", waitForActiveShards=" + waitForActiveShards + ", systemDataStreamDescriptor=" diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java index 425461d1f4ba1..7c1304f92eefd 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java @@ -101,11 +101,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(primaryResponse); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - DocWriteRequest.writeDocumentRequestThin(out, request); - out.writeOptionalWriteable(primaryResponse == null ? null : primaryResponse::writeThin); - } + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + DocWriteRequest.writeDocumentRequestThin(out, item.request); + out.writeOptional(BulkItemResponse.THIN_WRITER, item.primaryResponse); + }; @Override public long ramBytesUsed() { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java index 151e8795d0f82..d3e550eaf05b3 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java @@ -264,7 +264,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(shardId, in); - failure = in.readBoolean() ? new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -272,7 +272,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(in); - failure = in.readBoolean() ? 
new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -384,31 +384,21 @@ public void writeTo(StreamOutput out) throws IOException { writeResponseType(out); response.writeTo(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } + out.writeOptionalWriteable(failure); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - out.writeByte(opType.getId()); + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + out.writeByte(item.opType.getId()); - if (response == null) { + if (item.response == null) { out.writeByte((byte) 2); } else { - writeResponseType(out); - response.writeThin(out); + item.writeResponseType(out); + item.response.writeThin(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } - } + out.writeOptionalWriteable(item.failure); + }; private void writeResponseType(StreamOutput out) throws IOException { if (response instanceof SimulateIndexResponse) { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java index 0d2942e688382..f7860c47d8b73 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java @@ -130,14 +130,7 @@ public void writeTo(StreamOutput out) throws IOException { throw new IllegalStateException("Inference metadata should have been consumed before writing to the stream"); } super.writeTo(out); - out.writeArray((o, item) -> { - if (item != null) { - o.writeBoolean(true); - item.writeThin(o); - } else { - o.writeBoolean(false); - } - }, items); + out.writeArray((o, item) -> o.writeOptional(BulkItemRequest.THIN_WRITER, item), items); if 
(out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_VALIDATES_MAPPINGS)) { out.writeBoolean(isSimulated); } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java index 3eeb96546c9b0..eb1bb0468c9bb 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java @@ -56,6 +56,6 @@ public void setForcedRefresh(boolean forcedRefresh) { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); shardId.writeTo(out); - out.writeArray((o, item) -> item.writeThin(o), responses); + out.writeArray(BulkItemResponse.THIN_WRITER, responses); } } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java index 3bf72313c4c21..cc5d60ad0b0c0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java @@ -69,4 +69,11 @@ public Supplier getSearchResponseMergerSupplier() { public void setSearchResponseMergerSupplier(Supplier supplier) { this.searchResponseMergerSupplier = supplier; } + + /** + * Is this async search? 
+ */ + public boolean isAsync() { + return false; + } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 6e1645c1ed711..32ee9c331295c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -369,7 +369,7 @@ void executeRequest( } else { if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { tl.setRemotes(resolvedIndices.getRemoteClusterIndices().size()); - if (isAsyncSearchTask(task)) { + if (task.isAsync()) { tl.setFeature(CCSUsageTelemetry.ASYNC_FEATURE); } String client = task.getHeader(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER); @@ -1514,34 +1514,6 @@ public SearchPhase newSearchPhase( } } - /** - * TransportSearchAction cannot access async-search code, so can't check whether this the Task - * is an instance of AsyncSearchTask, so this roundabout method is used - * @param searchTask SearchTask to analyze - * @return true if this is an async search task; false if a synchronous search task - */ - private boolean isAsyncSearchTask(SearchTask searchTask) { - assert assertAsyncSearchTaskListener(searchTask) : "AsyncSearchTask SearchProgressListener is not one of the expected types"; - // AsyncSearchTask will not return SearchProgressListener.NOOP, since it uses its own progress listener - // which delegates to CCSSingleCoordinatorSearchProgressListener when minimizing roundtrips. 
- // Only synchronous SearchTask uses SearchProgressListener.NOOP or CCSSingleCoordinatorSearchProgressListener directly - return searchTask.getProgressListener() != SearchProgressListener.NOOP - && searchTask.getProgressListener() instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - - /** - * @param searchTask SearchTask to analyze - * @return true if AsyncSearchTask still uses its own special listener, not one of the two that synchronous SearchTask uses - */ - private boolean assertAsyncSearchTaskListener(SearchTask searchTask) { - if (searchTask.getClass().getSimpleName().contains("AsyncSearchTask")) { - SearchProgressListener progressListener = searchTask.getProgressListener(); - return progressListener != SearchProgressListener.NOOP - && progressListener instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - return true; - } - private static void validateAndResolveWaitForCheckpoint( ClusterState clusterState, IndexNameExpressionResolver resolver, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b5ee0ebd7e387..07dcb7baf0777 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.AckedClusterStateUpdateTask; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -514,7 +513,6 @@ private ClusterState applyCreateIndexWithTemporaryService( ClusterState updated = clusterStateCreateIndex( currentState, - request.blocks(), indexMetadata, 
metadataTransformer, allocationService.getShardRoutingRoleStrategy() @@ -1231,7 +1229,6 @@ public static List resolveAndValidateAliases( */ static ClusterState clusterStateCreateIndex( ClusterState currentState, - Set clusterBlocks, IndexMetadata indexMetadata, BiConsumer metadataTransformer, ShardRoutingRoleStrategy shardRoutingRoleStrategy @@ -1245,15 +1242,13 @@ static ClusterState clusterStateCreateIndex( newMetadata = currentState.metadata().withAddedIndex(indexMetadata); } - String indexName = indexMetadata.getIndex().getName(); - ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); - blocks.updateBlocks(indexMetadata); + var blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); + blocksBuilder.updateBlocks(indexMetadata); - ClusterState updatedState = ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).build(); + var routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexMetadata.getIndex().getName())); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, updatedState.routingTable()) - .addAsNew(updatedState.metadata().index(indexName)); - return ClusterState.builder(updatedState).routingTable(routingTableBuilder.build()).build(); + return ClusterState.builder(currentState).blocks(blocksBuilder).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( @@ -1326,16 +1321,6 @@ private static IndexMetadata.Builder createIndexMetadataBuilder( return builder; } - private static ClusterBlocks.Builder createClusterBlocksBuilder(ClusterState currentState, String index, Set blocks) { - ClusterBlocks.Builder blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); - if (blocks.isEmpty() == false) { - for (ClusterBlock block : blocks) { - blocksBuilder.addIndexBlock(index, block); - } - } - return 
blocksBuilder; - } - private static void updateIndexMappingsAndBuildSortOrder( IndexService indexService, CreateIndexClusterStateUpdateRequest request, diff --git a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java index e731cf3bc58be..58c23ab9aa398 100644 --- a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java @@ -56,8 +56,10 @@ public static String getBase64UUID(Random random) { return Base64.getUrlEncoder().withoutPadding().encodeToString(getUUIDBytes(random)); } + static final int SIZE_IN_BYTES = 16; + private static byte[] getUUIDBytes(Random random) { - final byte[] randomBytes = new byte[16]; + final byte[] randomBytes = new byte[SIZE_IN_BYTES]; random.nextBytes(randomBytes); /* Set the version to version 4 (see http://www.ietf.org/rfc/rfc4122.txt) * The randomly or pseudo-randomly generated version. diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java index f7f7f520fec90..d66b0f579ce3e 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java @@ -47,6 +47,8 @@ protected byte[] macAddress() { return SECURE_MUNGED_ADDRESS; } + static final int SIZE_IN_BYTES = 15; + @Override public String getBase64UUID() { final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff; @@ -61,7 +63,7 @@ public String getBase64UUID() { sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max ); - final byte[] uuidBytes = new byte[15]; + final byte[] uuidBytes = new byte[SIZE_IN_BYTES]; int i = 0; // We have auto-generated ids, which are usually used for append-only workloads. 
diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java index 43a232e82510e..ebc0978f38d49 100644 --- a/server/src/main/java/org/elasticsearch/common/UUIDs.java +++ b/server/src/main/java/org/elasticsearch/common/UUIDs.java @@ -17,26 +17,50 @@ public class UUIDs { private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(); - /** Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as - * primary key. The id is opaque and the implementation is free to change at any time! */ + /** + * The length of a UUID string generated by {@link #base64UUID}. + */ + // A 15-byte time-based UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding). + public static final int TIME_BASED_UUID_STRING_LENGTH = 20; + + /** + * Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as + * primary key. The id is opaque and the implementation is free to change at any time! + * The resulting string has length {@link #TIME_BASED_UUID_STRING_LENGTH}. + */ public static String base64UUID() { return TIME_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using the - * provided {@code Random} instance */ + /** + * The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}. + */ + // A 16-byte v4 UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding) plus another byte (becomes 2 chars). 
+ public static final int RANDOM_BASED_UUID_STRING_LENGTH = 22; + + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using the + * provided {@code Random} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID(Random random) { return RandomBasedUUIDGenerator.getBase64UUID(random); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using a - * private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using a + * private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID() { return RANDOM_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded {@link SecureString} of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, - * using a private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded {@link SecureString} representing a RFC4122 version 4 + * UUID, using a private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static SecureString randomBase64UUIDSecureString() { return RandomBasedUUIDGenerator.getBase64UUIDSecureString(); } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index ec0edb2d07e5a..497028ef37c69 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -1095,8 +1095,23 @@ public T[] readOptionalArray(Writeable.Reader reader, IntFunction ar return readBoolean() ? 
readArray(reader, arraySupplier) : null; } + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptionalWriteable + */ + // just an alias for readOptional() since we don't actually care whether T extends Writeable @Nullable public T readOptionalWriteable(Writeable.Reader reader) throws IOException { + return readOptional(reader); + } + + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptional + */ + public T readOptional(Writeable.Reader reader) throws IOException { if (readBoolean()) { T t = reader.read(this); if (t == null) { diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index c65ae2e3463d4..5780885473b00 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -1015,6 +1015,12 @@ public void writeOptionalArray(@Nullable T[] array) throws writeOptionalArray(StreamOutput::writeWriteable, array); } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. + * + * @see StreamInput#readOptionalWriteable + */ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOException { if (writeable != null) { writeBoolean(true); @@ -1024,6 +1030,21 @@ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOExcep } } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. 
+ * + * @see StreamInput#readOptional + */ + public void writeOptional(Writer writer, @Nullable T maybeItem) throws IOException { + if (maybeItem != null) { + writeBoolean(true); + writer.write(this, maybeItem); + } else { + writeBoolean(false); + } + } + /** * This method allow to use a method reference when writing collection elements such as * {@code out.writeMap(map, StreamOutput::writeString, StreamOutput::writeWriteable)} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 9645b4397df4f..d130f37c3e8eb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above"); + static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source"); public static class Defaults { public static final FieldType FIELD_TYPE; @@ -856,7 +857,7 @@ public boolean hasNormalizer() { private final Script script; private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - private final boolean storeIgnored; + private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; @@ -866,7 +867,7 @@ private KeywordFieldMapper( KeywordFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, - boolean storeIgnored, + boolean isSyntheticSource, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue()); @@ -881,7 +882,7 @@ private KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; this.scriptCompiler = 
builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; - this.storeIgnored = storeIgnored; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) { if (value.length() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (storeIgnored) { + if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } @@ -1026,6 +1027,11 @@ private String originalName() { @Override protected SyntheticSourceMode syntheticSourceMode() { + if (hasNormalizer()) { + // NOTE: no matter if we have doc values or not we use a stored field to reconstruct the original value + // whose doc values would be altered by the normalizer + return SyntheticSourceMode.FALLBACK; + } if (fieldType.stored() || hasDocValues) { return SyntheticSourceMode.NATIVE; } @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - if (hasNormalizer()) { - throw new IllegalArgumentException( - "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer" - ); - } if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) { return super.syntheticFieldLoader(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 6dce9d6c7b86e..63bbef061c61f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -33,6 +33,7 @@ public Set getFeatures() { NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, 
ObjectMapper.SUBOBJECTS_AUTO, + KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java index 2b5bcd9931f6e..528c37de7a4a8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java @@ -72,8 +72,8 @@ private static Bucket createFromStream(StreamInput in, DocValueFormat format, bo String key = in.getTransportVersion().equals(TransportVersions.V_8_0_0) ? in.readString() : in.getTransportVersion().onOrAfter(TransportVersions.V_7_17_1) ? in.readOptionalString() : in.readString(); - BytesRef from = in.readBoolean() ? in.readBytesRef() : null; - BytesRef to = in.readBoolean() ? in.readBytesRef() : null; + BytesRef from = in.readOptional(StreamInput::readBytesRef); + BytesRef to = in.readOptional(StreamInput::readBytesRef); long docCount = in.readLong(); InternalAggregations aggregations = InternalAggregations.readFrom(in); @@ -89,14 +89,8 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeString(key == null ? 
generateKey(from, to, format) : key); } - out.writeBoolean(from != null); - if (from != null) { - out.writeBytesRef(from); - } - out.writeBoolean(to != null); - if (to != null) { - out.writeBytesRef(to); - } + out.writeOptional(StreamOutput::writeBytesRef, from); + out.writeOptional(StreamOutput::writeBytesRef, to); out.writeLong(docCount); aggregations.writeTo(out); } diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java index bccfc22dc7e95..a4549f0814a06 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java @@ -8,13 +8,14 @@ package org.elasticsearch.search.lookup; -import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.SourceLoader; import java.io.IOException; +import java.util.Map; // NB This is written under the assumption that individual segments are accessed by a single // thread, even if separate segments may be searched concurrently. 
If we ever implement @@ -22,7 +23,7 @@ class SyntheticSourceProvider implements SourceProvider { private final SourceLoader sourceLoader; - private volatile SyntheticSourceLeafLoader[] leafLoaders; + private final Map leaves = ConcurrentCollections.newConcurrentMap(); SyntheticSourceProvider(SourceLoader sourceLoader) { this.sourceLoader = sourceLoader; @@ -30,31 +31,14 @@ class SyntheticSourceProvider implements SourceProvider { @Override public Source getSource(LeafReaderContext ctx, int doc) throws IOException { - maybeInit(ctx); - if (leafLoaders[ctx.ord] == null) { - // individual segments are currently only accessed on one thread so there's no need - // for locking here. - leafLoaders[ctx.ord] = new SyntheticSourceLeafLoader(ctx); + final Object id = ctx.id(); + var provider = leaves.get(id); + if (provider == null) { + provider = new SyntheticSourceLeafLoader(ctx); + var existing = leaves.put(id, provider); + assert existing == null : "unexpected source provider [" + existing + "]"; } - return leafLoaders[ctx.ord].getSource(doc); - } - - private void maybeInit(LeafReaderContext ctx) { - if (leafLoaders == null) { - synchronized (this) { - if (leafLoaders == null) { - leafLoaders = new SyntheticSourceLeafLoader[findParentContext(ctx).leaves().size()]; - } - } - } - } - - private IndexReaderContext findParentContext(LeafReaderContext ctx) { - if (ctx.parent != null) { - return ctx.parent; - } - assert ctx.isTopLevel; - return ctx; + return provider.getSource(doc); } private class SyntheticSourceLeafLoader { diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 3eb8939c22a65..cc0bc5e2257c8 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -2,8 +2,8 @@ "INITIAL_MASTER_NODES": 
"important-settings.html#initial_master_nodes", "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html", - "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_lagging_nodes_2", - "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_shardlockobtainfailedexception_failures_2", + "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging", + "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception", "CONCURRENT_REPOSITORY_WRITERS": "diagnosing-corrupted-repositories.html", "ARCHIVE_INDICES": "archive-indices.html", "HTTP_TRACER": "modules-network.html#http-rest-request-tracer", diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 8a487e5653627..f7d343b43b29c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -967,13 +967,7 @@ public void testClusterStateCreateIndexThrowsWriteIndexValidationException() thr assertThat( expectThrows( IllegalStateException.class, - () -> clusterStateCreateIndex( - currentClusterState, - Set.of(), - newIndex, - null, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY - ) + () -> clusterStateCreateIndex(currentClusterState, newIndex, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) ).getMessage(), startsWith("alias [alias1] has more than one write index [") ); @@ -991,7 +985,6 @@ public void testClusterStateCreateIndex() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, null, 
TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY @@ -1037,7 +1030,6 @@ public void testClusterStateCreateIndexWithMetadataTransaction() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, metadataTransformer, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index 5af036ce0648f..3229049b67b4c 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -176,4 +176,20 @@ protected byte[] macAddress() { ); return bytesPerDoc; } + + public void testStringLength() { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(RandomBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID().length()); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID(random()).length()); + try (var secureString = UUIDs.randomBase64UUIDSecureString()) { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, secureString.toString().length()); + } + + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(TimeBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, UUIDs.base64UUID().length()); + } + + private static int getUnpaddedBase64StringLength(int sizeInBytes) { + return (int) Math.ceil(sizeInBytes * 4.0 / 3.0); + } } diff --git a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java index b1104a72400ea..ae686afcbb296 100644 --- a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java @@ -761,6 +761,17 @@ 
public void checkZonedDateTimeSerialization(TransportVersion tv) throws IOExcept } } + public void testOptional() throws IOException { + try (var output = new BytesStreamOutput()) { + output.writeOptional(StreamOutput::writeString, "not-null"); + output.writeOptional(StreamOutput::writeString, null); + + final var input = getStreamInput(output.bytes()); + assertEquals("not-null", input.readOptional(StreamInput::readString)); + assertNull(input.readOptional(StreamInput::readString)); + } + } + private void assertSerialization( CheckedConsumer outputAssertions, CheckedConsumer inputAssertions, diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java index bdddea58b713f..2617f82b09f08 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java @@ -1548,7 +1548,7 @@ public void testMaxOpenScrollContexts() throws Exception { ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); clearScrollRequest.setScrollIds(clearScrollIds); - client().clearScroll(clearScrollRequest); + client().clearScroll(clearScrollRequest).get(); for (int i = 0; i < clearScrollIds.size(); i++) { client().prepareSearch("index").setSize(1).setScroll(TimeValue.timeValueMinutes(1)).get().decRef(); diff --git a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java index d6b1da9f76b42..177968b9a1326 100644 --- a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java @@ -72,7 +72,7 @@ public void initSearcher() throws IOException { w.commit(); reader = DirectoryReader.open(w); searcher = newSearcher(reader); - leafReaderContext = reader.leaves().get(0); + leafReaderContext = 
searcher.getTopReaderContext().leaves().get(0); } @After diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java index 6abe923851318..2f452161b10ca 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java @@ -21,8 +21,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.hamcrest.Matchers.equalTo; - public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport { private final Integer ignoreAbove; private final boolean allIgnored; @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { - return List.of( - new MapperTestCase.SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"), - b -> b.field("type", "keyword").field("normalizer", "lowercase") - ) - ); + return List.of(); } } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java index 014cbcd2bcc3a..8d20cce33bbb4 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.blobstore; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.UUIDs; import java.nio.file.Path; import java.util.regex.Pattern; @@ -38,9 +39,9 @@ public enum RepositoryFileType { // decimal numbers .replace("NUM", 
"(0|[1-9][0-9]*)") // 15-byte UUIDS from TimeBasedUUIDGenerator - .replace("SHORTUUID", "[0-9a-zA-Z_-]{20}") + .replace("SHORTUUID", "[0-9a-zA-Z_-]{" + UUIDs.TIME_BASED_UUID_STRING_LENGTH + "}") // 16-byte UUIDs from RandomBasedUUIDGenerator - .replace("UUID", "[0-9a-zA-Z_-]{22}") + .replace("UUID", "[0-9a-zA-Z_-]{" + UUIDs.RANDOM_BASED_UUID_STRING_LENGTH + "}") + ")$" ); } diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index db3b81891e87e..4a4ffca0f37aa 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -113,13 +113,13 @@ protected Collection getPlugins() { } public void testDataGeneratorStressTest() throws IOException { - // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object). + // Let's generate 125000 fields to test an extreme case (2 levels of objects + 1 leaf level with 50 fields per object). 
var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() { private int generatedFields = 0; @Override public int generateChildFieldCount() { - return 100; + return 50; } @Override diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index c0305f873327d..5068ac69e462a 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -545,4 +545,9 @@ public void onFailure(Exception exc) { executeCompletionListeners(); } } + + @Override + public boolean isAsync() { + return true; + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java index 1ba625a507a46..f7ad1f65628b2 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java @@ -74,7 +74,7 @@ public RollupJobStatus(IndexerState state, @Nullable Map positio public RollupJobStatus(StreamInput in) throws IOException { state = IndexerState.fromStream(in); - currentPosition = in.readBoolean() ? new TreeMap<>(in.readGenericMap()) : null; + currentPosition = in.readOptional(CURRENT_POSITION_READER); if (in.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes serialize `upgradedDocumentID` flag. 
We don't need it anymore, but // we need to pull it off the stream @@ -83,6 +83,8 @@ public RollupJobStatus(StreamInput in) throws IOException { } } + private static final Reader> CURRENT_POSITION_READER = in -> new TreeMap<>(in.readGenericMap()); + public IndexerState getIndexerState() { return state; } @@ -118,10 +120,7 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { state.writeTo(out); - out.writeBoolean(currentPosition != null); - if (currentPosition != null) { - out.writeGenericMap(currentPosition); - } + out.writeOptional(StreamOutput::writeGenericMap, currentPosition); if (out.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes expect a boolean `upgradedDocumentID` flag. We don't have it anymore, // but we need to tell them we are upgraded in case there is a mixed cluster diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index 4f3d7a245fc8f..74434adf61fbb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -868,6 +868,11 @@ private static RoleDescriptor buildViewerRoleDescriptor() { .indices("/~(([.]|ilm-history-).*)/") .privileges("read", "view_index_metadata") .build(), + // Observability + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) @@ -915,6 +920,10 @@ private static RoleDescriptor buildEditorRoleDescriptor() { .indices("observability-annotations") 
.privileges("read", "view_index_metadata", "write") .build(), + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata", "write", "manage") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java index 681b004dd1d28..2f2617f956ed9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java @@ -59,12 +59,8 @@ public ExecuteWatchRequest(StreamInput in) throws IOException { id = in.readOptionalString(); ignoreCondition = in.readBoolean(); recordExecution = in.readBoolean(); - if (in.readBoolean()) { - alternativeInput = in.readGenericMap(); - } - if (in.readBoolean()) { - triggerData = in.readGenericMap(); - } + alternativeInput = in.readOptional(StreamInput::readGenericMap); + triggerData = in.readOptional(StreamInput::readGenericMap); long actionModesCount = in.readLong(); actionModes = new HashMap<>(); for (int i = 0; i < actionModesCount; i++) { @@ -83,14 +79,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(id); out.writeBoolean(ignoreCondition); out.writeBoolean(recordExecution); - out.writeBoolean(alternativeInput != null); - if (alternativeInput != null) { - out.writeGenericMap(alternativeInput); - } - out.writeBoolean(triggerData != null); - if (triggerData != null) { - out.writeGenericMap(triggerData); - } + out.writeOptional(StreamOutput::writeGenericMap, alternativeInput); + 
out.writeOptional(StreamOutput::writeGenericMap, triggerData); out.writeLong(actionModes.size()); for (Map.Entry entry : actionModes.entrySet()) { out.writeString(entry.getKey()); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index f0676f35ae316..0cdf7de63ca99 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.action.admin.indices.get.GetIndexAction; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsAction; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsAction; +import org.elasticsearch.action.admin.indices.mapping.put.TransportAutoPutMappingAction; import org.elasticsearch.action.admin.indices.mapping.put.TransportPutMappingAction; import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction; @@ -3662,6 +3663,9 @@ public void testPredefinedViewerRole() { assertOnlyReadAllowed(role, ".profiling-" + randomIntBetween(0, 5)); assertOnlyReadAllowed(role, randomAlphaOfLength(5)); + assertOnlyReadAllowed(role, ".slo-observability." + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." 
+ randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3740,6 +3744,9 @@ public void testPredefinedEditorRole() { assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".preview.alerts-" + randomIntBetween(0, 5)); assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".internal.preview.alerts-" + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertReadWriteAndManage(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." + randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3865,6 +3872,41 @@ private void assertReadWriteDocsButNotDeleteIndexAllowed(Role role, String index role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), is(false) ); + + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportUpdateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportBulkAction.NAME).test(mockIndexAbstraction(index)), is(true)); + } + + private void assertReadWriteAndManage(Role role, String index) { + assertThat( + role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + 
role.indices().allowedIndicesMatcher(TransportFieldCapabilitiesAction.NAME + "*").test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportCreateIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportUpdateSettingsAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(GetRollupIndexCapsAction.NAME + "*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:admin/*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:monitor/*").test(mockIndexAbstraction(index)), is(true)); + assertThat( + role.indices().allowedIndicesMatcher(TransportAutoPutMappingAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(AutoCreateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java index 1e68d63ef7bb1..78b395503e700 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java @@ -38,7 +38,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; 
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.UNSIGNED_LONG_MAX; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.inUnsignedLongRange; @@ -51,85 +50,6 @@ public final class DataTypeConverter { private DataTypeConverter() {} - /** - * Returns the type compatible with both left and right types - *

- * If one of the types is null - returns another type - * If both types are numeric - returns type with the highest precision int < long < float < double - * If one of the types is string and another numeric - returns numeric - */ - public static DataType commonType(DataType left, DataType right) { - if (left == right) { - return left; - } - if (left == NULL) { - return right; - } - if (right == NULL) { - return left; - } - if (isString(left) && isString(right)) { - if (left == TEXT || right == TEXT) { - return TEXT; - } - if (left == KEYWORD) { - return KEYWORD; - } - return right; - } - if (left.isNumeric() && right.isNumeric()) { - int lsize = left.estimatedSize().orElseThrow(); - int rsize = right.estimatedSize().orElseThrow(); - // if one is int - if (left.isWholeNumber()) { - // promote the highest int - if (right.isWholeNumber()) { - if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { - return UNSIGNED_LONG; - } - return lsize > rsize ? left : right; - } - // promote the rational - return right; - } - // try the other side - if (right.isWholeNumber()) { - return left; - } - // promote the highest rational - return lsize > rsize ? left : right; - } - if (isString(left)) { - if (right.isNumeric()) { - return right; - } - } - if (isString(right)) { - if (left.isNumeric()) { - return left; - } - } - - if (isDateTime(left) && isDateTime(right)) { - return DATETIME; - } - - // none found - return null; - } - - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - /** * Get the conversion from one type to another. 
*/ diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java index 14f6c9591ed10..3d600bec1bd65 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java @@ -85,6 +85,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java index 5342728af4fee..f353512eb93b7 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java @@ -86,6 +86,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = 
blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java index 566b8fbed445c..5d2d6c97a11f1 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java @@ -37,6 +37,13 @@ public sealed interface BooleanBlock extends Block permits BooleanArrayBlock, Bo @Override BooleanVector asVector(); + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + @Override BooleanBlock filter(int... 
positions); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java index ca2fc58bf0bb5..1544cc3355cd0 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java @@ -31,6 +31,12 @@ public BooleanVector asVector() { return vector; } + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index fc4cdc1d41f46..3d61613ba70e9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -48,6 +48,11 @@ public OrdinalBytesRefBlock asOrdinals() { return null; } + @Override + public ToMask toMask() { + return new ToMask(blockFactory.newConstantBooleanVector(false, positionCount), false); + } + @Override public boolean isNull(int position) { return true; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java new file mode 100644 index 0000000000000..5b71679048e21 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data; + +import org.elasticsearch.core.Releasable; + +/** + * Result from calling {@link BooleanBlock#toMask}. {@link #close closing} this will + * close the contained {@link #mask()}. If you want to keep a reference to it then you'll + * have to {@link BooleanVector#incRef()} it. + */ +public record ToMask(BooleanVector mask, boolean hadMultivaluedFields) implements Releasable { + @Override + public void close() { + mask.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st index 750de95e7b8d7..e855e6d6296d8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st @@ -101,6 +101,28 @@ $if(BytesRef)$ public OrdinalBytesRefBlock asOrdinals() { return null; } + +$elseif(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } $endif$ @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st index bf9e6fec18726..23632bf41349c 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st @@ -86,6 +86,29 @@ public final class $Type$BigArrayBlock extends AbstractArrayBlock implements $Ty return null; } +$if(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } +$endif$ + @Override public $type$ get$Type$(int valueIndex) { return vector.get$Type$(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st index da0769af2d185..67e4ac4bb334f 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st @@ -63,8 +63,16 @@ $if(BytesRef)$ * returns null. Callers must not release the returned block as no extra reference is retained by this method. */ OrdinalBytesRefBlock asOrdinals(); -$endif$ +$elseif(boolean)$ + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + +$endif$ @Override $Type$Block filter(int... 
positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st index 09f11f3504393..e19c1788cdb6b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st @@ -51,8 +51,8 @@ $if(BytesRef)$ * returns null. Callers must not release the returned vector as no extra reference is retained by this method. */ OrdinalBytesRefVector asOrdinals(); -$endif$ +$endif$ @Override $Type$Vector filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st index eec75f62f22f8..d4c6859e64b2a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st @@ -44,8 +44,15 @@ $if(BytesRef)$ return null; } } -$endif$ +$elseif(boolean)$ + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + +$endif$ @Override $if(BytesRef)$ public BytesRef getBytesRef(int valueIndex, BytesRef dest) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index e8401048af011..ad372da47d6b8 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -800,6 +800,12 @@ public void testBooleanBlock() { } assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); 
assertEmptyLookup(blockFactory, block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(p % 10 == 0)); + } + } try (BooleanBlock.Builder blockBuilder = blockFactory.newBooleanBlockBuilder(1)) { BooleanBlock copy = blockBuilder.copyFrom(block, 0, block.getPositionCount()).build(); @@ -826,6 +832,7 @@ public void testBooleanBlock() { IntStream.range(0, positionCount).mapToObj(ii -> randomBoolean()).forEach(vectorBuilder::appendBoolean); BooleanVector vector = vectorBuilder.build(); assertSingleValueDenseBlock(vector.asBlock()); + assertToMask(vector); releaseAndAssertBreaker(vector.asBlock()); } } @@ -1358,6 +1365,19 @@ void assertNullValues( assertTrue(block.isNull(randomNullPosition)); assertFalse(block.isNull(randomNonNullPosition)); releaseAndAssertBreaker(block); + if (block instanceof BooleanBlock bb) { + try (ToMask mask = bb.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(nullsMask.get(p) == false && p % 10 == 0)); + } + } + } + } + + void assertZeroPositionsAndRelease(BooleanBlock block) { + assertToMaskZeroPositions(block); + assertZeroPositionsAndRelease((Block) block); } void assertZeroPositionsAndRelease(Block block) { @@ -1366,6 +1386,11 @@ void assertZeroPositionsAndRelease(Block block) { releaseAndAssertBreaker(block); } + void assertZeroPositionsAndRelease(BooleanVector vector) { + assertToMask(vector); + assertZeroPositionsAndRelease((Vector) vector); + } + void assertZeroPositionsAndRelease(Vector vector) { assertThat(vector.getPositionCount(), is(0)); assertKeepMaskEmpty(vector); @@ -1386,6 +1411,20 @@ static void assertKeepMaskEmpty(Vector vector) { } } + static void assertToMaskZeroPositions(BooleanBlock block) { + try (ToMask mask = block.toMask()) { + 
assertThat(mask.mask().getPositionCount(), equalTo(0)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + + static void assertToMask(BooleanVector vector) { + try (ToMask mask = vector.asBlock().toMask()) { + assertThat(mask.mask(), sameInstance(vector)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + void releaseAndAssertBreaker(Block... blocks) { assertThat(breaker.getUsed(), greaterThan(0L)); Page[] pages = Arrays.stream(blocks).map(Page::new).toArray(Page[]::new); @@ -1836,7 +1875,7 @@ static void assertKeepMask(Block block) { /** * Build a random valid "mask" of single valued boolean fields that. */ - private static BooleanVector randomMask(int positions) { + static BooleanVector randomMask(int positions) { try (BooleanVector.Builder builder = TestBlockFactory.getNonBreakingInstance().newBooleanVectorFixedBuilder(positions)) { for (int i = 0; i < positions; i++) { builder.appendBoolean(randomBoolean()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java index df32dcaddd927..34d591cd87d84 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java @@ -164,6 +164,12 @@ public void testBooleanVector() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[p])); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanVectorBlock.class)); assertThat(block.asVector(), instanceOf(BooleanArrayVector.class)); @@ -224,6 
+230,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanArrayBlock.class)); assertNull(copy.asVector()); @@ -253,6 +265,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanBigArrayBlock.class)); assertNull(block.asVector()); @@ -266,4 +284,52 @@ public void testBooleanBlock() throws IOException { } assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); } + + /** + * Tests a block where the first position is multivalued and every other position is single valued. 
+ */ + public void testBooleanBlockOneMv() { + int mvCount = between(2, 10); + int positionCount = randomIntBetween(1000, 5000); + blockFactory = new BlockFactory(blockFactory.breaker(), blockFactory.bigArrays(), ByteSizeValue.ofBytes(1)); + try (var builder = blockFactory.newBooleanBlockBuilder(between(1, mvCount + positionCount))) { + boolean[] elements = new boolean[positionCount + mvCount]; + builder.beginPositionEntry(); + for (int i = 0; i < mvCount; i++) { + elements[i] = randomBoolean(); + builder.appendBoolean(elements[i]); + } + builder.endPositionEntry(); + for (int p = 1; p < positionCount; p++) { + elements[mvCount + p] = randomBoolean(); + builder.appendBoolean(elements[mvCount + p]); + } + try (var block = builder.build()) { + assertThat(block, instanceOf(BooleanBigArrayBlock.class)); + assertNull(block.asVector()); + assertThat(block.getPositionCount(), equalTo(positionCount)); + assertThat(block.getValueCount(0), equalTo(mvCount)); + for (int i = 0; i < mvCount; i++) { + assertThat(block.getBoolean(block.getFirstValueIndex(0) + i), equalTo(elements[i])); + } + for (int p = 1; p < positionCount; p++) { + assertThat(block.getValueCount(p), equalTo(1)); + assertThat(block.getBoolean(block.getFirstValueIndex(p)), equalTo(elements[mvCount + p])); + } + assertKeepMask(block); + try (ToMask mask = block.toMask()) { + /* + * NOTE: this test is customized to the layout above where we don't make + * any fields with 0 values. 
+ */ + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + assertThat(mask.mask().getBoolean(0), equalTo(false)); + for (int p = 1; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[mvCount + p])); + } + } + } + } + assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java index af4c643a90625..aab8b86f9b795 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java @@ -72,6 +72,12 @@ public void testBoolean() throws IOException { assertEmptyLookup(blockFactory, vector.asBlock()); assertSerialization(block); assertThat(vector.toString(), containsString("BooleanBigArrayVector[positions=" + positionCount)); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < values.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(values[p])); + } + } } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java index c5e130726844d..e37b2638b56f7 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.nullValue; public class BlockMultiValuedTests extends ESTestCase { @ParametersFactory @@ -122,6 +123,54 @@ public void 
testLookupFromSingleManyPages() { assertLookup(ByteSizeValue.ofBytes(1), between(1, 32), p -> 1); } + public void testToMask() { + if (elementType != ElementType.BOOLEAN) { + return; + } + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 2, 10, 0, 0); + try (ToMask mask = ((BooleanBlock) b.block()).toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < b.values().size(); p++) { + List v = b.values().get(p); + if (v == null) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + if (v.size() != 1) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + assertThat(mask.mask().getBoolean(p), equalTo(v.get(0))); + } + } finally { + b.block().close(); + } + } + + public void testMask() { + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); + try ( + BooleanVector mask = BasicBlockTests.randomMask(b.values().size() + between(0, 1000)); + Block masked = b.block().keepMask(mask) + ) { + for (int p = 0; p < b.values().size(); p++) { + List inputValues = b.values().get(p); + List valuesAtPosition = BasicBlockTests.valuesAtPositions(masked, p, p + 1).get(0); + if (inputValues == null || mask.getBoolean(p) == false) { + assertThat(masked.isNull(p), equalTo(true)); + assertThat(valuesAtPosition, nullValue()); + continue; + } + assertThat(masked.isNull(p), equalTo(false)); + assertThat(valuesAtPosition, equalTo(inputValues)); + } + } finally { + b.block().close(); + } + } + private void assertFiltered(boolean all, boolean shuffled) { int positionCount = randomIntBetween(1, 16 * 1024); var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); diff --git 
a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java index 0806e41186395..f2633dfffb0fe 100644 --- a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java +++ b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java @@ -67,7 +67,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user1", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id, true)); // resource not found exception if the authenticated user is not the creator of the original task assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); @@ -85,7 +85,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user2", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id, true)); assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); error = expectThrows(ResponseException.class, () -> runAsyncDelete("user1", id)); @@ -117,6 +117,10 @@ private Response runAsync(String user, String command) throws IOException { } private Response runAsyncGet(String user, String id) throws IOException { + return runAsyncGet(user, id, false); + } + + private Response runAsyncGet(String user, String id, boolean isAsyncIdNotFound_Expected) throws IOException { int tries = 0; while (tries < 10) { // Sometimes we get 404s fetching the task status. 
@@ -129,22 +133,32 @@ private Response runAsyncGet(String user, String id) throws IOException { logResponse(response); return response; } catch (ResponseException e) { - if (e.getResponse().getStatusLine().getStatusCode() == 404 - && EntityUtils.toString(e.getResponse().getEntity()).contains("no such index [.async-search]")) { - /* - * Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search - * index may not exist when we try the fetch, but it should exist on next attempt. - */ + var statusCode = e.getResponse().getStatusLine().getStatusCode(); + var message = EntityUtils.toString(e.getResponse().getEntity()); + + if (statusCode == 404 && message.contains("no such index [.async-search]")) { + // Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search + // index may not exist when we try the fetch, but it should exist on next attempt. logger.warn("async-search index does not exist", e); try { Thread.sleep(1000); } catch (InterruptedException ex) { throw new RuntimeException(ex); } + } else if (statusCode == 404 && false == isAsyncIdNotFound_Expected && message.contains("resource_not_found_exception")) { + // Work around for https://github.com/elastic/elasticsearch/issues/112110 + // The async id is not indexed quickly enough in .async-search index for us to retrieve it. 
+ logger.warn("async id not found", e); + try { + Thread.sleep(500); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } } else { throw e; } tries++; + logger.warn("retry [" + tries + "] for GET /_query/async/" + id); } } throw new IllegalStateException("couldn't find task status"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index b20e3bb0d5409..9ee22113a4244 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -68,6 +68,11 @@ public class CsvTestsDataLoader { "mapping-sample_data_ts_long.json", "sample_data_ts_long.csv" ); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( + "missing_ip_sample_data", + "mapping-missing_ip_sample_data.json", + "missing_ip_sample_data.csv" + ); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -112,6 +117,7 @@ public class CsvTestsDataLoader { Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), + Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json new file mode 100644 index 0000000000000..6f3796dd7715d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv new file mode 100644 index 0000000000000..e8e9ddcaee83b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,5033755,Connection error +2023-10-23T13:52:55.015Z,8268153,Connection error +2023-10-23T13:51:54.732Z,725448,Connection error +2023-10-23T13:33:34.937Z,1232382,Disconnected +2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 6819727be0131..c6a2d47a78dc9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -405,6 +405,74 @@ count:long | message:keyword 2 | Connected to 10.1.0.3 ; +multiIndexMissingIpToString +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_STRING(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | 
@timestamp:date | client_ip:keyword | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexMissingIpToIp +required_capability: union_types +required_capability: 
union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + multiIndexTsLong required_capability: union_types required_capability: metadata_fields diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 81b2ba71b8808..120323ebeb7a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -183,6 +183,11 @@ public enum Cap { */ UNION_TYPES_FIX_RENAME_RESOLUTION, + /** + * Fix for union-types when some indexes are missing the required field. Done in #111932. + */ + UNION_TYPES_MISSING_FIELD, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. 
* see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java index b6ec9b6fd0e23..8f8d885ee379b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java similarity index 80% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java index 8dc0f58083179..cb7e7c4643fb9 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java @@ -4,16 +4,17 @@ * 2.0; you may not use this file except in 
compliance with the Elastic License * 2.0. */ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; +import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class ArithmeticOperation extends BinaryOperator { @@ -36,7 +37,7 @@ public ArithmeticOperation swapLeftAndRight() { @Override public DataType dataType() { if (dataType == null) { - dataType = DataTypeConverter.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java similarity index 91% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java index 358ad59ec6356..b0ab4c48d970e 100644 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java index 0e4c506a90d85..f1e197cf350b6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java index 647071c44cfd3..400e70b641111 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java @@ -13,14 +13,12 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.util.List; @@ -31,6 +29,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlArithmeticOperation extends ArithmeticOperation implements EvaluatorMapper { public static List getNamedWriteables() { @@ -133,7 +132,7 @@ public Object fold() { public DataType dataType() { if (dataType == null) { - dataType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java index a73562ff153b2..03981a821f52d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java index ee2ccc3b7107a..27f5579129cc9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java index 52d4c111b2eae..b50d70e69819d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.time.ZoneId; @@ -32,6 +31,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlBinaryComparison extends BinaryComparison implements EvaluatorMapper { public static List getNamedWriteables() { @@ -172,7 +172,7 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator( Function toEvaluator ) { // Our type is always boolean, so figure out the evaluator type from the inputs - DataType commonType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + DataType commonType = commonType(left().dataType(), right().dataType()); EvalOperator.ExpressionEvaluator.Factory lhs; EvalOperator.ExpressionEvaluator.Factory rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java index 636b31fcc691b..333f32e82c579 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; import java.io.IOException; import java.util.BitSet; @@ -269,7 +269,7 @@ private DataType commonType() { break; } } - commonType = EsqlDataTypeRegistry.INSTANCE.commonType(commonType, e.dataType()); + commonType = EsqlDataTypeConverter.commonType(commonType, e.dataType()); } return commonType; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java index 4ef069ea16d04..fe83aeb647bf9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java @@ -9,10 +9,10 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import 
org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.ArithmeticOperation; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8fddb7407a02a..04be731484267 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -138,7 +138,9 @@ private BlockLoader getBlockLoaderFor( if (unionTypes != null) { String indexName = shardContext.ctx.index().getName(); Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); - return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + return conversion == null + ? 
BlockLoader.CONSTANT_NULLS + : new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); } return blockLoader; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 1572f8950e0ac..b090708a64ad3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -58,6 +58,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; @@ -67,9 +68,14 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; +import 
static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeDoubleToLong; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeToInt; @@ -107,18 +113,6 @@ public class EsqlDataTypeConverter { entry(VERSION, ToVersion::new) ); - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - public static Converter converterFor(DataType from, DataType to) { // TODO move EXPRESSION_TO_LONG here if there is no regression if (isString(from)) { @@ -230,8 +224,63 @@ public static Object convert(Object value, DataType dataType) { return converter.convert(value); } + /** + * Returns the type compatible with both left and right types + *

+ * If one of the types is null - returns the other type + * If both types are numeric - returns type with the highest precision int &lt; long &lt; float &lt; double + */ public static DataType commonType(DataType left, DataType right) { - return DataTypeConverter.commonType(left, right); + if (left == right) { + return left; + } + if (left == NULL) { + return right; + } + if (right == NULL) { + return left; + } + if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { + if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { + return DATETIME; + } + if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { + return TIME_DURATION; + } + if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { + return DATE_PERIOD; + } + } + if (isString(left) && isString(right)) { + if (left == TEXT || right == TEXT) { + return TEXT; + } + return right; + } + if (left.isNumeric() && right.isNumeric()) { + int lsize = left.estimatedSize().orElseThrow(); + int rsize = right.estimatedSize().orElseThrow(); + // if one is int + if (left.isWholeNumber()) { + // promote the highest int + if (right.isWholeNumber()) { + if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { + return UNSIGNED_LONG; + } + return lsize > rsize ? left : right; + } + // promote the rational + return right; + } + // try the other side + if (right.isWholeNumber()) { + return left; + } + // promote the highest rational + return lsize > rsize ?
left : right; + } + // none found + return null; } // generally supporting abbreviations from https://en.wikipedia.org/wiki/Unit_of_time diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java index 96e206b82cf0c..f8e8cd37dc8b2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java @@ -10,15 +10,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.xpack.esql.core.type.DataType; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; -import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; - public class EsqlDataTypeRegistry { public static final EsqlDataTypeRegistry INSTANCE = new EsqlDataTypeRegistry(); @@ -35,19 +26,4 @@ public DataType fromEs(String typeName, TimeSeriesParams.MetricType metricType) */ return metricType == TimeSeriesParams.MetricType.COUNTER ? 
type.widenSmallNumeric().counter() : type; } - - public DataType commonType(DataType left, DataType right) { - if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { - if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { - return DATETIME; - } - if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { - return TIME_DURATION; - } - if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { - return DATE_PERIOD; - } - } - return EsqlDataTypeConverter.commonType(left, right); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java index 9f8c8f91b7037..871bf632adcc6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java @@ -35,7 +35,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.commonType; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.converterFor; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -522,25 +521,6 @@ public void testConversionToIdentity() { assertEquals(10, conversion.convert(10)); } - public void testCommonType() { - assertEquals(BOOLEAN, commonType(BOOLEAN, NULL)); - assertEquals(BOOLEAN, commonType(NULL, BOOLEAN)); - assertEquals(BOOLEAN, commonType(BOOLEAN, BOOLEAN)); - assertEquals(NULL, commonType(NULL, NULL)); - assertEquals(INTEGER, commonType(INTEGER, KEYWORD)); - assertEquals(LONG, commonType(TEXT, LONG)); - assertEquals(SHORT, commonType(SHORT, BYTE)); - 
assertEquals(FLOAT, commonType(BYTE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, INTEGER)); - assertEquals(UNSIGNED_LONG, commonType(UNSIGNED_LONG, LONG)); - assertEquals(DOUBLE, commonType(DOUBLE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, UNSIGNED_LONG)); - - // strings - assertEquals(TEXT, commonType(TEXT, KEYWORD)); - assertEquals(TEXT, commonType(KEYWORD, TEXT)); - } - public void testEsDataTypes() { for (DataType type : DataType.types()) { assertEquals(type, DataType.fromTypeName(type.typeName())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java index 0997c88aac2b0..8ad083683f696 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java @@ -8,6 +8,44 @@ package org.elasticsearch.xpack.esql.type; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; +import static org.elasticsearch.xpack.esql.core.type.DataType.PARTIAL_AGG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public class EsqlDataTypeConverterTests extends ESTestCase { @@ -16,4 +54,124 @@ public void testNanoTimeToString() { long actual = EsqlDataTypeConverter.dateNanosToLong(EsqlDataTypeConverter.nanoTimeToString(expected)); assertEquals(expected, actual); } + + public void testCommonTypeNull() { + for (DataType dataType 
: DataType.values()) { + assertEqualsCommonType(dataType, NULL, dataType); + } + } + + public void testCommonTypeStrings() { + List STRINGS = Arrays.stream(DataType.values()).filter(DataType::isString).toList(); + for (DataType dataType1 : STRINGS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if ((isString(dataType1) && isString(dataType2))) { + if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertEqualsCommonType(dataType1, dataType2, TEXT); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeDateTimeIntervals() { + List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrTemporal).toList(); + for (DataType dataType1 : DATE_TIME_INTERVALS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if (isDateTimeOrTemporal(dataType2)) { + if (isDateTime(dataType1) || isDateTime(dataType2)) { + assertEqualsCommonType(dataType1, dataType2, DATETIME); + } else if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeNumeric() { + // whole numbers + commonNumericType(BYTE, List.of(NULL, BYTE)); + commonNumericType(SHORT, List.of(NULL, BYTE, SHORT)); + commonNumericType(INTEGER, List.of(NULL, BYTE, SHORT, INTEGER)); + commonNumericType(LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG)); + commonNumericType(UNSIGNED_LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG)); + // floats + commonNumericType(HALF_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT)); + commonNumericType(FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, 
UNSIGNED_LONG, FLOAT, HALF_FLOAT)); + commonNumericType(DOUBLE, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT)); + commonNumericType(SCALED_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, SCALED_FLOAT, DOUBLE)); + } + + /** + * The first argument and the second argument(s) have the first argument as a common type. + */ + private static void commonNumericType(DataType numericType, List lowerTypes) { + List NUMERICS = Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList(); + List DOUBLES = Arrays.stream(DataType.values()).filter(DataType::isRationalNumber).toList(); + for (DataType dataType : DataType.values()) { + if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize().equals(numericType.estimatedSize()))) { + assertEquals(numericType, commonType(dataType, numericType)); + } else if (lowerTypes.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, numericType); + } else if (NUMERICS.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, dataType); + } else { + assertNullCommonType(numericType, dataType); + } + } + } + + public void testCommonTypeMiscellaneous() { + List MISCELLANEOUS = List.of( + COUNTER_INTEGER, + COUNTER_LONG, + COUNTER_DOUBLE, + UNSUPPORTED, + OBJECT, + SOURCE, + DATE_NANOS, + DOC_DATA_TYPE, + TSID_DATA_TYPE, + PARTIAL_AGG, + IP, + VERSION, + GEO_POINT, + GEO_SHAPE, + CARTESIAN_POINT, + CARTESIAN_SHAPE, + BOOLEAN + ); + for (DataType dataType1 : MISCELLANEOUS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL || dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + private static void assertEqualsCommonType(DataType dataType1, DataType dataType2, DataType commonType) { + assertEquals(commonType, commonType(dataType1, dataType2)); + assertEquals(commonType, 
commonType(dataType2, dataType1)); + } + + private static void assertNullCommonType(DataType dataType1, DataType dataType2) { + assertNull(commonType(dataType1, dataType2)); + assertNull(commonType(dataType2, dataType1)); + } } diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java new file mode 100644 index 0000000000000..65b7a138e7e1e --- /dev/null +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference; + +import org.elasticsearch.client.Request; +import org.elasticsearch.core.Strings; +import org.elasticsearch.inference.TaskType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.stream.Collectors; + +public class CustomElandModelIT extends InferenceBaseRestTest { + + // The model definition is taken from org.elasticsearch.xpack.ml.integration.TextExpansionQueryIT + + static final String BASE_64_ENCODED_MODEL = "UEsDBAAACAgAAAAAAAAAAAAAAAAAA" + + "AAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VG" + + "V4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29" + + "rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxl" + + "bW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7" + + 
"/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77" + + "KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVc" + + "tJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdF" + + "ZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26Qu" + + "Zy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9" + + "UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/e" + + "UuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZ" + + "W1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNL" + + "Fgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vL" + + "qbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLkt" + + "gNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjg" + + "hg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0Y" + + "gbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSR" + + "rhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9" + + "ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRgl" + + "XIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2" + + "dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/" + + "whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAA" + + "BkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgI" + + "AAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAA" + + 
"AACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLA" + + "QIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19f" + + "dG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVt" + + "b2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAA" + + "AAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAA" + + "AAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAA" + + "AAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA="; + + static final long RAW_MODEL_SIZE; // size of the model before base64 encoding + static { + RAW_MODEL_SIZE = Base64.getDecoder().decode(BASE_64_ENCODED_MODEL).length; + } + + // Test a sparse embedding model deployed with the ml trained models APIs + public void testSparse() throws IOException { + String modelId = "custom-text-expansion-model"; + + createTextExpansionModel(modelId); + putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); + putVocabulary( + List.of("these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"), + modelId + ); + + var inferenceConfig = """ + { + "service": "elasticsearch", + "service_settings": { + "model_id": "custom-text-expansion-model", + "num_allocations": 1, + "num_threads": 1 + } + } + """; + + var inferenceId = "sparse-inf"; + putModel(inferenceId, inferenceConfig, TaskType.SPARSE_EMBEDDING); + var results = inferOnMockService(inferenceId, List.of("washing", "machine")); + deleteModel(inferenceId); + assertNotNull(results.get("sparse_embedding")); + } + + protected void createTextExpansionModel(String modelId) throws IOException { + // with_special_tokens: false for this test with limited vocab + Request request = new Request("PUT", "/_ml/trained_models/" + modelId); + request.setJsonEntity(""" + { + "description": "a text 
expansion model", + "model_type": "pytorch", + "inference_config": { + "text_expansion": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + }"""); + client().performRequest(request); + } + + protected void putVocabulary(List vocabulary, String modelId) throws IOException { + List vocabularyWithPad = new ArrayList<>(); + vocabularyWithPad.add("[PAD]"); + vocabularyWithPad.add("[UNK]"); + vocabularyWithPad.addAll(vocabulary); + String quotedWords = vocabularyWithPad.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/vocabulary"); + request.setJsonEntity(Strings.format(""" + { "vocabulary": [%s] } + """, quotedWords)); + client().performRequest(request); + } + + protected void putModelDefinition(String modelId, String base64EncodedModel, long unencodedModelSize) throws IOException { + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/definition/0"); + String body = Strings.format(""" + {"total_definition_length":%s,"definition": "%s","total_parts": 1}""", unencodedModelSize, base64EncodedModel); + request.setJsonEntity(body); + client().performRequest(request); + } +} diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java index 77251ada4c488..893d3fb3e9b80 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java @@ -35,7 +35,7 @@ private String putCohereRerankEndpoint() throws IOException { "api_key": "" } } - """);// TODO remove key + """); return endpointID; } @@ -61,7 +61,7 @@ private String 
putCohereRerankEndpointWithDocuments() throws IOException { "return_documents": true } } - """);// TODO remove key + """); return endpointID; } @@ -81,13 +81,13 @@ private String putCohereRerankEndpointWithTop2() throws IOException { "service": "cohere", "service_settings": { "model_id": "rerank-english-v2.0", - "api_key": "8TNPBvpBO7oN97009HQHzQbBhNrxmREbcJrZCwkK" + "api_key": "" }, "task_settings": { "top_n": 2 } } - """);// TODO remove key + """); return endpointID; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 574ca77d4587e..457416370e559 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -154,10 +154,10 @@ public void isModelDownloaded(Model model, ActionListener listener) { executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); } else { listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" + new IllegalStateException( + "Can not check the download status of the model used by [" + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." + + "] as the model_id cannot be found." 
) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index c3a0111562319..cca8ae63e974c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; @@ -27,19 +25,18 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import 
org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; @@ -53,8 +50,6 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; @@ -71,15 +66,13 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); - private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); - public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { super(context); } @Override protected EnumSet supportedTaskTypes() { - return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING); + return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING); } @Override @@ -161,6 +154,12 @@ private static CustomElandModel createCustomElandModel( NAME, 
CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context) ); + case SPARSE_EMBEDDING -> new CustomElandModel( + inferenceEntityId, + taskType, + NAME, + elandServiceSettings(serviceSettings, context) + ); case RERANK -> new CustomElandRerankModel( inferenceEntityId, taskType, @@ -334,6 +333,8 @@ public void infer( inferTextEmbedding(model, input, inputType, timeout, listener); } else if (TaskType.RERANK.equals(taskType)) { inferRerank(model, query, input, inputType, timeout, taskSettings, listener); + } else if (TaskType.SPARSE_EMBEDDING.equals(taskType)) { + inferSparseEmbedding(model, input, inputType, timeout, listener); } else { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); } @@ -364,6 +365,31 @@ public void inferTextEmbedding( ); } + public void inferSparseEmbedding( + Model model, + List inputs, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + var request = buildInferenceRequest( + model.getConfigurations().getInferenceEntityId(), + TextExpansionConfigUpdate.EMPTY_UPDATE, + inputs, + inputType, + timeout, + false + ); + + client.execute( + InferModelAction.INSTANCE, + request, + listener.delegateFailureAndWrap( + (l, inferenceResult) -> l.onResponse(SparseEmbeddingResults.of(inferenceResult.getInferenceResults())) + ) + ); + } + public void inferRerank( Model model, String query, @@ -422,7 +448,7 @@ public void chunkedInfer( TimeValue timeout, ActionListener> listener ) { - if (TaskType.TEXT_EMBEDDING.isAnyOrSame(model.getTaskType()) == false) { + if ((TaskType.TEXT_EMBEDDING.equals(model.getTaskType()) || TaskType.SPARSE_EMBEDDING.equals(model.getTaskType())) == false) { listener.onFailure( new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(model.getTaskType(), NAME), RestStatus.BAD_REQUEST) ); @@ -464,6 +490,8 @@ private static List translateToChunkedResults(Li private static ChunkedInferenceServiceResults 
translateToChunkedResult(InferenceResults inferenceResult) { if (inferenceResult instanceof MlChunkedTextEmbeddingFloatResults mlChunkedResult) { return InferenceChunkedTextEmbeddingFloatResults.ofMlResults(mlChunkedResult); + } else if (inferenceResult instanceof MlChunkedTextExpansionResults mlChunkedResult) { + return InferenceChunkedSparseEmbeddingResults.ofMlResult(mlChunkedResult); } else if (inferenceResult instanceof ErrorInferenceResults error) { return new ErrorChunkedInferenceResults(error.getException()); } else { @@ -471,103 +499,6 @@ private static ChunkedInferenceServiceResults translateToChunkedResult(Inference } } - @Override - public void start(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - return; - } - - if (model.getTaskType() != TaskType.TEXT_EMBEDDING && model.getTaskType() != TaskType.RERANK) { - listener.onFailure( - new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), NAME)) - ); - return; - } - - var startRequest = ((ElasticsearchInternalModel) model).getStartTrainedModelDeploymentActionRequest(); - var responseListener = ((ElasticsearchInternalModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); - - client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); - } - - @Override - public void stop(String inferenceEntityId, ActionListener listener) { - var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); - request.setForce(true); - client.execute( - StopTrainedModelDeploymentAction.INSTANCE, - request, - listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) - ); - } - - @Override - public void putModel(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - 
listener.onFailure(notElasticsearchModelException(model)); - return; - } else if (model instanceof MultilingualE5SmallModel e5Model) { - String modelId = e5Model.getServiceSettings().modelId(); - var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used - var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); - PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); - executeAsyncWithOrigin( - client, - INFERENCE_ORIGIN, - PutTrainedModelAction.INSTANCE, - putRequest, - ActionListener.wrap(response -> listener.onResponse(Boolean.TRUE), e -> { - if (e instanceof ElasticsearchStatusException esException - && esException.getMessage().contains(PutTrainedModelAction.MODEL_ALREADY_EXISTS_ERROR_MESSAGE_FRAGMENT)) { - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure(e); - } - }) - ); - } else if (model instanceof CustomElandModel) { - logger.info("Custom eland model detected, model must have been already loaded into the cluster with eland."); - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." 
- ) - ); - return; - } - } - - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if (response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { - String modelId = internalServiceSettings.modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - } else { - listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" - + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." 
- ) - ); - } - } - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_14_0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java index 775ddca160463..948117954a63f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java @@ -28,7 +28,6 @@ import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; @@ -43,8 +42,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL; @@ -242,31 +239,6 @@ private void checkCompatibleTaskType(TaskType taskType) { } } - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if 
(response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model instanceof ElserInternalModel elserModel) { - String modelId = elserModel.getServiceSettings().modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." - ) - ); - } - } - private static ElserMlNodeTaskSettings taskSettingsFromMap(TaskType taskType, Map config) { if (taskType != TaskType.SPARSE_EMBEDDING) { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index e6fd725a50198..257616033f080 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.InferenceServiceExtension; import 
org.elasticsearch.inference.InputType; @@ -31,6 +32,7 @@ import org.elasticsearch.xpack.core.action.util.QueryPage; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; @@ -39,8 +41,10 @@ import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; +import org.elasticsearch.xpack.core.ml.inference.results.InferenceChunkedTextExpansionResultsTests; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResultsTests; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; @@ -52,12 +56,10 @@ import org.mockito.Mockito; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -76,7 +78,6 @@ public class ElasticsearchInternalServiceTests extends ESTestCase { - TaskType taskType = TaskType.TEXT_EMBEDDING; String randomInferenceEntityId = 
randomAlphaOfLength(10); private static ThreadPool threadPool; @@ -92,7 +93,25 @@ public void shutdownThreadPool() { } public void testParseRequestConfig() { + var service = createService(mock(Client.class)); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + ActionListener modelListener = ActionListener.wrap( + model -> fail("Model parsing should have failed"), + e -> assertThat(e, instanceOf(IllegalArgumentException.class)) + ); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); + service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + } + + public void testParseRequestConfig_Misconfigured() { // Null model variant { var service = createService(mock(Client.class)); @@ -109,43 +128,10 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(IllegalArgumentException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Valid model variant - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID - ) - ) - ); - - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); - - service.parseRequestConfig( - randomInferenceEntityId, - taskType, - settings, - Set.of(), - 
getModelVerificationActionListener(e5ServiceSettings) - ); - } - // Invalid config map { var service = createService(mock(Client.class)); @@ -163,10 +149,12 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } + } - // Invalid service settings + public void testParseRequestConfig_E5() { { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -179,52 +167,28 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "not_a_valid_service_setting", - randomAlphaOfLength(10) + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID ) ) ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) - ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); - } - - // Extra service settings - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "extra_setting_that_should_not_be_here", - randomAlphaOfLength(10) - ) - ) + var e5ServiceSettings = new 
MultilingualE5SmallInternalServiceSettings( + 1, + 4, + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + null ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + settings, + Set.of(), + getModelVerificationActionListener(e5ServiceSettings) ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Extra settings + // Invalid service settings { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -237,19 +201,19 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID // we can't directly test the eland case until we mock - // the threadpool within the client + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + "not_a_valid_service_setting", + randomAlphaOfLength(10) ) ) ); - settings.put("extra_setting_that_should_not_be_here", randomAlphaOfLength(10)); ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + service.parseRequestConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, Set.of(), modelListener); } } @@ -342,10 +306,53 @@ public void testParseRequestConfig_Rerank_DefaultTaskSettings() { } } + @SuppressWarnings("unchecked") + public void testParseRequestConfig_SparseEmbedding() { + var client = mock(Client.class); + doAnswer(invocation -> { + var listener = (ActionListener) invocation.getArguments()[2]; + listener.onResponse( + new GetTrainedModelsAction.Response(new 
QueryPage<>(List.of(mock(TrainedModelConfig.class)), 1, mock(ParseField.class))) + ); + return null; + }).when(client).execute(Mockito.same(GetTrainedModelsAction.INSTANCE), any(), any()); + + when(client.threadPool()).thenReturn(threadPool); + + var service = createService(client); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + "foo" + ) + ) + ); + + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(CustomElandModel.class)); + assertThat(model.getTaskSettings(), instanceOf(EmptyTaskSettings.class)); + assertThat(model.getServiceSettings(), instanceOf(CustomElandInternalServiceSettings.class)); + }, e -> { fail("Model parsing failed " + e.getMessage()); }); + + service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, settings, Set.of(), modelListener); + } + private ActionListener getModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { return ActionListener.wrap(model -> { assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), model ); }, e -> { fail("Model parsing failed " + e.getMessage()); }); @@ -371,7 +378,10 @@ public void testParsePersistedConfig() { ) ); - expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); + expectThrows( + IllegalArgumentException.class, + () -> service.parsePersistedConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings) + ); } @@ -397,12 +407,17 @@ public void testParsePersistedConfig() { 
CustomElandEmbeddingModel parsedModel = (CustomElandEmbeddingModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); var elandServiceSettings = new CustomElandInternalTextEmbeddingServiceSettings(1, 4, "invalid", null); assertEquals( - new CustomElandEmbeddingModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, elandServiceSettings), + new CustomElandEmbeddingModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + elandServiceSettings + ), parsedModel ); } @@ -436,11 +451,16 @@ public void testParsePersistedConfig() { MultilingualE5SmallModel parsedModel = (MultilingualE5SmallModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), parsedModel ); } @@ -456,6 +476,8 @@ public void testParsePersistedConfig() { ) ); settings.put("not_a_valid_config_setting", randomAlphaOfLength(10)); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } @@ -476,12 +498,13 @@ public void testParsePersistedConfig() { ) ) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } } @SuppressWarnings("unchecked") - public void testChunkInfer() { + public void testChunkInfer_e5() { var mlTrainedModelResults = new ArrayList(); 
mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); @@ -568,6 +591,63 @@ public void testChunkInfer() { assertTrue("Listener not called", gotResults.get()); } + @SuppressWarnings("unchecked") + public void testChunkInfer_Sparse() { + var mlTrainedModelResults = new ArrayList(); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(new ErrorInferenceResults(new RuntimeException("boom"))); + var response = new InferModelAction.Response(mlTrainedModelResults, "foo", true); + + ThreadPool threadpool = new TestThreadPool("test"); + Client client = mock(Client.class); + when(client.threadPool()).thenReturn(threadpool); + doAnswer(invocationOnMock -> { + var listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(response); + return null; + }).when(client).execute(same(InferModelAction.INSTANCE), any(InferModelAction.Request.class), any(ActionListener.class)); + + var model = new CustomElandModel( + "foo", + TaskType.SPARSE_EMBEDDING, + "elasticsearch", + new ElasticsearchInternalServiceSettings(1, 1, "model-id", null) + ); + var service = createService(client); + + var gotResults = new AtomicBoolean(); + var resultsListener = ActionListener.>wrap(chunkedResponse -> { + assertThat(chunkedResponse, hasSize(3)); + assertThat(chunkedResponse.get(0), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result1 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(0); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(0)).getChunks(), result1.getChunkedResults()); + assertThat(chunkedResponse.get(1), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result2 = (InferenceChunkedSparseEmbeddingResults) 
chunkedResponse.get(1); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(1)).getChunks(), result2.getChunkedResults()); + var result3 = (ErrorChunkedInferenceResults) chunkedResponse.get(2); + assertThat(result3.getException(), instanceOf(RuntimeException.class)); + assertThat(result3.getException().getMessage(), containsString("boom")); + gotResults.set(true); + }, ESTestCase::fail); + + service.chunkedInfer( + model, + null, + List.of("foo", "bar"), + Map.of(), + InputType.SEARCH, + new ChunkingOptions(null, null), + InferenceAction.Request.DEFAULT_TIMEOUT, + ActionListener.runAfter(resultsListener, () -> terminate(threadpool)) + ); + + if (gotResults.get() == false) { + terminate(threadpool); + } + assertTrue("Listener not called", gotResults.get()); + } + @SuppressWarnings("unchecked") public void testChunkInferSetsTokenization() { var expectedSpan = new AtomicInteger(); @@ -711,7 +791,7 @@ public void testParseRequestConfigEland_PreservesTaskType() { ) ); - var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING)); + var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING)); CustomElandModel expectedModel = getCustomElandModel(taskType); PlainActionFuture listener = new PlainActionFuture<>(); @@ -739,6 +819,13 @@ private CustomElandModel getCustomElandModel(TaskType taskType) { ElasticsearchInternalService.NAME, serviceSettings ); + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + expectedModel = new CustomElandModel( + randomInferenceEntityId, + taskType, + ElasticsearchInternalService.NAME, + new CustomElandInternalServiceSettings(1, 4, "custom-model", null) + ); } return expectedModel; } @@ -867,21 +954,4 @@ private ElasticsearchInternalService createService(Client client) { var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client); return new ElasticsearchInternalService(context); } - - public static Model randomModelConfig(String 
inferenceEntityId) { - List givenList = Arrays.asList("MultilingualE5SmallModel"); - Random rand = org.elasticsearch.common.Randomness.get(); - String model = givenList.get(rand.nextInt(givenList.size())); - - return switch (model) { - case "MultilingualE5SmallModel" -> new MultilingualE5SmallModel( - inferenceEntityId, - TaskType.TEXT_EMBEDDING, - ElasticsearchInternalService.NAME, - MultilingualE5SmallInternalServiceSettingsTests.createRandom() - ); - default -> throw new IllegalArgumentException("model " + model + " is not supported for testing"); - }; - } - } diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java index b2a0b60aed7ba..4a703117c6551 100644 --- a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java @@ -16,7 +16,9 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -28,189 +30,11 @@ public class LearningToRankRescorerIT extends InferenceTestCase { @Before public void setupModelAndData() throws IOException { - putRegressionModel(MODEL_ID, """ - { - "description": "super complex model for tests", - "inference_config": { - "learning_to_rank": { - "feature_extractors": [ - { - "query_extractor": { - "feature_name": "cost", - "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} - } - }, - { - "query_extractor": { - "feature_name": "type_tv", - "query": {"constant_score": {"filter": {"term": { "product": 
"TV" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_vcr", - "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_laptop", - "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "two", - "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } - } - }, - { - "query_extractor": { - "feature_name": "product_bm25", - "query": { "term": { "product": "{{keyword}}" } } - } - } - ] - } - }, - "definition": { - "trained_model": { - "ensemble": { - "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], - "target_type": "regression", - "trained_models": [ - { - "tree": { - "feature_names": [ - "cost" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 400, - "decision_type": "lte", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 5.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "type_tv" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 12.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "two" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": 
"regression" - } - }, - { - "tree": { - "feature_names": [ - "product_bm25" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 4.0 - } - ], - "target_type": "regression" - } - } - ] - } - } - } - } - """); - createIndex(INDEX_NAME, Settings.EMPTY, """ - "properties":{ - "product":{"type": "keyword"}, - "cost":{"type": "integer"} - }"""); - indexData("{ \"product\": \"TV\", \"cost\": 300}"); - indexData("{ \"product\": \"TV\", \"cost\": 400}"); - indexData("{ \"product\": \"TV\", \"cost\": 600}"); - indexData("{ \"product\": \"VCR\", \"cost\": 15}"); - indexData("{ \"product\": \"VCR\", \"cost\": 350}"); - indexData("{ \"product\": \"VCR\", \"cost\": 580}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 100}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 300}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 500}"); + putRegressionModel(MODEL_ID, testRegressionModel); + createIndex(INDEX_NAME, Settings.EMPTY, testIndexDefinition); + for (String testDataItem : testIndexData) { + indexData(testDataItem); + } adminClient().performRequest(new Request("POST", INDEX_NAME + "/_refresh")); } @@ -249,6 +73,19 @@ public void testLearningToRankRescore() throws Exception { assertHitScores(client().performRequest(request), List.of(9.0, 9.0, 6.0)); } + public void testLearningToRankRescoreWithExplain() throws Exception { + Request request = new Request("GET", "store/_search?size=3&explain=true&error_trace"); + request.setJsonEntity(""" + { + "rescore": { + "window_size": 10, + "learning_to_rank": { "model_id": "ltr-model" } + } + }"""); + var response = client().performRequest(request); + assertExplainExtractedFeatures(response, List.of("type_tv", "cost", "two")); + } + public void testLearningToRankRescoreSmallWindow() throws 
Exception { Request request = new Request("GET", "store/_search?size=5"); request.setJsonEntity(""" @@ -336,4 +173,234 @@ private void indexData(String data) throws IOException { private static void assertHitScores(Response response, List expectedScores) throws IOException { assertThat((List) XContentMapValues.extractValue("hits.hits._score", responseAsMap(response)), equalTo(expectedScores)); } + + @SuppressWarnings("unchecked") + private static void assertExplainExtractedFeatures(Response response, List expectedFeatures) throws IOException { + var explainValues = (ArrayList>) XContentMapValues.extractValue( + "hits.hits._explanation", + responseAsMap(response) + ); + + assertThat(explainValues.size(), equalTo(3)); + for (Map hit : explainValues) { + assertThat(hit.get("description"), equalTo("rescored using LTR model ltr-model")); + + var queryDetails = (ArrayList>) hit.get("details"); + assertThat(queryDetails.size(), equalTo(2)); + + assertThat(queryDetails.get(0).get("description"), equalTo("first pass query score")); + assertThat(queryDetails.get(1).get("description"), equalTo("extracted features")); + + var featureDetails = new ArrayList<>((ArrayList>) queryDetails.get(1).get("details")); + assertThat(featureDetails.size(), equalTo(3)); + + var missingKeys = new ArrayList(); + for (String expectedFeature : expectedFeatures) { + var expectedDescription = Strings.format("feature value for [%s]", expectedFeature); + + var wasFound = false; + for (Map detailItem : featureDetails) { + if (detailItem.get("description").equals(expectedDescription)) { + featureDetails.remove(detailItem); + wasFound = true; + break; + } + } + + if (wasFound == false) { + missingKeys.add(expectedFeature); + } + } + + assertThat(Strings.format("Could not find features: [%s]", String.join(", ", missingKeys)), featureDetails.size(), equalTo(0)); + } + } + + private static String testIndexDefinition = """ + "properties":{ + "product":{"type": "keyword"}, + "cost":{"type": "integer"} + 
}"""; + + private static List testIndexData = List.of( + "{ \"product\": \"TV\", \"cost\": 300}", + "{ \"product\": \"TV\", \"cost\": 400}", + "{ \"product\": \"TV\", \"cost\": 600}", + "{ \"product\": \"VCR\", \"cost\": 15}", + "{ \"product\": \"VCR\", \"cost\": 350}", + "{ \"product\": \"VCR\", \"cost\": 580}", + "{ \"product\": \"Laptop\", \"cost\": 100}", + "{ \"product\": \"Laptop\", \"cost\": 300}", + "{ \"product\": \"Laptop\", \"cost\": 500}" + ); + + private static String testRegressionModel = """ + { + "description": "super complex model for tests", + "inference_config": { + "learning_to_rank": { + "feature_extractors": [ + { + "query_extractor": { + "feature_name": "cost", + "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} + } + }, + { + "query_extractor": { + "feature_name": "type_tv", + "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_vcr", + "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_laptop", + "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "two", + "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } + } + }, + { + "query_extractor": { + "feature_name": "product_bm25", + "query": { "term": { "product": "{{keyword}}" } } + } + } + ] + } + }, + "definition": { + "trained_model": { + "ensemble": { + "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], + "target_type": "regression", + "trained_models": [ + { + "tree": { + "feature_names": [ + "cost" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 400, + "decision_type": "lte", + "default_left": true, + "left_child": 1, 
+ "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 5.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "type_tv" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 12.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "two" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "product_bm25" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 4.0 + } + ], + "target_type": "regression" + } + } + ] + } + } + } + } + """; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java index 66f4797ef707c..fa94bf96c1167 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java @@ -51,11 +51,6 @@ public boolean isNlpEnabled() { return true; } - @Override - public boolean isLearningToRankEnabled() { - return true; - } - @Override public String[] 
getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index c4bf92401be9d..5876836185ba3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -902,7 +902,7 @@ private static void reportClashingNodeAttribute(String attrName) { @Override public List> getRescorers() { - if (enabled && machineLearningExtension.get().isLearningToRankEnabled()) { + if (enabled) { return List.of( new RescorerSpec<>( LearningToRankRescorerBuilder.NAME, @@ -1864,10 +1864,8 @@ public List getNamedXContent() { ) ); namedXContent.addAll(new CorrelationNamedContentProvider().getNamedXContentParsers()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); - } + namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); + return namedXContent; } @@ -1958,10 +1956,8 @@ public List getNamedWriteables() { namedWriteables.addAll(MlAutoscalingNamedWritableProvider.getNamedWriteables()); namedWriteables.addAll(new CorrelationNamedContentProvider().getNamedWriteables()); namedWriteables.addAll(new ChangePointNamedContentProvider().getNamedWriteables()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); - } + namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); + return namedWriteables; } diff --git 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java index 528883439ef2f..f46652978753c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java @@ -25,10 +25,6 @@ default void configure(Settings settings) {} boolean isNlpEnabled(); - default boolean isLearningToRankEnabled() { - return true; - } - default boolean disableInferenceProcessCache() { return false; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java index 8a310ba2719f2..70d0b980bb3bf 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java @@ -28,6 +28,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import static java.util.stream.Collectors.toUnmodifiableSet; @@ -129,8 +130,58 @@ public TopDocs rescore(TopDocs topDocs, IndexSearcher searcher, RescoreContext r @Override public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation sourceExplanation) throws IOException { - // TODO: Call infer again but with individual feature importance values and explaining the model (which features are used, etc.) 
- return null; + if (sourceExplanation == null) { + return Explanation.noMatch("no match found"); + } + + LearningToRankRescorerContext ltrContext = (LearningToRankRescorerContext) rescoreContext; + LocalModel localModelDefinition = ltrContext.regressionModelDefinition; + + if (localModelDefinition == null) { + throw new IllegalStateException("local model reference is null, missing rewriteAndFetch before rescore phase?"); + } + + List leaves = ltrContext.executionContext.searcher().getIndexReader().leaves(); + + int endDoc = 0; + int readerUpto = -1; + LeafReaderContext currentSegment = null; + + while (topLevelDocId >= endDoc) { + readerUpto++; + currentSegment = leaves.get(readerUpto); + endDoc = currentSegment.docBase + currentSegment.reader().maxDoc(); + } + + assert currentSegment != null : "Unexpected null segment"; + + int targetDoc = topLevelDocId - currentSegment.docBase; + + List featureExtractors = ltrContext.buildFeatureExtractors(searcher); + int featureSize = featureExtractors.stream().mapToInt(fe -> fe.featureNames().size()).sum(); + + Map features = Maps.newMapWithExpectedSize(featureSize); + + for (FeatureExtractor featureExtractor : featureExtractors) { + featureExtractor.setNextReader(currentSegment); + featureExtractor.addFeatures(features, targetDoc); + } + + // Predicting the value + var ltrScore = ((Number) localModelDefinition.inferLtr(features, ltrContext.learningToRankConfig).predictedValue()).floatValue(); + + List featureExplanations = new ArrayList<>(); + for (String featureName : features.keySet()) { + Number featureValue = Objects.requireNonNullElse((Number) features.get(featureName), 0); + featureExplanations.add(Explanation.match(featureValue, "feature value for [" + featureName + "]")); + } + + return Explanation.match( + ltrScore, + "rescored using LTR model " + ltrContext.regressionModelDefinition.getModelId(), + Explanation.match(sourceExplanation.getValue(), "first pass query score", sourceExplanation), + Explanation.match(0f, 
"extracted features", featureExplanations) + ); } /** Returns a new {@link TopDocs} with the topN from the incoming one, or the same TopDocs if the number of hits is already <= diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java index 175a035a70f7e..3ff3a4a404f97 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningAdOnly extends LocalStateMachineLearning { public LocalStateMachineLearningAdOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java index f054e52dc29ec..1a72f27865d8a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningDfaOnly extends LocalStateMachineLearning { public LocalStateMachineLearningDfaOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false))); } } diff --git 
a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java index a3d684011e932..0f11e8033b83d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningNlpOnly extends LocalStateMachineLearning { public LocalStateMachineLearningNlpOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index afa372fb94527..e5575abfeb020 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -160,14 +160,7 @@ private MachineLearningUsageTransportAction newUsageAction( licenseState, jobManagerHolder, new MachineLearningExtensionHolder( - new MachineLearningTests.MlTestExtension( - true, - true, - isAnomalyDetectionEnabled, - isDataFrameAnalyticsEnabled, - isNlpEnabled, - true - ) + new MachineLearningTests.MlTestExtension(true, true, isAnomalyDetectionEnabled, isDataFrameAnalyticsEnabled, isNlpEnabled) ) ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java index c35b9da7b2bd2..8a05537917abe 100644 
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java @@ -220,7 +220,7 @@ public void testNoAttributes_givenClash() throws IOException { public void testAnomalyDetectionOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -240,7 +240,7 @@ public void testAnomalyDetectionOnly() throws IOException { public void testDataFrameAnalyticsOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -260,7 +260,7 @@ public void testDataFrameAnalyticsOnly() throws IOException { public void testNlpOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true, false)); + 
MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -287,22 +287,19 @@ public static class MlTestExtension implements MachineLearningExtension { private final boolean isAnomalyDetectionEnabled; private final boolean isDataFrameAnalyticsEnabled; private final boolean isNlpEnabled; - private final boolean isLearningToRankEnabled; MlTestExtension( boolean useIlm, boolean includeNodeInfo, boolean isAnomalyDetectionEnabled, boolean isDataFrameAnalyticsEnabled, - boolean isNlpEnabled, - boolean isLearningToRankEnabled + boolean isNlpEnabled ) { this.useIlm = useIlm; this.includeNodeInfo = includeNodeInfo; this.isAnomalyDetectionEnabled = isAnomalyDetectionEnabled; this.isDataFrameAnalyticsEnabled = isDataFrameAnalyticsEnabled; this.isNlpEnabled = isNlpEnabled; - this.isLearningToRankEnabled = isLearningToRankEnabled; } @Override @@ -330,11 +327,6 @@ public boolean isNlpEnabled() { return isNlpEnabled; } - @Override - public boolean isLearningToRankEnabled() { - return isLearningToRankEnabled; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java index fb74631970813..1bf7d89347755 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java +++ 
b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java @@ -144,6 +144,43 @@ public void testWithRuntimeFields() throws Exception { .endObject() ) ); + doTestWithRuntimeFieldsInTestIndex(); + } + + public void testWithRuntimeFieldsAndSyntheticSource() throws Exception { + assertAcked( + indicesAdmin().prepareCreate("test") + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .field("mode", "synthetic") + .endObject() + .startObject("runtime") + .startObject("field1") + .field("type", "keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + .startObject("properties") + .startObject("field1") + .field("type", "text") + .field("store", true) + .endObject() + .startObject("field2") + .field("type", "text") + .field("store", true) + .endObject() + .endObject() + .endObject() + ) + ); + doTestWithRuntimeFieldsInTestIndex(); + } + + private void doTestWithRuntimeFieldsInTestIndex() { List requests = new ArrayList<>(47); for (int i = 1; i <= 42; i++) { requests.add(prepareIndex("test").setSource("field1", "value1", "field2", "foo" + i)); @@ -158,5 +195,4 @@ public void testWithRuntimeFields() throws Exception { 42L ); } - }