From b2d400ccf17b6e04012e3c667baa94626e199728 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:29:25 +1000 Subject: [PATCH 01/30] Mute org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT testRankDocsRetrieverWithCollapse #112254 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 7feefa1255f4..ec2a846f71c4 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -172,6 +172,9 @@ tests: - class: org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests method: testGetMultiThreaded issue: https://github.com/elastic/elasticsearch/issues/112314 +- class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT + method: testRankDocsRetrieverWithCollapse + issue: https://github.com/elastic/elasticsearch/issues/112254 # Examples: # From 633f5f9fe37618e1a998e397cdb006db4af55610 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:41:59 +1000 Subject: [PATCH 02/30] Mute org.elasticsearch.search.ccs.CCSUsageTelemetryIT org.elasticsearch.search.ccs.CCSUsageTelemetryIT #112324 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index ec2a846f71c4..71a347920178 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -175,6 +175,8 @@ tests: - class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT method: testRankDocsRetrieverWithCollapse issue: https://github.com/elastic/elasticsearch/issues/112254 +- class: org.elasticsearch.search.ccs.CCSUsageTelemetryIT + issue: https://github.com/elastic/elasticsearch/issues/112324 # Examples: # From 9344f173d32231f1c47e5ef994bffa27b61da876 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 05:54:13 +0100 Subject: [PATCH 03/30] Add general read/write optional support (#112276) Today `StreamOutput#writeOptionalWriteable` 
allows to write a possibly-null value that implements `Writeable` and therefore carries its own serialization, but sometimes we want to write an optional value and provide a custom `Writer` too. This commit adds `StreamOutput#writeOptional` and a corresponding `StreamInput#readOptional` to support this. --- .../action/bulk/BulkItemRequest.java | 10 +++--- .../action/bulk/BulkItemResponse.java | 32 +++++++------------ .../action/bulk/BulkShardRequest.java | 9 +----- .../action/bulk/BulkShardResponse.java | 2 +- .../common/io/stream/StreamInput.java | 15 +++++++++ .../common/io/stream/StreamOutput.java | 21 ++++++++++++ .../bucket/range/InternalBinaryRange.java | 14 +++----- .../common/io/stream/AbstractStreamTests.java | 11 +++++++ .../core/rollup/job/RollupJobStatus.java | 9 +++--- .../actions/execute/ExecuteWatchRequest.java | 18 +++-------- 10 files changed, 77 insertions(+), 64 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java index 425461d1f4ba..7c1304f92eef 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java @@ -101,11 +101,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(primaryResponse); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - DocWriteRequest.writeDocumentRequestThin(out, request); - out.writeOptionalWriteable(primaryResponse == null ? 
null : primaryResponse::writeThin); - } + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + DocWriteRequest.writeDocumentRequestThin(out, item.request); + out.writeOptional(BulkItemResponse.THIN_WRITER, item.primaryResponse); + }; @Override public long ramBytesUsed() { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java index 151e8795d0f8..d3e550eaf05b 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java @@ -264,7 +264,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(shardId, in); - failure = in.readBoolean() ? new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -272,7 +272,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(in); - failure = in.readBoolean() ? 
new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -384,31 +384,21 @@ public void writeTo(StreamOutput out) throws IOException { writeResponseType(out); response.writeTo(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } + out.writeOptionalWriteable(failure); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - out.writeByte(opType.getId()); + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + out.writeByte(item.opType.getId()); - if (response == null) { + if (item.response == null) { out.writeByte((byte) 2); } else { - writeResponseType(out); - response.writeThin(out); + item.writeResponseType(out); + item.response.writeThin(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } - } + out.writeOptionalWriteable(item.failure); + }; private void writeResponseType(StreamOutput out) throws IOException { if (response instanceof SimulateIndexResponse) { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java index 0d2942e68838..f7860c47d8b7 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java @@ -130,14 +130,7 @@ public void writeTo(StreamOutput out) throws IOException { throw new IllegalStateException("Inference metadata should have been consumed before writing to the stream"); } super.writeTo(out); - out.writeArray((o, item) -> { - if (item != null) { - o.writeBoolean(true); - item.writeThin(o); - } else { - o.writeBoolean(false); - } - }, items); + out.writeArray((o, item) -> o.writeOptional(BulkItemRequest.THIN_WRITER, item), items); if 
(out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_VALIDATES_MAPPINGS)) { out.writeBoolean(isSimulated); } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java index 3eeb96546c9b..eb1bb0468c9b 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java @@ -56,6 +56,6 @@ public void setForcedRefresh(boolean forcedRefresh) { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); shardId.writeTo(out); - out.writeArray((o, item) -> item.writeThin(o), responses); + out.writeArray(BulkItemResponse.THIN_WRITER, responses); } } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index ec0edb2d07e5..497028ef37c6 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -1095,8 +1095,23 @@ public T[] readOptionalArray(Writeable.Reader reader, IntFunction ar return readBoolean() ? readArray(reader, arraySupplier) : null; } + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptionalWriteable + */ + // just an alias for readOptional() since we don't actually care whether T extends Writeable @Nullable public T readOptionalWriteable(Writeable.Reader reader) throws IOException { + return readOptional(reader); + } + + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. 
+ * + * @see StreamOutput#writeOptional + */ + public T readOptional(Writeable.Reader reader) throws IOException { if (readBoolean()) { T t = reader.read(this); if (t == null) { diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index c65ae2e3463d..5780885473b0 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -1015,6 +1015,12 @@ public void writeOptionalArray(@Nullable T[] array) throws writeOptionalArray(StreamOutput::writeWriteable, array); } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. + * + * @see StreamInput#readOptionalWriteable + */ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOException { if (writeable != null) { writeBoolean(true); @@ -1024,6 +1030,21 @@ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOExcep } } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. 
+ * + * @see StreamInput#readOptional + */ + public void writeOptional(Writer writer, @Nullable T maybeItem) throws IOException { + if (maybeItem != null) { + writeBoolean(true); + writer.write(this, maybeItem); + } else { + writeBoolean(false); + } + } + /** * This method allow to use a method reference when writing collection elements such as * {@code out.writeMap(map, StreamOutput::writeString, StreamOutput::writeWriteable)} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java index 2b5bcd9931f6..528c37de7a4a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java @@ -72,8 +72,8 @@ private static Bucket createFromStream(StreamInput in, DocValueFormat format, bo String key = in.getTransportVersion().equals(TransportVersions.V_8_0_0) ? in.readString() : in.getTransportVersion().onOrAfter(TransportVersions.V_7_17_1) ? in.readOptionalString() : in.readString(); - BytesRef from = in.readBoolean() ? in.readBytesRef() : null; - BytesRef to = in.readBoolean() ? in.readBytesRef() : null; + BytesRef from = in.readOptional(StreamInput::readBytesRef); + BytesRef to = in.readOptional(StreamInput::readBytesRef); long docCount = in.readLong(); InternalAggregations aggregations = InternalAggregations.readFrom(in); @@ -89,14 +89,8 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeString(key == null ? 
generateKey(from, to, format) : key); } - out.writeBoolean(from != null); - if (from != null) { - out.writeBytesRef(from); - } - out.writeBoolean(to != null); - if (to != null) { - out.writeBytesRef(to); - } + out.writeOptional(StreamOutput::writeBytesRef, from); + out.writeOptional(StreamOutput::writeBytesRef, to); out.writeLong(docCount); aggregations.writeTo(out); } diff --git a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java index b1104a72400e..ae686afcbb29 100644 --- a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java @@ -761,6 +761,17 @@ public void checkZonedDateTimeSerialization(TransportVersion tv) throws IOExcept } } + public void testOptional() throws IOException { + try (var output = new BytesStreamOutput()) { + output.writeOptional(StreamOutput::writeString, "not-null"); + output.writeOptional(StreamOutput::writeString, null); + + final var input = getStreamInput(output.bytes()); + assertEquals("not-null", input.readOptional(StreamInput::readString)); + assertNull(input.readOptional(StreamInput::readString)); + } + } + private void assertSerialization( CheckedConsumer outputAssertions, CheckedConsumer inputAssertions, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java index 1ba625a507a4..f7ad1f65628b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java @@ -74,7 +74,7 @@ public RollupJobStatus(IndexerState state, @Nullable Map positio public RollupJobStatus(StreamInput in) throws IOException { state = 
IndexerState.fromStream(in); - currentPosition = in.readBoolean() ? new TreeMap<>(in.readGenericMap()) : null; + currentPosition = in.readOptional(CURRENT_POSITION_READER); if (in.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes serialize `upgradedDocumentID` flag. We don't need it anymore, but // we need to pull it off the stream @@ -83,6 +83,8 @@ public RollupJobStatus(StreamInput in) throws IOException { } } + private static final Reader> CURRENT_POSITION_READER = in -> new TreeMap<>(in.readGenericMap()); + public IndexerState getIndexerState() { return state; } @@ -118,10 +120,7 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { state.writeTo(out); - out.writeBoolean(currentPosition != null); - if (currentPosition != null) { - out.writeGenericMap(currentPosition); - } + out.writeOptional(StreamOutput::writeGenericMap, currentPosition); if (out.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes expect a boolean `upgradedDocumentID` flag. 
We don't have it anymore, // but we need to tell them we are upgraded in case there is a mixed cluster diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java index 681b004dd1d2..2f2617f956ed 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java @@ -59,12 +59,8 @@ public ExecuteWatchRequest(StreamInput in) throws IOException { id = in.readOptionalString(); ignoreCondition = in.readBoolean(); recordExecution = in.readBoolean(); - if (in.readBoolean()) { - alternativeInput = in.readGenericMap(); - } - if (in.readBoolean()) { - triggerData = in.readGenericMap(); - } + alternativeInput = in.readOptional(StreamInput::readGenericMap); + triggerData = in.readOptional(StreamInput::readGenericMap); long actionModesCount = in.readLong(); actionModes = new HashMap<>(); for (int i = 0; i < actionModesCount; i++) { @@ -83,14 +79,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(id); out.writeBoolean(ignoreCondition); out.writeBoolean(recordExecution); - out.writeBoolean(alternativeInput != null); - if (alternativeInput != null) { - out.writeGenericMap(alternativeInput); - } - out.writeBoolean(triggerData != null); - if (triggerData != null) { - out.writeGenericMap(triggerData); - } + out.writeOptional(StreamOutput::writeGenericMap, alternativeInput); + out.writeOptional(StreamOutput::writeGenericMap, triggerData); out.writeLong(actionModes.size()); for (Map.Entry entry : actionModes.entrySet()) { out.writeString(entry.getKey()); From 59a42ed41b72ea92e62e4522b83d9f9f48955203 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 
06:03:13 +0100 Subject: [PATCH 04/30] Include network disconnect info in troubleshooting docs (#112323) A misplaced `//end::` tag meant that the docs added in #112271 are only included in the page on fault detection and not the equivalent troubleshooting docs. This commit fixes the problem. --- docs/reference/modules/discovery/fault-detection.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 89c8a78eccbc..d12985b70597 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -300,7 +300,6 @@ To reconstruct the output, base64-decode the data and decompress it using ---- cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ---- -//end::troubleshooting[] [discrete] ===== Diagnosing other network disconnections @@ -345,3 +344,4 @@ packet capture simultaneously from the nodes at both ends of an unstable connection and analyse it alongside the {es} logs from those nodes to determine if traffic between the nodes is being disrupted by another device on the network. 
+//end::troubleshooting[] From aa67bdb5ca8abebcee8a50ebb58e6160d134230c Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 29 Aug 2024 09:53:09 +0300 Subject: [PATCH 05/30] ES|QL: EsqlAsyncSecurityIT workaround for lazy .async-search indexing (#112287) --- muted-tests.yml | 3 -- .../xpack/esql/EsqlAsyncSecurityIT.java | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 71a347920178..508403ee6238 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -137,9 +137,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAfterMissingIndex issue: https://github.com/elastic/elasticsearch/issues/112088 -- class: org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT - method: testLimitedPrivilege - issue: https://github.com/elastic/elasticsearch/issues/112110 - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {stats.ByTwoCalculatedSecondOverwrites SYNC} issue: https://github.com/elastic/elasticsearch/issues/112117 diff --git a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java index 0806e4118639..f2633dfffb0f 100644 --- a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java +++ b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java @@ -67,7 +67,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user1", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id, true)); // resource not found exception if the authenticated user is not the creator of the original task 
assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); @@ -85,7 +85,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user2", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id, true)); assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); error = expectThrows(ResponseException.class, () -> runAsyncDelete("user1", id)); @@ -117,6 +117,10 @@ private Response runAsync(String user, String command) throws IOException { } private Response runAsyncGet(String user, String id) throws IOException { + return runAsyncGet(user, id, false); + } + + private Response runAsyncGet(String user, String id, boolean isAsyncIdNotFound_Expected) throws IOException { int tries = 0; while (tries < 10) { // Sometimes we get 404s fetching the task status. @@ -129,22 +133,32 @@ private Response runAsyncGet(String user, String id) throws IOException { logResponse(response); return response; } catch (ResponseException e) { - if (e.getResponse().getStatusLine().getStatusCode() == 404 - && EntityUtils.toString(e.getResponse().getEntity()).contains("no such index [.async-search]")) { - /* - * Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search - * index may not exist when we try the fetch, but it should exist on next attempt. - */ + var statusCode = e.getResponse().getStatusLine().getStatusCode(); + var message = EntityUtils.toString(e.getResponse().getEntity()); + + if (statusCode == 404 && message.contains("no such index [.async-search]")) { + // Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search + // index may not exist when we try the fetch, but it should exist on next attempt. 
logger.warn("async-search index does not exist", e); try { Thread.sleep(1000); } catch (InterruptedException ex) { throw new RuntimeException(ex); } + } else if (statusCode == 404 && false == isAsyncIdNotFound_Expected && message.contains("resource_not_found_exception")) { + // Work around for https://github.com/elastic/elasticsearch/issues/112110 + // The async id is not indexed quickly enough in .async-search index for us to retrieve it. + logger.warn("async id not found", e); + try { + Thread.sleep(500); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } } else { throw e; } tries++; + logger.warn("retry [" + tries + "] for GET /_query/async/" + id); } } throw new IllegalStateException("couldn't find task status"); From b9dea69b5ca5b34600d1fc51badc3a9b163107b2 Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 15:17:27 +0800 Subject: [PATCH 06/30] [Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API (#112273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- docs/changelog/112273.yaml | 5 + .../inference/inference-apis.asciidoc | 1 + .../inference/put-inference.asciidoc | 1 + .../service-alibabacloud-ai-search.asciidoc | 184 ++++++++++++++++++ .../semantic-search-inference.asciidoc | 1 + .../infer-api-ingest-pipeline-widget.asciidoc | 17 ++ .../infer-api-ingest-pipeline.asciidoc | 26 +++ .../infer-api-mapping-widget.asciidoc | 17 ++ .../inference-api/infer-api-mapping.asciidoc | 32 +++ .../infer-api-reindex-widget.asciidoc | 17 ++ .../inference-api/infer-api-reindex.asciidoc | 23 +++ .../infer-api-requirements-widget.asciidoc | 17 ++ .../infer-api-requirements.asciidoc | 6 + .../infer-api-search-widget.asciidoc | 17 ++ .../inference-api/infer-api-search.asciidoc | 65 +++++++ .../infer-api-task-widget.asciidoc | 17 ++ .../inference-api/infer-api-task.asciidoc | 29 +++ 17 files changed, 475 insertions(+) create mode 
100644 docs/changelog/112273.yaml create mode 100644 docs/reference/inference/service-alibabacloud-ai-search.asciidoc diff --git a/docs/changelog/112273.yaml b/docs/changelog/112273.yaml new file mode 100644 index 000000000000..3182a1884a14 --- /dev/null +++ b/docs/changelog/112273.yaml @@ -0,0 +1,5 @@ +pr: 111181 +summary: "[Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API" +area: Machine Learning +type: enhancement +issues: [ ] diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index 33db148755d8..8fdf8aecc2ae 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -39,6 +39,7 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::service-alibabacloud-ai-search.asciidoc[] include::service-amazon-bedrock.asciidoc[] include::service-anthropic.asciidoc[] include::service-azure-ai-studio.asciidoc[] diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 57485e0720cc..ba26a563541f 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -39,6 +39,7 @@ The create {infer} API enables you to create an {infer} endpoint and configure a The following services are available through the {infer} API, click the links to review the configuration details of the services: +* <> * <> * <> * <> diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc new file mode 100644 index 000000000000..df5220573d9e --- /dev/null +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -0,0 +1,184 @@ +[[infer-service-alibabacloud-ai-search]] +=== AlibabaCloud AI Search {infer} service + +Creates an {infer} endpoint to 
perform an {infer} task with the `alibabacloud-ai-search` service. + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-alibabacloud-ai-search-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `text_embedding`, +* `sparse_embedding`. +* `rerank`. +-- + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) The type of service supported for the specified task type. +In this case, +`alibabacloud-ai-search`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `alibabacloud-ai-search` service. +-- + +`api_key`::: +(Required, string) +A valid API key for the AlibabaCloud AI Search API. + +`service_id`::: +(Required, string) +The name of the model service to use for the {infer} task. ++ +-- +Available service_ids for the `text_embedding` task: + +* `ops-text-embedding-001` +* `ops-text-embedding-zh-001` +* `ops-text-embedding-en-001` +* `ops-text-embedding-002` + +For the supported `text_embedding` service_ids, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[documentation]. + +Available service_id for the `sparse_embedding` task: + +* `ops-text-sparse-embedding-001` + +For the supported `sparse_embedding` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-sparse-embedding-api-details[documentation]. 
+ +Available service_id for the `rerank` task is: + +* `ops-bge-reranker-larger` + +For the supported `rerank` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/ranker-api-details[documentation]. +-- + +`host`::: +(Required, string) +The name of the host address used for the {infer} task. You can find the host address at https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[ the API keys section] of the documentation. + +`workspace`::: +(Required, string) +The name of the workspace used for the {infer} task. + +`rate_limit`::: +(Optional, object) +By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. +This helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. +===== ++ +.`task_settings` for the `sparse_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. + +`return_token`::: +(Optional, boolean) +If `true`, the token name will be returned in the response. 
Defaults to `false` which means only the token ID will be returned in the response. +===== + +[discrete] +[[inference-example-alibabacloud-ai-search]] +==== AlibabaCloud AI Search service examples + +The following example shows how to create an {infer} endpoint called `alibabacloud_ai_search_embeddings` to perform a `text_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The following example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_sparse` to perform a `sparse_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-sparse-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_rerank` to perform a `rerank` task type. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/alibabacloud_ai_search_rerank +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-bge-reranker-larger", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index f74bc65e31bf..719aeb070fc7 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -17,6 +17,7 @@ Azure based examples use models available through https://ai.azure.com/explore/m or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models]. +AlibabaCloud AI Search examples use the `ops-text-embedding-zh-001` model from https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[the AlibabaCloud AI Search base models]. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. 
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index 997dbbe8a20e..3a686e27cf58 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-ingest-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index 6adf3d2ebbf4..6678b60fabc4 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -216,3 +216,29 @@ PUT _ingest/pipeline/amazon_bedrock_embeddings and the `output_field` that will contain the {infer} results. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/alibabacloud_ai_search_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "alibabacloud_ai_search_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 4e3a453a7bbe..66b790bdd57a 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-mapping-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index abeeb87f03e7..c86538ceb9c8 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -270,3 +270,35 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT alibabacloud-ai-search-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float" + }, + "content": { <4> + "type": "text" <5> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. +<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[AlibabaCloud AI Search embedding model] documentation. +<4> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<5> The field type which is text in this example. 
+ +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 45cb9fc51b9f..86f52fee2063 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-reindex-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index d961ec8bd39b..25d4023c650c 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -200,3 +200,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "alibabacloud-ai-search-embeddings", + "pipeline": "alibabacloud_ai_search_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index c867b39b88e3..fb686a2d8be1 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-requirements-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 603cd85a8f93..c9e7ca8b80ba 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -52,3 +52,9 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m * A pair of access and secret keys used to access Amazon Bedrock // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] +* An AlibabaCloud Account with https://console.aliyun.com[AlibabaCloud] access +* An API key generated for your account from the https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section] + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index fa4a11c59a15..996148d80a4b 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-search-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index f23ed1dfef05..fe1f58b6bd1a 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -531,3 +531,68 @@ query from the `amazon-bedrock-embeddings` index sorted by their proximity to th // NOTCONSOLE // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +GET alibabacloud-ai-search-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "alibabacloud_ai_search_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `alibabacloud-ai-search-embeddings` index sorted by their proximity to the query: + +[source,console-result] +-------------------------------------------------- +"hits": [ + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.
It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index f12be341d866..1dfa6077553f 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-task-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc index b186b2c58ccc..2b4aa1a20010 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc @@ -223,3 +223,32 @@ PUT _inference/text_embedding/amazon_bedrock_embeddings <1> <6> The model ID or ARN of the model to use. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings <1> +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", <2> + "service_id": "", <3> + "host": "", <4> + "workspace": "" <5> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `alibabacloud_ai_search_embeddings`. +<2> The API key for accessing the AlibabaCloud AI Search API. You can find your API keys in +your AlibabaCloud account under the +https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section]. You need to provide +your API key only once. The <> does not return your API +key. +<3> The AlibabaCloud AI Search embeddings model name, for example `ops-text-embedding-zh-001`. +<4> The name of your AlibabaCloud AI Search host address. +<5> The name of your AlibabaCloud AI Search workspace. + +// end::alibabacloud-ai-search[] + From 569184871bc0006ba55c20bcbc4500e98e853aea Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:23:34 +0100 Subject: [PATCH 07/30] Add UpdateForV10 annotation (#112281) In preparation for the next major release of Elasticsearch, this commit adds the UpdateForV10 annotation.
--- .../org/elasticsearch/core/UpdateForV10.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java diff --git a/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java new file mode 100644 index 000000000000..0fe816bd3721 --- /dev/null +++ b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.core; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to identify a block of code (a whole class, a method, or a field) that needs to be reviewed (for cleanup, remove or change) + * before releasing 10.0 + */ +@Retention(RetentionPolicy.SOURCE) +@Target({ ElementType.LOCAL_VARIABLE, ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE }) +public @interface UpdateForV10 { +} From 727f1e72c6d930ad763ca307f622eadbbdfff112 Mon Sep 17 00:00:00 2001 From: Dominique Clarke Date: Thu, 29 Aug 2024 03:46:44 -0400 Subject: [PATCH 08/30] [Observability] add .slo-observability.* index privileges to built in editor and viewer roles (#111984) Today, the `editor` and `viewer` roles do not contain the appropriate index privileges for SLO users. This PR updates the index privileges to include the `.slo-observability.*` indices. 
--------- Co-authored-by: Slobodan Adamovic --- .../authz/store/ReservedRolesStore.java | 9 ++++ .../authz/store/ReservedRolesStoreTests.java | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index 4f3d7a245fc8..74434adf61fb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -868,6 +868,11 @@ private static RoleDescriptor buildViewerRoleDescriptor() { .indices("/~(([.]|ilm-history-).*)/") .privileges("read", "view_index_metadata") .build(), + // Observability + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) @@ -915,6 +920,10 @@ private static RoleDescriptor buildEditorRoleDescriptor() { .indices("observability-annotations") .privileges("read", "view_index_metadata", "write") .build(), + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata", "write", "manage") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index f0676f35ae31..0cdf7de63ca9 100644 --- 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.action.admin.indices.get.GetIndexAction; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsAction; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsAction; +import org.elasticsearch.action.admin.indices.mapping.put.TransportAutoPutMappingAction; import org.elasticsearch.action.admin.indices.mapping.put.TransportPutMappingAction; import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction; @@ -3662,6 +3663,9 @@ public void testPredefinedViewerRole() { assertOnlyReadAllowed(role, ".profiling-" + randomIntBetween(0, 5)); assertOnlyReadAllowed(role, randomAlphaOfLength(5)); + assertOnlyReadAllowed(role, ".slo-observability." + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." + randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3740,6 +3744,9 @@ public void testPredefinedEditorRole() { assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".preview.alerts-" + randomIntBetween(0, 5)); assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".internal.preview.alerts-" + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertReadWriteAndManage(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." 
+ randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3865,6 +3872,41 @@ private void assertReadWriteDocsButNotDeleteIndexAllowed(Role role, String index role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), is(false) ); + + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportUpdateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportBulkAction.NAME).test(mockIndexAbstraction(index)), is(true)); + } + + private void assertReadWriteAndManage(Role role, String index) { + assertThat( + role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportFieldCapabilitiesAction.NAME + "*").test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportCreateIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportUpdateSettingsAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(GetRollupIndexCapsAction.NAME + "*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:admin/*").test(mockIndexAbstraction(index)), is(true)); + 
assertThat(role.indices().allowedIndicesMatcher("indices:monitor/*").test(mockIndexAbstraction(index)), is(true)); + assertThat( + role.indices().allowedIndicesMatcher(TransportAutoPutMappingAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(AutoCreateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); From 55ed03fddfa8c77c354a2db2910593b40d2be890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 28 Aug 2024 19:21:00 +0200 Subject: [PATCH 09/30] Remove Scorable#docID implementations This method was removed in https://github.com/apache/lucene/pull/12407 so we also need to remove it in implementations of Scorable. 
--- .../painless/ScriptedMetricAggContextsTests.java | 5 ----- .../join/aggregations/ParentJoinAggregator.java | 5 ----- .../aggregations/bucket/nested/NestedAggregator.java | 7 ------- .../bucket/sampler/BestDocsDeferringCollector.java | 8 -------- .../search/aggregations/MultiBucketCollectorTests.java | 10 ++-------- .../search/query/QueryPhaseCollectorTests.java | 10 ---------- .../search/sort/BucketedSortForFloatsTests.java | 6 ------ 7 files changed, 2 insertions(+), 49 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 2d3f09fc7243..8eae139eb822 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,11 +73,6 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { - @Override - public int docID() { - return 0; - } - @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index 9c6a788ea2f7..ed4dcf2072b8 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,11 +133,6 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } - - @Override - public int docID() { - return childDocsIter.docID(); - } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 39dfd6e4aac3..28e010f541a7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,19 +206,12 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { - int doc; float score; @Override public final float score() { return score; } - - @Override - public int docID() { - return doc; - } - } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index 1344604a8d39..c72c4b29a478 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,7 +213,6 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; - private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -248,7 +247,6 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; - currentDocId = -1; if (maxDocId < 0) { return; } @@ -258,7 +256,6 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; - currentDocId = rebased; // We stored the bucket ID in Lucene's 
shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -275,11 +272,6 @@ public float score() throws IOException { return currentScore; } - @Override - public int docID() { - return currentDocId; - } - public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index cfb9c4bb8324..ff4ad059559f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,14 +32,8 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class ScoreAndDoc extends Scorable { + private static class Score extends Scorable { float score; - int doc = -1; - - @Override - public int docID() { - return doc; - } @Override public float score() { @@ -246,7 +240,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new ScoreAndDoc(); + Scorable scorer = new Score(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index f222e697488d..dbfd9d83ee88 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,11 +1138,6 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } - - @Override - public int docID() { - return 0; - } }; QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1472,11 +1467,6 @@ public float score() throws IOException { return 0; } - @Override - public int docID() { - return 0; - } - @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 0f088d2948fc..7f136a097e24 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,18 +120,12 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { - private int doc; private float score; @Override public float score() throws IOException { return score; } - - @Override - public int docID() { - return doc; - } } /** From 5e455db10ecbb1a31cad58ecb1120a66fc50079f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 29 Aug 2024 10:04:27 +0200 Subject: [PATCH 10/30] Revert "Remove Scorable#docID implementations" This reverts commit 55ed03fddfa8c77c354a2db2910593b40d2be890. 
--- .../painless/ScriptedMetricAggContextsTests.java | 5 +++++ .../join/aggregations/ParentJoinAggregator.java | 5 +++++ .../aggregations/bucket/nested/NestedAggregator.java | 7 +++++++ .../bucket/sampler/BestDocsDeferringCollector.java | 8 ++++++++ .../search/aggregations/MultiBucketCollectorTests.java | 10 ++++++++-- .../search/query/QueryPhaseCollectorTests.java | 10 ++++++++++ .../search/sort/BucketedSortForFloatsTests.java | 6 ++++++ 7 files changed, 49 insertions(+), 2 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 8eae139eb822..2d3f09fc7243 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,6 +73,11 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { + @Override + public int docID() { + return 0; + } + @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index ed4dcf2072b8..9c6a788ea2f7 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,6 +133,11 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } + + @Override + public int docID() { + return childDocsIter.docID(); + } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 28e010f541a7..39dfd6e4aac3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,12 +206,19 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { + int doc; float score; @Override public final float score() { return score; } + + @Override + public int docID() { + return doc; + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index c72c4b29a478..1344604a8d39 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,6 +213,7 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; + private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -247,6 +248,7 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; + currentDocId = -1; if (maxDocId < 0) { return; } @@ -256,6 +258,7 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; + currentDocId = rebased; // We stored the bucket ID in Lucene's 
shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -272,6 +275,11 @@ public float score() throws IOException { return currentScore; } + @Override + public int docID() { + return currentDocId; + } + public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index ff4ad059559f..cfb9c4bb8324 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,8 +32,14 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class Score extends Scorable { + private static class ScoreAndDoc extends Scorable { float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } @Override public float score() { @@ -240,7 +246,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new Score(); + Scorable scorer = new ScoreAndDoc(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index dbfd9d83ee88..f222e697488d 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,6 +1138,11 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } + + @Override + public int docID() { + return 0; + } }; QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1467,6 +1472,11 @@ public float score() throws IOException { return 0; } + @Override + public int docID() { + return 0; + } + @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 7f136a097e24..0f088d2948fc 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,12 +120,18 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { + private int doc; private float score; @Override public float score() throws IOException { return score; } + + @Override + public int docID() { + return doc; + } } /** From aa57a1553e3371158c23faed7a5f7c5833a6e18d Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:13:30 +0100 Subject: [PATCH 11/30] [DOCS] Rewrite "What is Elasticsearch?" (Part 1) (#112213) --- docs/reference/intro.asciidoc | 132 ++++++++++-------- .../search-your-data/near-real-time.asciidoc | 2 +- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 3fc23b44994a..cd9c126e7b1f 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,42 +1,70 @@ [[elasticsearch-intro]] == What is {es}? 
-_**You know, for search (and analysis)**_ - -{es} is the distributed search and analytics engine at the heart of -the {stack}. {ls} and {beats} facilitate collecting, aggregating, and -enriching your data and storing it in {es}. {kib} enables you to -interactively explore, visualize, and share insights into your data and manage -and monitor the stack. {es} is where the indexing, search, and analysis -magic happens. - -{es} provides near real-time search and analytics for all types of data. Whether you -have structured or unstructured text, numerical data, or geospatial data, -{es} can efficiently store and index it in a way that supports fast searches. -You can go far beyond simple data retrieval and aggregate information to discover -trends and patterns in your data. And as your data and query volume grows, the -distributed nature of {es} enables your deployment to grow seamlessly right -along with it. - -While not _every_ problem is a search problem, {es} offers speed and flexibility -to handle data in a wide variety of use cases: - -* Add a search box to an app or website -* Store and analyze logs, metrics, and security event data -* Use machine learning to automatically model the behavior of your data in real - time -* Use {es} as a vector database to create, store, and search vector embeddings -* Automate business workflows using {es} as a storage engine -* Manage, integrate, and analyze spatial information using {es} as a geographic - information system (GIS) -* Store and process genetic data using {es} as a bioinformatics research tool - -We’re continually amazed by the novel ways people use search. But whether -your use case is similar to one of these, or you're using {es} to tackle a new -problem, the way you work with your data, documents, and indices in {es} is -the same. + +{es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. 
+It's optimized for speed and relevance on production-scale workloads. +Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. + +[TIP] +==== +{es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. +==== + +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack] and powers the Elastic https://www.elastic.co/enterprise-search[Search], https://www.elastic.co/observability[Observability] and https://www.elastic.co/security[Security] solutions. + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +* *Monitor log and event data*. Store logs, metrics, and event data for observability and security information and event management (SIEM). +* *Build search applications*. Add search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. +* *Vector database*. Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Retrieval augmented generation (RAG)*. Use {es} as a retrieval engine to augment Generative AI models. +* *Application and security monitoring*. Monitor and analyze application performance and security data effectively. +* *Machine learning*. Use {ml} to automatically model the behavior of your data in real time. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to our https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. +// Link to demos, search labs chatbots + +[discrete] +[[elasticsearch-intro-elastic-stack]] +.What is the Elastic Stack? +******************************* +{es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data.
+https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current/stack-components.html[Learn more about the Elastic Stack]. +******************************* +// TODO: Remove once we've moved Stack Overview to a subpage? + +[discrete] +[[elasticsearch-intro-deploy]] +=== Deployment options + +To use {es}, you need a running instance of the {es} service. +You can deploy {es} in various ways: + +* <>. Get started quickly with a minimal local Docker setup. +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud*]. {es} is available as part of our hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]. Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. + +**Advanced deployment options** + +* <>. Install, configure, and run {es} on your own premises. +* {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]. Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. +* {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]. Deploy Elastic Cloud on Kubernetes. + +[discrete] +[[elasticsearch-next-steps]] +=== Learn more + +Here are some resources to help you get started: + +* <>. A beginner's guide to deploying your first {es} instance, indexing data, and running queries. +* https://elastic.co/webinars/getting-started-elasticsearch[Webinar: Introduction to {es}]. Register for our live webinars to learn directly from {es} experts. +* https://www.elastic.co/search-labs[Elastic Search Labs]. Tutorials and blogs that explore AI-powered search using the latest {es} features.
+** Follow our tutorial https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[to build a hybrid search solution in Python]. +** Check out the https://github.com/elastic/elasticsearch-labs?tab=readme-ov-file#elasticsearch-examples--apps[`elasticsearch-labs` repository] for a range of Python notebooks and apps for various use cases. [[documents-indices]] -=== Data in: documents and indices +=== Documents and indices {es} is a distributed document store. Instead of storing information as rows of columnar data, {es} stores complex data structures that have been serialized @@ -65,8 +93,7 @@ behavior makes it easy to index and explore your data--just start indexing documents and {es} will detect and map booleans, floating point and integer values, dates, and strings to the appropriate {es} data types. -Ultimately, however, you know more about your data and how you want to use it -than {es} can. You can define rules to control dynamic mapping and explicitly +You can define rules to control dynamic mapping and explicitly define mappings to take full control of how fields are stored and indexed. Defining your own mappings enables you to: @@ -89,7 +116,7 @@ used at search time. When you query a full-text field, the query text undergoes the same analysis before the terms are looked up in the index. [[search-analyze]] -=== Information out: search and analyze +=== Search and analyze While you can use {es} as a document store and retrieve documents and their metadata, the real power comes from being able to easily access the full suite @@ -160,27 +187,8 @@ size 70 needles, you’re displaying a count of the size 70 needles that match your users' search criteria--for example, all size 70 _non-stick embroidery_ needles. -[discrete] -[[more-features]] -===== But wait, there’s more - -Want to automate the analysis of your time series data? 
You can use -{ml-docs}/ml-ad-overview.html[machine learning] features to create accurate -baselines of normal behavior in your data and identify anomalous patterns. With -machine learning, you can detect: - -* Anomalies related to temporal deviations in values, counts, or frequencies -* Statistical rarity -* Unusual behaviors for a member of a population - -And the best part? You can do this without having to specify algorithms, models, -or other data science-related configurations. - [[scalability]] -=== Scalability and resilience: clusters, nodes, and shards -++++ -Scalability and resilience -++++ +=== Scalability and resilience {es} is built to be always available and to scale with your needs. It does this by being distributed by nature. You can add servers (nodes) to a cluster to @@ -209,7 +217,7 @@ interrupting indexing or query operations. [discrete] [[it-depends]] -==== It depends... +==== Shard size and number of shards There are a number of performance considerations and trade offs with respect to shard size and the number of primary shards configured for an index. The more @@ -237,7 +245,7 @@ testing with your own data and queries]. [discrete] [[disaster-ccr]] -==== In case of disaster +==== Disaster recovery A cluster's nodes need good, reliable connections to each other. To provide better connections, you typically co-locate the nodes in the same data center or @@ -257,7 +265,7 @@ secondary clusters are read-only followers. [discrete] [[admin]] -==== Care and feeding +==== Security, management, and monitoring As with any enterprise system, you need tools to secure, manage, and monitor your {es} clusters. Security, monitoring, and administrative features @@ -265,3 +273,5 @@ that are integrated into {es} enable you to use {kibana-ref}/introduction.html[{ as a control center for managing a cluster. Features like <> and <> help you intelligently manage your data over time. + +Refer to <> for more information. 
\ No newline at end of file diff --git a/docs/reference/search/search-your-data/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc index 46a996c237c3..47618ecd9fd7 100644 --- a/docs/reference/search/search-your-data/near-real-time.asciidoc +++ b/docs/reference/search/search-your-data/near-real-time.asciidoc @@ -2,7 +2,7 @@ [[near-real-time]] === Near real-time search -The overview of <> indicates that when a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? +When a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? Lucene, the Java libraries on which {es} is based, introduced the concept of per-segment search. A _segment_ is similar to an inverted index, but the word _index_ in Lucene means "a collection of segments plus a commit point". After a commit, a new segment is added to the commit point and the buffer is cleared. 
From 320ccbc24748809feecc42df1f7bab6c4d6fd4cc Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:25:04 +0300 Subject: [PATCH 12/30] Reduce load for stress test to avoid oom (#112331) Fixes #112326 --- .../logsdb/datageneration/DataGeneratorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index db3b81891e87..4a4ffca0f37a 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -113,13 +113,13 @@ protected Collection getPlugins() { } public void testDataGeneratorStressTest() throws IOException { - // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object). + // Let's generate 125000 fields to test an extreme case (2 levels of objects + 1 leaf level with 50 fields per object). var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() { private int generatedFields = 0; @Override public int generateChildFieldCount() { - return 100; + return 50; } @Override From 2c29a3ae0a6e743c2df72df5895e90aa56dd2683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 29 Aug 2024 12:43:10 +0200 Subject: [PATCH 13/30] [DOCS] Highlights auto-chunking in intro of semantic text. 
(#111836) --- docs/reference/mapping/types/semantic-text.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 522a0c54c8aa..a006f288dc66 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -7,8 +7,8 @@ beta[] -The `semantic_text` field type automatically generates embeddings for text -content using an inference endpoint. +The `semantic_text` field type automatically generates embeddings for text content using an inference endpoint. +Long passages are <> to smaller sections to enable the processing of larger corpuses of text. The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings. You can create the inference endpoint by using the <>. From 35fe3a9c47500ab21735f7c40f7184fb7d724f9c Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 19:46:58 +0800 Subject: [PATCH 14/30] some fixed (#112332) --- .../inference/service-alibabacloud-ai-search.asciidoc | 2 +- docs/reference/inference/service-amazon-bedrock.asciidoc | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc index df5220573d9e..23a3d532635a 100644 --- a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -25,7 +25,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `text_embedding`, -* `sparse_embedding`. +* `sparse_embedding`, * `rerank`. 
-- diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc index 4ffa368613a0..dbffd5c26fbc 100644 --- a/docs/reference/inference/service-amazon-bedrock.asciidoc +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -122,14 +122,6 @@ Only available for `anthropic`, `cohere`, and `mistral` providers. Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. Should not be used if `temperature` is specified. -===== -+ -.`task_settings` for the `text_embedding` task type -[%collapsible%closed] -===== - -There are no `task_settings` available for the `text_embedding` task type. - ===== [discrete] From b4c8fa362dc88d9d1220c7466ec2c0219a258433 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 29 Aug 2024 13:50:24 +0200 Subject: [PATCH 15/30] Reenable 26_aggs_bucket EsqlClientYamlIT (#112343) Reenable 26_aggs_bucket EsqlClientYamlIT, fixed in #111897. Fixes #111901, fixes #111902. 
--- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 508403ee6238..e4c2f62d2617 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -122,12 +122,6 @@ tests: - class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT method: testSnapshotRestore {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111799 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval hourly: #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111901 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval: monthly #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111902 - class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT method: testScaledFloat issue: https://github.com/elastic/elasticsearch/issues/112003 From a97b0e226e3d7ea5e27eb565ae05d01ca22b06a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 13:59:16 +0200 Subject: [PATCH 16/30] Fix test failures in ScriptScoreQueryTests (#112334) --- muted-tests.yml | 6 ------ .../elasticsearch/search/query/ScriptScoreQueryTests.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index e4c2f62d2617..e80a39040a4e 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -151,12 +151,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsAvailable - issue: https://github.com/elastic/elasticsearch/issues/112278 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsNotAvailable - issue: https://github.com/elastic/elasticsearch/issues/112290 - class: 
org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests method: testEqualsAndHashcode issue: https://github.com/elastic/elasticsearch/issues/112312 diff --git a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java index d6b1da9f76b4..177968b9a132 100644 --- a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java @@ -72,7 +72,7 @@ public void initSearcher() throws IOException { w.commit(); reader = DirectoryReader.open(w); searcher = newSearcher(reader); - leafReaderContext = reader.leaves().get(0); + leafReaderContext = searcher.getTopReaderContext().leaves().get(0); } @After From a69f8e19ed4513d552b24a655f45b38098336b26 Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 15:09:28 +0300 Subject: [PATCH 17/30] Avoid redundant cluster state build (#112340) Avoid redundant cluster state build when creating index --- .../cluster/metadata/MetadataCreateIndexService.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b5ee0ebd7e38..b1a19d99dcb1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -1249,11 +1249,10 @@ static ClusterState clusterStateCreateIndex( ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); blocks.updateBlocks(indexMetadata); - ClusterState updatedState = ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).build(); + RoutingTable.Builder routingTableBuilder = 
RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexName)); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, updatedState.routingTable()) - .addAsNew(updatedState.metadata().index(indexName)); - return ClusterState.builder(updatedState).routingTable(routingTableBuilder.build()).build(); + return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( From cefe358b4197332aca6b4d15d440851033134d61 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 14:15:29 +0200 Subject: [PATCH 18/30] Fix DLS using runtime fields and synthetic source (#112341) Somewhat of a tortured test but applying the same fix from #112260 to synthetic source which was running into the same bug as a stored field source. --- docs/changelog/112341.yaml | 5 +++ .../lookup/SyntheticSourceProvider.java | 36 +++++------------- .../DocumentLevelSecurityRandomTests.java | 38 ++++++++++++++++++- 3 files changed, 52 insertions(+), 27 deletions(-) create mode 100644 docs/changelog/112341.yaml diff --git a/docs/changelog/112341.yaml b/docs/changelog/112341.yaml new file mode 100644 index 000000000000..8f44b53ad999 --- /dev/null +++ b/docs/changelog/112341.yaml @@ -0,0 +1,5 @@ +pr: 112341 +summary: Fix DLS using runtime fields and synthetic source +area: Authorization +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java index bccfc22dc7e9..a4549f0814a0 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java @@ -8,13 +8,14 @@ package org.elasticsearch.search.lookup; -import org.apache.lucene.index.IndexReaderContext; import 
org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.SourceLoader; import java.io.IOException; +import java.util.Map; // NB This is written under the assumption that individual segments are accessed by a single // thread, even if separate segments may be searched concurrently. If we ever implement @@ -22,7 +23,7 @@ class SyntheticSourceProvider implements SourceProvider { private final SourceLoader sourceLoader; - private volatile SyntheticSourceLeafLoader[] leafLoaders; + private final Map leaves = ConcurrentCollections.newConcurrentMap(); SyntheticSourceProvider(SourceLoader sourceLoader) { this.sourceLoader = sourceLoader; @@ -30,31 +31,14 @@ class SyntheticSourceProvider implements SourceProvider { @Override public Source getSource(LeafReaderContext ctx, int doc) throws IOException { - maybeInit(ctx); - if (leafLoaders[ctx.ord] == null) { - // individual segments are currently only accessed on one thread so there's no need - // for locking here. 
- leafLoaders[ctx.ord] = new SyntheticSourceLeafLoader(ctx); + final Object id = ctx.id(); + var provider = leaves.get(id); + if (provider == null) { + provider = new SyntheticSourceLeafLoader(ctx); + var existing = leaves.put(id, provider); + assert existing == null : "unexpected source provider [" + existing + "]"; } - return leafLoaders[ctx.ord].getSource(doc); - } - - private void maybeInit(LeafReaderContext ctx) { - if (leafLoaders == null) { - synchronized (this) { - if (leafLoaders == null) { - leafLoaders = new SyntheticSourceLeafLoader[findParentContext(ctx).leaves().size()]; - } - } - } - } - - private IndexReaderContext findParentContext(LeafReaderContext ctx) { - if (ctx.parent != null) { - return ctx.parent; - } - assert ctx.isTopLevel; - return ctx; + return provider.getSource(doc); } private class SyntheticSourceLeafLoader { diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java index fb7463197081..1bf7d8934775 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java @@ -144,6 +144,43 @@ public void testWithRuntimeFields() throws Exception { .endObject() ) ); + doTestWithRuntimeFieldsInTestIndex(); + } + + public void testWithRuntimeFieldsAndSyntheticSource() throws Exception { + assertAcked( + indicesAdmin().prepareCreate("test") + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .field("mode", "synthetic") + .endObject() + .startObject("runtime") + .startObject("field1") + .field("type", "keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + 
.startObject("properties") + .startObject("field1") + .field("type", "text") + .field("store", true) + .endObject() + .startObject("field2") + .field("type", "text") + .field("store", true) + .endObject() + .endObject() + .endObject() + ) + ); + doTestWithRuntimeFieldsInTestIndex(); + } + + private void doTestWithRuntimeFieldsInTestIndex() { List requests = new ArrayList<>(47); for (int i = 1; i <= 42; i++) { requests.add(prepareIndex("test").setSource("field1", "value1", "field2", "foo" + i)); @@ -158,5 +195,4 @@ public void testWithRuntimeFields() throws Exception { 42L ); } - } From 9387ce335757194da1986722a98f95338a45a873 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 13:16:37 +0100 Subject: [PATCH 19/30] Deduplicate unstable-cluster troubleshooting docs (#112333) We duplicated these docs in order to avoid breaking older links, but this makes it confusing and hard to link to the right copy of the information. This commit removes the duplication by replacing the docs at the old locations with stubs that link to the new locations. --- .../discovery/fault-detection.asciidoc | 295 +--------------- .../troubleshooting-unstable-cluster.asciidoc | 314 +++++++++++++++++- .../common/reference-docs-links.json | 4 +- 3 files changed, 321 insertions(+), 292 deletions(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index d12985b70597..21f4ae2317e6 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -35,313 +35,30 @@ starting from the beginning of the cluster state update. Refer to [[cluster-fault-detection-troubleshooting]] ==== Troubleshooting an unstable cluster -//tag::troubleshooting[] -Normally, a node will only leave a cluster if deliberately shut down. If a node -leaves the cluster unexpectedly, it's important to address the cause. 
A cluster -in which nodes leave unexpectedly is unstable and can create several issues. -For instance: -* The cluster health may be yellow or red. - -* Some shards will be initializing and other shards may be failing. - -* Search, indexing, and monitoring operations may fail and report exceptions in -logs. - -* The `.security` index may be unavailable, blocking access to the cluster. - -* The master may appear busy due to frequent cluster state updates. - -To troubleshoot a cluster in this state, first ensure the cluster has a -<>. Next, focus on the nodes -unexpectedly leaving the cluster ahead of all other issues. It will not be -possible to solve other issues until the cluster has a stable master node and -stable node membership. - -Diagnostics and statistics are usually not useful in an unstable cluster. These -tools only offer a view of the state of the cluster at a single point in time. -Instead, look at the cluster logs to see the pattern of behaviour over time. -Focus particularly on logs from the elected master. When a node leaves the -cluster, logs for the elected master include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] - node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [disconnected] ----- - -This message says that the `NodeLeftExecutor` on the elected master -(`instance-0000000000`) processed a `node-left` task, identifying the node that -was removed and the reason for its removal. 
When the node joins the cluster -again, logs for the elected master will include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] - node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [joining after restart, removed [24s] ago with reason [disconnected]] ----- - -This message says that the `NodeJoinExecutor` on the elected master -(`instance-0000000000`) processed a `node-join` task, identifying the node that -was added to the cluster and the reason for the task. - -Other nodes may log similar messages, but report fewer details: - -[source,text] ----- -[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] - [instance-0000000001] removed { - {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} - {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} - }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} ----- - -These messages are not especially useful for troubleshooting, so focus on the -ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted -on the elected master and which contain more details. If you don't see the -messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: - -* You're looking at the logs for the elected master node. - -* The logs cover the correct time period. - -* Logging is enabled at `INFO` level. - -Nodes will also log a message containing `master node changed` whenever they -start or stop following the elected master. You can use these messages to -determine each node's view of the state of the master over time. - -If a node restarts, it will leave the cluster and then join the cluster again. 
-When it rejoins, the `NodeJoinExecutor` will log that it processed a -`node-join` task indicating that the node is `joining after restart`. If a node -is unexpectedly restarting, look at the node's logs to see why it is shutting -down. - -The <> API on the affected node will also provide some useful -information about the situation. - -If the node did not restart then you should look at the reason for its -departure more closely. Each reason has different troubleshooting steps, -described below. There are three possible reasons: - -* `disconnected`: The connection from the master node to the removed node was -closed. - -* `lagging`: The master published a cluster state update, but the removed node -did not apply it within the permitted timeout. By default, this timeout is 2 -minutes. Refer to <> for information about the -settings which control this mechanism. - -* `followers check retry count exceeded`: The master sent a number of -consecutive health checks to the removed node. These checks were rejected or -timed out. By default, each health check times out after 10 seconds and {es} -removes the node removed after three consecutively failed health checks. Refer -to <> for information about the settings which -control this mechanism. +See <>. [discrete] ===== Diagnosing `disconnected` nodes -Nodes typically leave the cluster with reason `disconnected` when they shut -down, but if they rejoin the cluster without restarting then there is some -other problem. - -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. 
- -The connections from the elected master node to every other node in the cluster -are particularly important. The elected master never spontaneously closes its -outbound connections to other nodes. Similarly, once an inbound connection is -fully established, a node never spontaneously it unless the node is shutting -down. - -If you see a node unexpectedly leave the cluster with the `disconnected` -reason, something other than {es} likely caused the connection to close. A -common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. +See <>. [discrete] ===== Diagnosing `lagging` nodes -{es} needs every node to process cluster state updates reasonably quickly. If a -node takes too long to process a cluster state update, it can be harmful to the -cluster. The master will remove these nodes with the `lagging` reason. Refer to -<> for information about the settings which control -this mechanism. - -Lagging is typically caused by performance issues on the removed node. However, -a node may also lag due to severe network delays. To rule out network delays, -ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more -information about the root cause. 
- -If you're an advanced user, you can get more detailed information about what -the node was doing when it was removed by configuring the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API on the faulty node and report the results in -the logs on the elected master. The results are compressed, encoded, and split -into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. 
[discrete] ===== Diagnosing `follower check retry count exceeded` nodes -Nodes sometimes leave the cluster with reason `follower check retry count -exceeded` when they shut down, but if they rejoin the cluster without -restarting then there is some other problem. - -{es} needs every node to respond to network messages successfully and -reasonably quickly. If a node rejects requests or does not respond at all then -it can be harmful to the cluster. If enough consecutive checks fail then the -master will remove the node with reason `follower check retry count exceeded` -and will indicate in the `node-left` message how many of the consecutive -unsuccessful checks failed and how many of them timed out. Refer to -<> for information about the settings which control -this mechanism. - -Timeouts and failures may be due to network delays or performance problems on -the affected nodes. Ensure that `net.ipv4.tcp_retries2` is -<> to eliminate network delays as -a possible cause for this kind of instability. Log messages containing -`warn threshold` may give further clues about the cause of the instability. - -If the last check failed with an exception then the exception is reported, and -typically indicates the problem that needs to be addressed. If any of the -checks timed out then narrow down the problem as follows. - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] - -By default the follower checks will time out after 30s, so if node departures -are unpredictable then capture stack dumps every 15s to be sure that at least -one stack dump was taken at the right time. +See <>. 
[discrete] ===== Diagnosing `ShardLockObtainFailedException` failures -If a node leaves and rejoins the cluster then {es} will usually shut down and -re-initialize its shards. If the shards do not shut down quickly enough then -{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. - -To gather more information about the reason for shards shutting down slowly, -configure the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.env.NodeEnvironment: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API whenever it encounters a -`ShardLockObtainFailedException`. The results are compressed, encoded, and -split into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. [discrete] ===== Diagnosing other network disconnections -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. 
If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -{es} nodes will only actively close an outbound connection to another node if -the other node leaves the cluster. See -<> for further information about -identifying and troubleshooting this situation. If an outbound connection -closes for some other reason, nodes will log a message such as the following: - -[source,text] ----- -[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote ----- - -Similarly, once an inbound connection is fully established, a node never -spontaneously closes it unless the node is shutting down. - -Therefore if you see a node report that a connection to another node closed -unexpectedly, something other than {es} likely caused the connection to close. -A common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. -//end::troubleshooting[] +See <>. 
diff --git a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc index 387ebcdcd43c..cbb35f773103 100644 --- a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc +++ b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc @@ -1,4 +1,316 @@ [[troubleshooting-unstable-cluster]] == Troubleshooting an unstable cluster -include::../modules/discovery/fault-detection.asciidoc[tag=troubleshooting,leveloffset=-2] \ No newline at end of file +Normally, a node will only leave a cluster if deliberately shut down. If a node +leaves the cluster unexpectedly, it's important to address the cause. A cluster +in which nodes leave unexpectedly is unstable and can create several issues. +For instance: + +* The cluster health may be yellow or red. + +* Some shards will be initializing and other shards may be failing. + +* Search, indexing, and monitoring operations may fail and report exceptions in +logs. + +* The `.security` index may be unavailable, blocking access to the cluster. + +* The master may appear busy due to frequent cluster state updates. + +To troubleshoot a cluster in this state, first ensure the cluster has a +<>. Next, focus on the nodes +unexpectedly leaving the cluster ahead of all other issues. It will not be +possible to solve other issues until the cluster has a stable master node and +stable node membership. + +Diagnostics and statistics are usually not useful in an unstable cluster. These +tools only offer a view of the state of the cluster at a single point in time. +Instead, look at the cluster logs to see the pattern of behaviour over time. +Focus particularly on logs from the elected master. 
When a node leaves the +cluster, logs for the elected master include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] + node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [disconnected] +---- + +This message says that the `NodeLeftExecutor` on the elected master +(`instance-0000000000`) processed a `node-left` task, identifying the node that +was removed and the reason for its removal. When the node joins the cluster +again, logs for the elected master will include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] + node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [joining after restart, removed [24s] ago with reason [disconnected]] +---- + +This message says that the `NodeJoinExecutor` on the elected master +(`instance-0000000000`) processed a `node-join` task, identifying the node that +was added to the cluster and the reason for the task. + +Other nodes may log similar messages, but report fewer details: + +[source,text] +---- +[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] + [instance-0000000001] removed { + {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} + {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} + }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} +---- + +These messages are not especially useful for troubleshooting, so focus on the +ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted +on the elected master and which contain more details. 
If you don't see the
+messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that:
+
+* You're looking at the logs for the elected master node.
+
+* The logs cover the correct time period.
+
+* Logging is enabled at `INFO` level.
+
+Nodes will also log a message containing `master node changed` whenever they
+start or stop following the elected master. You can use these messages to
+determine each node's view of the state of the master over time.
+
+If a node restarts, it will leave the cluster and then join the cluster again.
+When it rejoins, the `NodeJoinExecutor` will log that it processed a
+`node-join` task indicating that the node is `joining after restart`. If a node
+is unexpectedly restarting, look at the node's logs to see why it is shutting
+down.
+
+The <> API on the affected node will also provide some useful
+information about the situation.
+
+If the node did not restart then you should look at the reason for its
+departure more closely. Each reason has different troubleshooting steps,
+described below. There are three possible reasons:
+
+* `disconnected`: The connection from the master node to the removed node was
+closed.
+
+* `lagging`: The master published a cluster state update, but the removed node
+did not apply it within the permitted timeout. By default, this timeout is 2
+minutes. Refer to <> for information about the
+settings which control this mechanism.
+
+* `followers check retry count exceeded`: The master sent a number of
+consecutive health checks to the removed node. These checks were rejected or
+timed out. By default, each health check times out after 10 seconds and {es}
+removes the node after three consecutively failed health checks. Refer
+to <> for information about the settings which
+control this mechanism. 
+
+[discrete]
+[[troubleshooting-unstable-cluster-disconnected]]
+=== Diagnosing `disconnected` nodes
+
+Nodes typically leave the cluster with reason `disconnected` when they shut
+down, but if they rejoin the cluster without restarting then there is some
+other problem.
+
+{es} is designed to run on a fairly reliable network. It opens a number of TCP
+connections between nodes and expects these connections to remain open
+<>. If a connection is closed then {es} will
+try and reconnect, so the occasional blip may fail some in-flight operations
+but should otherwise have limited impact on the cluster. In contrast,
+repeatedly-dropped connections will severely affect its operation.
+
+The connections from the elected master node to every other node in the cluster
+are particularly important. The elected master never spontaneously closes its
+outbound connections to other nodes. Similarly, once an inbound connection is
+fully established, a node never spontaneously closes it unless the node is
+shutting down.
+
+If you see a node unexpectedly leave the cluster with the `disconnected`
+reason, something other than {es} likely caused the connection to close. A
+common cause is a misconfigured firewall with an improper timeout or another
+policy that's <>. It could also
+be caused by general connectivity issues, such as packet loss due to faulty
+hardware or network congestion. 
If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. + +[discrete] +[[troubleshooting-unstable-cluster-lagging]] +=== Diagnosing `lagging` nodes + +{es} needs every node to process cluster state updates reasonably quickly. If a +node takes too long to process a cluster state update, it can be harmful to the +cluster. The master will remove these nodes with the `lagging` reason. Refer to +<> for information about the settings which control +this mechanism. + +Lagging is typically caused by performance issues on the removed node. However, +a node may also lag due to severe network delays. To rule out network delays, +ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more +information about the root cause. + +If you're an advanced user, you can get more detailed information about what +the node was doing when it was removed by configuring the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API on the faulty node and report the results in +the logs on the elected master. 
The results are compressed, encoded, and split +into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-follower-check]] +=== Diagnosing `follower check retry count exceeded` nodes + +Nodes sometimes leave the cluster with reason `follower check retry count +exceeded` when they shut down, but if they rejoin the cluster without +restarting then there is some other problem. + +{es} needs every node to respond to network messages successfully and +reasonably quickly. If a node rejects requests or does not respond at all then +it can be harmful to the cluster. 
If enough consecutive checks fail then the +master will remove the node with reason `follower check retry count exceeded` +and will indicate in the `node-left` message how many of the consecutive +unsuccessful checks failed and how many of them timed out. Refer to +<> for information about the settings which control +this mechanism. + +Timeouts and failures may be due to network delays or performance problems on +the affected nodes. Ensure that `net.ipv4.tcp_retries2` is +<> to eliminate network delays as +a possible cause for this kind of instability. Log messages containing +`warn threshold` may give further clues about the cause of the instability. + +If the last check failed with an exception then the exception is reported, and +typically indicates the problem that needs to be addressed. If any of the +checks timed out then narrow down the problem as follows. + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] + +By default the follower checks will time out after 30s, so if node departures +are unpredictable then capture stack dumps every 15s to be sure that at least +one stack dump was taken at the right time. + +[discrete] +[[troubleshooting-unstable-cluster-shardlockobtainfailedexception]] +=== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. 
+ +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-network]] +=== Diagnosing other network disconnections + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. 
+ +{es} nodes will only actively close an outbound connection to another node if +the other node leaves the cluster. See +<> for further information about +identifying and troubleshooting this situation. If an outbound connection +closes for some other reason, nodes will log a message such as the following: + +[source,text] +---- +[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote +---- + +Similarly, once an inbound connection is fully established, a node never +spontaneously closes it unless the node is shutting down. + +Therefore if you see a node report that a connection to another node closed +unexpectedly, something other than {es} likely caused the connection to close. +A common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. 
diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 3eb8939c22a6..cc0bc5e2257c 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -2,8 +2,8 @@ "INITIAL_MASTER_NODES": "important-settings.html#initial_master_nodes", "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html", - "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_lagging_nodes_2", - "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_shardlockobtainfailedexception_failures_2", + "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging", + "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception", "CONCURRENT_REPOSITORY_WRITERS": "diagnosing-corrupted-repositories.html", "ARCHIVE_INDICES": "archive-indices.html", "HTTP_TRACER": "modules-network.html#http-rest-request-tracer", From 5ac4d8c71e06880624a9a91bfec4ae310d9cab2f Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Thu, 29 Aug 2024 14:48:15 +0200 Subject: [PATCH 20/30] Fix union-types where one index is missing the field (#111932) * Fix union-types where one index is missing the field When none of the indexes has the field, a validation error is correctly thrown, and when all indexes have the field, union-types works as normal. But when some indexes have the field and some do not, we were getting and internal error. We treat this case similarly to when some documents are missing the field, in which case `null` values are produced. 
So now a multi-index query where some indexes are missing the field will produce nulls for the documents coming from those indexes. * Update docs/changelog/111932.yaml * Added capability for this fix (missing-field) --- docs/changelog/111932.yaml | 6 ++ .../xpack/esql/CsvTestsDataLoader.java | 6 ++ .../mapping-missing_ip_sample_data.json | 13 ++++ .../main/resources/missing_ip_sample_data.csv | 8 +++ .../src/main/resources/union_types.csv-spec | 68 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../planner/EsPhysicalOperationProviders.java | 4 +- 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/111932.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv diff --git a/docs/changelog/111932.yaml b/docs/changelog/111932.yaml new file mode 100644 index 000000000000..ce840ecebcff --- /dev/null +++ b/docs/changelog/111932.yaml @@ -0,0 +1,6 @@ +pr: 111932 +summary: Fix union-types where one index is missing the field +area: ES|QL +type: bug +issues: + - 111912 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index b20e3bb0d540..9ee22113a424 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -68,6 +68,11 @@ public class CsvTestsDataLoader { "mapping-sample_data_ts_long.json", "sample_data_ts_long.csv" ); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( + "missing_ip_sample_data", + "mapping-missing_ip_sample_data.json", + "missing_ip_sample_data.csv" + ); private static final TestsDataset 
CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -112,6 +117,7 @@ public class CsvTestsDataLoader { Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), + Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json new file mode 100644 index 000000000000..6f3796dd7715 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv new file mode 100644 index 000000000000..e8e9ddcaee83 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,5033755,Connection error +2023-10-23T13:52:55.015Z,8268153,Connection error +2023-10-23T13:51:54.732Z,725448,Connection error +2023-10-23T13:33:34.937Z,1232382,Disconnected +2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2 
+2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 6819727be013..c6a2d47a78dc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -405,6 +405,74 @@ count:long | message:keyword 2 | Connected to 10.1.0.3 ; +multiIndexMissingIpToString +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_STRING(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:keyword | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexMissingIpToIp +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error 
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + multiIndexTsLong required_capability: union_types required_capability: metadata_fields diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 81b2ba71b880..120323ebeb7a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -183,6 +183,11 @@ public enum Cap { */ UNION_TYPES_FIX_RENAME_RESOLUTION, + /** + * Fix for union-types when some indexes are missing the required field. Done in #111932. + */ + UNION_TYPES_MISSING_FIELD, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. 
* see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8fddb7407a02..04be73148426 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -138,7 +138,9 @@ private BlockLoader getBlockLoaderFor( if (unionTypes != null) { String indexName = shardContext.ctx.index().getName(); Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); - return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + return conversion == null + ? BlockLoader.CONSTANT_NULLS + : new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); } return blockLoader; } From 5c200afb9e3a02400cfdf45c4469c30bd1417223 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Thu, 29 Aug 2024 09:12:03 -0400 Subject: [PATCH 21/30] [ML] Adds Explain Functionality to LTR Rescoring (#112155) --- .../integration/LearningToRankRescorerIT.java | 433 ++++++++++-------- .../inference/ltr/LearningToRankRescorer.java | 55 ++- 2 files changed, 303 insertions(+), 185 deletions(-) diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java index b2a0b60aed7b..4a703117c655 100644 --- a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java @@ -16,7 +16,9 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -28,189 +30,11 @@ public class LearningToRankRescorerIT extends InferenceTestCase { @Before public void setupModelAndData() throws IOException { - putRegressionModel(MODEL_ID, """ - { - "description": "super complex model for tests", - "inference_config": { - "learning_to_rank": { - "feature_extractors": [ - { - "query_extractor": { - "feature_name": "cost", - "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} - } - }, - { - "query_extractor": { - "feature_name": "type_tv", - "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_vcr", - "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_laptop", - "query": 
{"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "two", - "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } - } - }, - { - "query_extractor": { - "feature_name": "product_bm25", - "query": { "term": { "product": "{{keyword}}" } } - } - } - ] - } - }, - "definition": { - "trained_model": { - "ensemble": { - "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], - "target_type": "regression", - "trained_models": [ - { - "tree": { - "feature_names": [ - "cost" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 400, - "decision_type": "lte", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 5.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "type_tv" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 12.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "two" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "product_bm25" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - 
"right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 4.0 - } - ], - "target_type": "regression" - } - } - ] - } - } - } - } - """); - createIndex(INDEX_NAME, Settings.EMPTY, """ - "properties":{ - "product":{"type": "keyword"}, - "cost":{"type": "integer"} - }"""); - indexData("{ \"product\": \"TV\", \"cost\": 300}"); - indexData("{ \"product\": \"TV\", \"cost\": 400}"); - indexData("{ \"product\": \"TV\", \"cost\": 600}"); - indexData("{ \"product\": \"VCR\", \"cost\": 15}"); - indexData("{ \"product\": \"VCR\", \"cost\": 350}"); - indexData("{ \"product\": \"VCR\", \"cost\": 580}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 100}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 300}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 500}"); + putRegressionModel(MODEL_ID, testRegressionModel); + createIndex(INDEX_NAME, Settings.EMPTY, testIndexDefinition); + for (String testDataItem : testIndexData) { + indexData(testDataItem); + } adminClient().performRequest(new Request("POST", INDEX_NAME + "/_refresh")); } @@ -249,6 +73,19 @@ public void testLearningToRankRescore() throws Exception { assertHitScores(client().performRequest(request), List.of(9.0, 9.0, 6.0)); } + public void testLearningToRankRescoreWithExplain() throws Exception { + Request request = new Request("GET", "store/_search?size=3&explain=true&error_trace"); + request.setJsonEntity(""" + { + "rescore": { + "window_size": 10, + "learning_to_rank": { "model_id": "ltr-model" } + } + }"""); + var response = client().performRequest(request); + assertExplainExtractedFeatures(response, List.of("type_tv", "cost", "two")); + } + public void testLearningToRankRescoreSmallWindow() throws Exception { Request request = new Request("GET", "store/_search?size=5"); request.setJsonEntity(""" @@ -336,4 +173,234 @@ private void indexData(String data) throws IOException { private static void assertHitScores(Response response, List expectedScores) throws 
IOException { assertThat((List) XContentMapValues.extractValue("hits.hits._score", responseAsMap(response)), equalTo(expectedScores)); } + + @SuppressWarnings("unchecked") + private static void assertExplainExtractedFeatures(Response response, List expectedFeatures) throws IOException { + var explainValues = (ArrayList>) XContentMapValues.extractValue( + "hits.hits._explanation", + responseAsMap(response) + ); + + assertThat(explainValues.size(), equalTo(3)); + for (Map hit : explainValues) { + assertThat(hit.get("description"), equalTo("rescored using LTR model ltr-model")); + + var queryDetails = (ArrayList>) hit.get("details"); + assertThat(queryDetails.size(), equalTo(2)); + + assertThat(queryDetails.get(0).get("description"), equalTo("first pass query score")); + assertThat(queryDetails.get(1).get("description"), equalTo("extracted features")); + + var featureDetails = new ArrayList<>((ArrayList>) queryDetails.get(1).get("details")); + assertThat(featureDetails.size(), equalTo(3)); + + var missingKeys = new ArrayList(); + for (String expectedFeature : expectedFeatures) { + var expectedDescription = Strings.format("feature value for [%s]", expectedFeature); + + var wasFound = false; + for (Map detailItem : featureDetails) { + if (detailItem.get("description").equals(expectedDescription)) { + featureDetails.remove(detailItem); + wasFound = true; + break; + } + } + + if (wasFound == false) { + missingKeys.add(expectedFeature); + } + } + + assertThat(Strings.format("Could not find features: [%s]", String.join(", ", missingKeys)), featureDetails.size(), equalTo(0)); + } + } + + private static String testIndexDefinition = """ + "properties":{ + "product":{"type": "keyword"}, + "cost":{"type": "integer"} + }"""; + + private static List testIndexData = List.of( + "{ \"product\": \"TV\", \"cost\": 300}", + "{ \"product\": \"TV\", \"cost\": 400}", + "{ \"product\": \"TV\", \"cost\": 600}", + "{ \"product\": \"VCR\", \"cost\": 15}", + "{ \"product\": \"VCR\", \"cost\": 
350}", + "{ \"product\": \"VCR\", \"cost\": 580}", + "{ \"product\": \"Laptop\", \"cost\": 100}", + "{ \"product\": \"Laptop\", \"cost\": 300}", + "{ \"product\": \"Laptop\", \"cost\": 500}" + ); + + private static String testRegressionModel = """ + { + "description": "super complex model for tests", + "inference_config": { + "learning_to_rank": { + "feature_extractors": [ + { + "query_extractor": { + "feature_name": "cost", + "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} + } + }, + { + "query_extractor": { + "feature_name": "type_tv", + "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_vcr", + "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_laptop", + "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "two", + "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } + } + }, + { + "query_extractor": { + "feature_name": "product_bm25", + "query": { "term": { "product": "{{keyword}}" } } + } + } + ] + } + }, + "definition": { + "trained_model": { + "ensemble": { + "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], + "target_type": "regression", + "trained_models": [ + { + "tree": { + "feature_names": [ + "cost" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 400, + "decision_type": "lte", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 5.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "type_tv" + ], + "tree_structure": [ + { + "node_index": 0, + 
"split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 12.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "two" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "product_bm25" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 4.0 + } + ], + "target_type": "regression" + } + } + ] + } + } + } + } + """; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java index 8a310ba2719f..70d0b980bb3b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java @@ -28,6 +28,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import static java.util.stream.Collectors.toUnmodifiableSet; @@ -129,8 +130,58 @@ public TopDocs rescore(TopDocs topDocs, IndexSearcher searcher, RescoreContext r @Override public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation 
sourceExplanation) throws IOException { - // TODO: Call infer again but with individual feature importance values and explaining the model (which features are used, etc.) - return null; + if (sourceExplanation == null) { + return Explanation.noMatch("no match found"); + } + + LearningToRankRescorerContext ltrContext = (LearningToRankRescorerContext) rescoreContext; + LocalModel localModelDefinition = ltrContext.regressionModelDefinition; + + if (localModelDefinition == null) { + throw new IllegalStateException("local model reference is null, missing rewriteAndFetch before rescore phase?"); + } + + List leaves = ltrContext.executionContext.searcher().getIndexReader().leaves(); + + int endDoc = 0; + int readerUpto = -1; + LeafReaderContext currentSegment = null; + + while (topLevelDocId >= endDoc) { + readerUpto++; + currentSegment = leaves.get(readerUpto); + endDoc = currentSegment.docBase + currentSegment.reader().maxDoc(); + } + + assert currentSegment != null : "Unexpected null segment"; + + int targetDoc = topLevelDocId - currentSegment.docBase; + + List featureExtractors = ltrContext.buildFeatureExtractors(searcher); + int featureSize = featureExtractors.stream().mapToInt(fe -> fe.featureNames().size()).sum(); + + Map features = Maps.newMapWithExpectedSize(featureSize); + + for (FeatureExtractor featureExtractor : featureExtractors) { + featureExtractor.setNextReader(currentSegment); + featureExtractor.addFeatures(features, targetDoc); + } + + // Predicting the value + var ltrScore = ((Number) localModelDefinition.inferLtr(features, ltrContext.learningToRankConfig).predictedValue()).floatValue(); + + List featureExplanations = new ArrayList<>(); + for (String featureName : features.keySet()) { + Number featureValue = Objects.requireNonNullElse((Number) features.get(featureName), 0); + featureExplanations.add(Explanation.match(featureValue, "feature value for [" + featureName + "]")); + } + + return Explanation.match( + ltrScore, + "rescored using LTR model " + 
ltrContext.regressionModelDefinition.getModelId(), + Explanation.match(sourceExplanation.getValue(), "first pass query score", sourceExplanation), + Explanation.match(0f, "extracted features", featureExplanations) + ); } /** Returns a new {@link TopDocs} with the topN from the incoming one, or the same TopDocs if the number of hits is already <= From e36b5551ab418860e9db84cdaa96dc2e0df30d6b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 29 Aug 2024 09:44:15 -0400 Subject: [PATCH 22/30] ESQL: Method to convert BooleanBlock to a "mask" (#112253) This adds a method, `BooleanBlock#toMask` to convert `BooleanBlock`s into a "mask" for use with `keepMask`. --- .../compute/data/BooleanArrayBlock.java | 21 ++++++ .../compute/data/BooleanBigArrayBlock.java | 21 ++++++ .../compute/data/BooleanBlock.java | 7 ++ .../compute/data/BooleanVectorBlock.java | 6 ++ .../compute/data/ConstantNullBlock.java | 5 ++ .../elasticsearch/compute/data/ToMask.java | 22 +++++++ .../compute/data/X-ArrayBlock.java.st | 22 +++++++ .../compute/data/X-BigArrayBlock.java.st | 23 +++++++ .../compute/data/X-Block.java.st | 10 ++- .../compute/data/X-Vector.java.st | 2 +- .../compute/data/X-VectorBlock.java.st | 9 ++- .../compute/data/BasicBlockTests.java | 41 +++++++++++- .../data/BigArrayBlockBuilderTests.java | 66 +++++++++++++++++++ .../compute/data/BigArrayVectorTests.java | 6 ++ .../compute/data/BlockMultiValuedTests.java | 49 ++++++++++++++ 15 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java index 14f6c9591ed1..3d600bec1bd6 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java +++ 
b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java @@ -85,6 +85,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java index 5342728af4fe..f353512eb93b 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java @@ -86,6 +86,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean 
getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java index 566b8fbed445..5d2d6c97a11f 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java @@ -37,6 +37,13 @@ public sealed interface BooleanBlock extends Block permits BooleanArrayBlock, Bo @Override BooleanVector asVector(); + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + @Override BooleanBlock filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java index ca2fc58bf0bb..1544cc3355cd 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java @@ -31,6 +31,12 @@ public BooleanVector asVector() { return vector; } + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index fc4cdc1d41f4..3d61613ba70e 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -48,6 +48,11 @@ public OrdinalBytesRefBlock asOrdinals() { return null; } + @Override + public ToMask toMask() { + return new ToMask(blockFactory.newConstantBooleanVector(false, positionCount), false); + } + @Override public boolean isNull(int position) { return true; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java new file mode 100644 index 000000000000..5b71679048e2 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data; + +import org.elasticsearch.core.Releasable; + +/** + * Result from calling {@link BooleanBlock#toMask}. {@link #close closing} this will + * close the contained {@link #mask()}. If you want to keep a reference to it then you'll + * have to {@link Block#incRef()} it. 
+ */ +public record ToMask(BooleanVector mask, boolean hadMultivaluedFields) implements Releasable { + @Override + public void close() { + mask.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st index 750de95e7b8d..e855e6d6296d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st @@ -101,6 +101,28 @@ $if(BytesRef)$ public OrdinalBytesRefBlock asOrdinals() { return null; } + +$elseif(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } $endif$ @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st index bf9e6fec1872..23632bf41349 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st @@ -86,6 +86,29 @@ public final class $Type$BigArrayBlock extends AbstractArrayBlock implements $Ty return null; } +$if(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new 
ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } +$endif$ + @Override public $type$ get$Type$(int valueIndex) { return vector.get$Type$(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st index da0769af2d18..67e4ac4bb334 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st @@ -63,8 +63,16 @@ $if(BytesRef)$ * returns null. Callers must not release the returned block as no extra reference is retained by this method. */ OrdinalBytesRefBlock asOrdinals(); -$endif$ +$elseif(boolean)$ + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + +$endif$ @Override $Type$Block filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st index 09f11f350439..e19c1788cdb6 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st @@ -51,8 +51,8 @@ $if(BytesRef)$ * returns null. 
Callers must not release the returned vector as no extra reference is retained by this method. */ OrdinalBytesRefVector asOrdinals(); -$endif$ +$endif$ @Override $Type$Vector filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st index eec75f62f22f..d4c6859e64b2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st @@ -44,8 +44,15 @@ $if(BytesRef)$ return null; } } -$endif$ +$elseif(boolean)$ + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + +$endif$ @Override $if(BytesRef)$ public BytesRef getBytesRef(int valueIndex, BytesRef dest) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index e8401048af01..ad372da47d6b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -800,6 +800,12 @@ public void testBooleanBlock() { } assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); assertEmptyLookup(blockFactory, block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(p % 10 == 0)); + } + } try (BooleanBlock.Builder blockBuilder = blockFactory.newBooleanBlockBuilder(1)) { BooleanBlock copy = blockBuilder.copyFrom(block, 0, block.getPositionCount()).build(); @@ -826,6 +832,7 @@ public void testBooleanBlock() { IntStream.range(0, 
positionCount).mapToObj(ii -> randomBoolean()).forEach(vectorBuilder::appendBoolean); BooleanVector vector = vectorBuilder.build(); assertSingleValueDenseBlock(vector.asBlock()); + assertToMask(vector); releaseAndAssertBreaker(vector.asBlock()); } } @@ -1358,6 +1365,19 @@ void assertNullValues( assertTrue(block.isNull(randomNullPosition)); assertFalse(block.isNull(randomNonNullPosition)); releaseAndAssertBreaker(block); + if (block instanceof BooleanBlock bb) { + try (ToMask mask = bb.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(nullsMask.get(p) == false && p % 10 == 0)); + } + } + } + } + + void assertZeroPositionsAndRelease(BooleanBlock block) { + assertToMaskZeroPositions(block); + assertZeroPositionsAndRelease((Block) block); } void assertZeroPositionsAndRelease(Block block) { @@ -1366,6 +1386,11 @@ void assertZeroPositionsAndRelease(Block block) { releaseAndAssertBreaker(block); } + void assertZeroPositionsAndRelease(BooleanVector vector) { + assertToMask(vector); + assertZeroPositionsAndRelease((Vector) vector); + } + void assertZeroPositionsAndRelease(Vector vector) { assertThat(vector.getPositionCount(), is(0)); assertKeepMaskEmpty(vector); @@ -1386,6 +1411,20 @@ static void assertKeepMaskEmpty(Vector vector) { } } + static void assertToMaskZeroPositions(BooleanBlock block) { + try (ToMask mask = block.toMask()) { + assertThat(mask.mask().getPositionCount(), equalTo(0)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + + static void assertToMask(BooleanVector vector) { + try (ToMask mask = vector.asBlock().toMask()) { + assertThat(mask.mask(), sameInstance(vector)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + void releaseAndAssertBreaker(Block... 
blocks) { assertThat(breaker.getUsed(), greaterThan(0L)); Page[] pages = Arrays.stream(blocks).map(Page::new).toArray(Page[]::new); @@ -1836,7 +1875,7 @@ static void assertKeepMask(Block block) { /** * Build a random valid "mask" of single valued boolean fields that. */ - private static BooleanVector randomMask(int positions) { + static BooleanVector randomMask(int positions) { try (BooleanVector.Builder builder = TestBlockFactory.getNonBreakingInstance().newBooleanVectorFixedBuilder(positions)) { for (int i = 0; i < positions; i++) { builder.appendBoolean(randomBoolean()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java index df32dcaddd92..34d591cd87d8 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java @@ -164,6 +164,12 @@ public void testBooleanVector() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[p])); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanVectorBlock.class)); assertThat(block.asVector(), instanceOf(BooleanArrayVector.class)); @@ -224,6 +230,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = 
serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanArrayBlock.class)); assertNull(copy.asVector()); @@ -253,6 +265,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanBigArrayBlock.class)); assertNull(block.asVector()); @@ -266,4 +284,52 @@ public void testBooleanBlock() throws IOException { } assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); } + + /** + * Tests a block with one value being multivalued and the rest are single valued. + */ + public void testBooleanBlockOneMv() { + int mvCount = between(2, 10); + int positionCount = randomIntBetween(1000, 5000); + blockFactory = new BlockFactory(blockFactory.breaker(), blockFactory.bigArrays(), ByteSizeValue.ofBytes(1)); + try (var builder = blockFactory.newBooleanBlockBuilder(between(1, mvCount + positionCount))) { + boolean[] elements = new boolean[positionCount + mvCount]; + builder.beginPositionEntry(); + for (int i = 0; i < mvCount; i++) { + elements[i] = randomBoolean(); + builder.appendBoolean(elements[i]); + } + builder.endPositionEntry(); + for (int p = 1; p < positionCount; p++) { + elements[mvCount + p] = randomBoolean(); + builder.appendBoolean(elements[mvCount + p]); + } + try (var block = builder.build()) { + assertThat(block, instanceOf(BooleanBigArrayBlock.class)); + assertNull(block.asVector()); + assertThat(block.getPositionCount(), equalTo(positionCount)); + assertThat(block.getValueCount(0), equalTo(mvCount)); + for (int i = 0; i < mvCount; i++) { + assertThat(block.getBoolean(block.getFirstValueIndex(0) + i), equalTo(elements[i])); + } + for (int p = 1; p < positionCount; p++) { + 
assertThat(block.getValueCount(p), equalTo(1)); + assertThat(block.getBoolean(block.getFirstValueIndex(p)), equalTo(elements[mvCount + p])); + } + assertKeepMask(block); + try (ToMask mask = block.toMask()) { + /* + * NOTE: this test is customized to the layout above where we don't make + * any fields with 0 values. + */ + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + assertThat(mask.mask().getBoolean(0), equalTo(false)); + for (int p = 1; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[mvCount + p])); + } + } + } + } + assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java index af4c643a9062..aab8b86f9b79 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java @@ -72,6 +72,12 @@ public void testBoolean() throws IOException { assertEmptyLookup(blockFactory, vector.asBlock()); assertSerialization(block); assertThat(vector.toString(), containsString("BooleanBigArrayVector[positions=" + positionCount)); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < values.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(values[p])); + } + } } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java index c5e130726844..e37b2638b56f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.nullValue; public class BlockMultiValuedTests extends ESTestCase { @ParametersFactory @@ -122,6 +123,54 @@ public void testLookupFromSingleManyPages() { assertLookup(ByteSizeValue.ofBytes(1), between(1, 32), p -> 1); } + public void testToMask() { + if (elementType != ElementType.BOOLEAN) { + return; + } + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 2, 10, 0, 0); + try (ToMask mask = ((BooleanBlock) b.block()).toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < b.values().size(); p++) { + List v = b.values().get(p); + if (v == null) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + if (v.size() != 1) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + assertThat(mask.mask().getBoolean(p), equalTo(v.get(0))); + } + } finally { + b.block().close(); + } + } + + public void testMask() { + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); + try ( + BooleanVector mask = BasicBlockTests.randomMask(b.values().size() + between(0, 1000)); + Block masked = b.block().keepMask(mask) + ) { + for (int p = 0; p < b.values().size(); p++) { + List inputValues = b.values().get(p); + List valuesAtPosition = BasicBlockTests.valuesAtPositions(masked, p, p + 1).get(0); + if (inputValues == null || mask.getBoolean(p) == false) { + assertThat(masked.isNull(p), equalTo(true)); + assertThat(valuesAtPosition, nullValue()); + continue; + } + assertThat(masked.isNull(p), equalTo(false)); + assertThat(valuesAtPosition, equalTo(inputValues)); 
+ } + } finally { + b.block().close(); + } + } + private void assertFiltered(boolean all, boolean shuffled) { int positionCount = randomIntBetween(1, 16 * 1024); var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); From 68b211e025f2222704e50b2f6b6890dbf8f94515 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:01:56 +0200 Subject: [PATCH 23/30] Store original source for keywords using a normalizer (#112151) Using a normalizer for a keyword field might result in not being able to reconstruct the original source when using synthetic source. Here if synthetic source is enabled and a normalizer is configured we store the original value in a stored field which is later used at document reconstruction time to reconstruct the field value as it was in the original document. We use the same fallback solution we use in other places like `ignore_malformed`. --- docs/changelog/112151.yaml | 5 ++ .../test/mget/90_synthetic_source.yml | 88 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 19 ++-- .../index/mapper/MapperFeatures.java | 1 + .../KeywordFieldSyntheticSourceSupport.java | 9 +- 5 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 docs/changelog/112151.yaml diff --git a/docs/changelog/112151.yaml b/docs/changelog/112151.yaml new file mode 100644 index 000000000000..f5cbfd8da07c --- /dev/null +++ b/docs/changelog/112151.yaml @@ -0,0 +1,5 @@ +pr: 112151 +summary: Store original source for keywords using a normalizer +area: Logs +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index 2935c0c1c41b..ff17a92ed0fc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,94 @@ keyword: docs.1._source: kwd: bar +--- +keyword with normalizer: + - requires: + cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ] + reason: support for normalizer on keyword fields + - do: + indices.create: + index: test-keyword-with-normalizer + body: + settings: + analysis: + normalizer: + lowercase: + type: custom + filter: + - lowercase + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + normalizer: lowercase + keyword_with_ignore_above: + type: keyword + normalizer: lowercase + ignore_above: 10 + keyword_without_doc_values: + type: keyword + normalizer: lowercase + doc_values: false + + - do: + index: + index: test-keyword-with-normalizer + id: 1 + body: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - do: + index: + index: test-keyword-with-normalizer + id: 2 + body: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - do: + index: + index: test-keyword-with-normalizer + id: 3 + body: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + + - do: + mget: + index: test-keyword-with-normalizer + body: + ids: [ 1, 2, 3 ] + - match: { docs.0._index: "test-keyword-with-normalizer" } + - match: { docs.0._id: "1" } + - match: + docs.0._source: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + 
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - match: { docs.1._index: "test-keyword-with-normalizer" } + - match: { docs.1._id: "2" } + - match: + docs.1._source: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - match: { docs.2._index: "test-keyword-with-normalizer" } + - match: { docs.2._id: "3" } + - match: + docs.2._source: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + --- stored text: - requires: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 9645b4397df4..d130f37c3e8e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above"); + static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source"); public static class Defaults { public static final FieldType FIELD_TYPE; @@ -856,7 +857,7 @@ public boolean hasNormalizer() { private final Script script; private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - private final boolean storeIgnored; + private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; @@ -866,7 +867,7 @@ private KeywordFieldMapper( KeywordFieldType 
mappedFieldType, MultiFields multiFields, CopyTo copyTo, - boolean storeIgnored, + boolean isSyntheticSource, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue()); @@ -881,7 +882,7 @@ private KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; - this.storeIgnored = storeIgnored; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) { if (value.length() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (storeIgnored) { + if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } @@ -1026,6 +1027,11 @@ private String originalName() { @Override protected SyntheticSourceMode syntheticSourceMode() { + if (hasNormalizer()) { + // NOTE: no matter if we have doc values or not we use a stored field to reconstruct the original value + // whose doc values would be altered by the normalizer + return SyntheticSourceMode.FALLBACK; + } if (fieldType.stored() || hasDocValues) { return SyntheticSourceMode.NATIVE; } @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - if (hasNormalizer()) { - throw new IllegalArgumentException( - "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer" - ); - } if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) { return super.syntheticFieldLoader(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java 
b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 6dce9d6c7b86..63bbef061c61 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -33,6 +33,7 @@ public Set getFeatures() { NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, ObjectMapper.SUBOBJECTS_AUTO, + KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java index 6abe92385131..2f452161b10c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java @@ -21,8 +21,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.hamcrest.Matchers.equalTo; - public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport { private final Integer ignoreAbove; private final boolean allIgnored; @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { - return List.of( - new MapperTestCase.SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"), - b -> b.field("type", "keyword").field("normalizer", "lowercase") - ) - ); + return List.of(); } } From 1be4f65da2ac35d971626353b8a076aa75a7b693 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 15:20:03 +0100 Subject: [PATCH 24/30] Add constants for UUID lengths (#112353) Our UUID strings have fixed lengths (depending on the type of 
UUID). Sometimes we might want code to rely on knowing these lengths rather than doing some other string manipulations to look for a boundary. This commit exposes constants for these things. --- .../common/RandomBasedUUIDGenerator.java | 4 +- .../common/TimeBasedUUIDGenerator.java | 4 +- .../java/org/elasticsearch/common/UUIDs.java | 40 +++++++++++++++---- .../org/elasticsearch/common/UUIDTests.java | 16 ++++++++ .../blobstore/RepositoryFileType.java | 5 ++- 5 files changed, 57 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java index e731cf3bc58b..58c23ab9aa39 100644 --- a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java @@ -56,8 +56,10 @@ public static String getBase64UUID(Random random) { return Base64.getUrlEncoder().withoutPadding().encodeToString(getUUIDBytes(random)); } + static final int SIZE_IN_BYTES = 16; + private static byte[] getUUIDBytes(Random random) { - final byte[] randomBytes = new byte[16]; + final byte[] randomBytes = new byte[SIZE_IN_BYTES]; random.nextBytes(randomBytes); /* Set the version to version 4 (see http://www.ietf.org/rfc/rfc4122.txt) * The randomly or pseudo-randomly generated version. 
diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java index f7f7f520fec9..d66b0f579ce3 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java @@ -47,6 +47,8 @@ protected byte[] macAddress() { return SECURE_MUNGED_ADDRESS; } + static final int SIZE_IN_BYTES = 15; + @Override public String getBase64UUID() { final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff; @@ -61,7 +63,7 @@ public String getBase64UUID() { sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max ); - final byte[] uuidBytes = new byte[15]; + final byte[] uuidBytes = new byte[SIZE_IN_BYTES]; int i = 0; // We have auto-generated ids, which are usually used for append-only workloads. diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java index 43a232e82510..ebc0978f38d4 100644 --- a/server/src/main/java/org/elasticsearch/common/UUIDs.java +++ b/server/src/main/java/org/elasticsearch/common/UUIDs.java @@ -17,26 +17,50 @@ public class UUIDs { private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(); - /** Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as - * primary key. The id is opaque and the implementation is free to change at any time! */ + /** + * The length of a UUID string generated by {@link #base64UUID}. + */ + // A 15-byte time-based UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding). 
+ public static final int TIME_BASED_UUID_STRING_LENGTH = 20; + + /** + * Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as + * primary key. The id is opaque and the implementation is free to change at any time! + * The resulting string has length {@link #TIME_BASED_UUID_STRING_LENGTH}. + */ public static String base64UUID() { return TIME_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using the - * provided {@code Random} instance */ + /** + * The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}. + */ + // A 16-byte v4 UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding) plus another byte (becomes 2 chars). + public static final int RANDOM_BASED_UUID_STRING_LENGTH = 22; + + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using the + * provided {@code Random} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID(Random random) { return RandomBasedUUIDGenerator.getBase64UUID(random); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using a - * private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using a + * private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. 
+ */ public static String randomBase64UUID() { return RANDOM_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded {@link SecureString} of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, - * using a private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded {@link SecureString} representing a RFC4122 version 4 + * UUID, using a private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static SecureString randomBase64UUIDSecureString() { return RandomBasedUUIDGenerator.getBase64UUIDSecureString(); } diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index 5af036ce0648..3229049b67b4 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -176,4 +176,20 @@ protected byte[] macAddress() { ); return bytesPerDoc; } + + public void testStringLength() { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(RandomBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID().length()); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID(random()).length()); + try (var secureString = UUIDs.randomBase64UUIDSecureString()) { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, secureString.toString().length()); + } + + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(TimeBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, UUIDs.base64UUID().length()); + } + + private static int getUnpaddedBase64StringLength(int sizeInBytes) { + return (int) Math.ceil(sizeInBytes * 4.0 / 3.0); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java 
b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java index 014cbcd2bcc3..8d20cce33bbb 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.blobstore; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.UUIDs; import java.nio.file.Path; import java.util.regex.Pattern; @@ -38,9 +39,9 @@ public enum RepositoryFileType { // decimal numbers .replace("NUM", "(0|[1-9][0-9]*)") // 15-byte UUIDS from TimeBasedUUIDGenerator - .replace("SHORTUUID", "[0-9a-zA-Z_-]{20}") + .replace("SHORTUUID", "[0-9a-zA-Z_-]{" + UUIDs.TIME_BASED_UUID_STRING_LENGTH + "}") // 16-byte UUIDs from RandomBasedUUIDGenerator - .replace("UUID", "[0-9a-zA-Z_-]{22}") + .replace("UUID", "[0-9a-zA-Z_-]{" + UUIDs.RANDOM_BASED_UUID_STRING_LENGTH + "}") + ")$" ); } From 9e01181f0dd9bf6ea919164f8929f0874ad6e26d Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 17:33:05 +0300 Subject: [PATCH 25/30] Remove unused cluster blocks in create index (#112352) Remove unused cluster blocks in create index --- .../CreateIndexClusterStateUpdateRequest.java | 9 ------- .../metadata/MetadataCreateIndexService.java | 24 ++++--------------- .../MetadataCreateIndexServiceTests.java | 10 +------- 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index 8a46daa45e73..948199fbe74f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.admin.indices.shrink.ResizeType; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; @@ -43,8 +42,6 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ private final Set aliases = new HashSet<>(); - private final Set blocks = new HashSet<>(); - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; private boolean performReroute = true; @@ -125,10 +122,6 @@ public Set aliases() { return aliases; } - public Set blocks() { - return blocks; - } - public Index recoverFrom() { return recoverFrom; } @@ -229,8 +222,6 @@ public String toString() { + settings + ", aliases=" + aliases - + ", blocks=" - + blocks + ", waitForActiveShards=" + waitForActiveShards + ", systemDataStreamDescriptor=" diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b1a19d99dcb1..07dcb7baf077 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.AckedClusterStateUpdateTask; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; import 
org.elasticsearch.cluster.node.DiscoveryNodes; @@ -514,7 +513,6 @@ private ClusterState applyCreateIndexWithTemporaryService( ClusterState updated = clusterStateCreateIndex( currentState, - request.blocks(), indexMetadata, metadataTransformer, allocationService.getShardRoutingRoleStrategy() @@ -1231,7 +1229,6 @@ public static List resolveAndValidateAliases( */ static ClusterState clusterStateCreateIndex( ClusterState currentState, - Set clusterBlocks, IndexMetadata indexMetadata, BiConsumer metadataTransformer, ShardRoutingRoleStrategy shardRoutingRoleStrategy @@ -1245,14 +1242,13 @@ static ClusterState clusterStateCreateIndex( newMetadata = currentState.metadata().withAddedIndex(indexMetadata); } - String indexName = indexMetadata.getIndex().getName(); - ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); - blocks.updateBlocks(indexMetadata); + var blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); + blocksBuilder.updateBlocks(indexMetadata); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) - .addAsNew(newMetadata.index(indexName)); + var routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexMetadata.getIndex().getName())); - return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); + return ClusterState.builder(currentState).blocks(blocksBuilder).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( @@ -1325,16 +1321,6 @@ private static IndexMetadata.Builder createIndexMetadataBuilder( return builder; } - private static ClusterBlocks.Builder createClusterBlocksBuilder(ClusterState currentState, String index, Set blocks) { - ClusterBlocks.Builder blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); - if 
(blocks.isEmpty() == false) { - for (ClusterBlock block : blocks) { - blocksBuilder.addIndexBlock(index, block); - } - } - return blocksBuilder; - } - private static void updateIndexMappingsAndBuildSortOrder( IndexService indexService, CreateIndexClusterStateUpdateRequest request, diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 8a487e565362..f7d343b43b29 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -967,13 +967,7 @@ public void testClusterStateCreateIndexThrowsWriteIndexValidationException() thr assertThat( expectThrows( IllegalStateException.class, - () -> clusterStateCreateIndex( - currentClusterState, - Set.of(), - newIndex, - null, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY - ) + () -> clusterStateCreateIndex(currentClusterState, newIndex, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) ).getMessage(), startsWith("alias [alias1] has more than one write index [") ); @@ -991,7 +985,6 @@ public void testClusterStateCreateIndex() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY @@ -1037,7 +1030,6 @@ public void testClusterStateCreateIndexWithMetadataTransaction() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, metadataTransformer, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY From cf0e18872878cce9332722c491b5cc7749106ae4 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 29 Aug 2024 08:56:00 -0600 Subject: [PATCH 26/30] Add isAsync() to SearcTask and eliminate code for async detection from 
TransportSearchAction (#112311) --- .../action/search/SearchTask.java | 7 +++++ .../action/search/TransportSearchAction.java | 30 +------------------ .../xpack/search/AsyncSearchTask.java | 5 ++++ 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java index 3bf72313c4c2..cc5d60ad0b0c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java @@ -69,4 +69,11 @@ public Supplier getSearchResponseMergerSupplier() { public void setSearchResponseMergerSupplier(Supplier supplier) { this.searchResponseMergerSupplier = supplier; } + + /** + * Is this async search? + */ + public boolean isAsync() { + return false; + } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 6e1645c1ed71..32ee9c331295 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -369,7 +369,7 @@ void executeRequest( } else { if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { tl.setRemotes(resolvedIndices.getRemoteClusterIndices().size()); - if (isAsyncSearchTask(task)) { + if (task.isAsync()) { tl.setFeature(CCSUsageTelemetry.ASYNC_FEATURE); } String client = task.getHeader(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER); @@ -1514,34 +1514,6 @@ public SearchPhase newSearchPhase( } } - /** - * TransportSearchAction cannot access async-search code, so can't check whether this the Task - * is an instance of AsyncSearchTask, so this roundabout method is used - * @param searchTask SearchTask to analyze - * @return true if this is an async search task; false if a synchronous 
search task - */ - private boolean isAsyncSearchTask(SearchTask searchTask) { - assert assertAsyncSearchTaskListener(searchTask) : "AsyncSearchTask SearchProgressListener is not one of the expected types"; - // AsyncSearchTask will not return SearchProgressListener.NOOP, since it uses its own progress listener - // which delegates to CCSSingleCoordinatorSearchProgressListener when minimizing roundtrips. - // Only synchronous SearchTask uses SearchProgressListener.NOOP or CCSSingleCoordinatorSearchProgressListener directly - return searchTask.getProgressListener() != SearchProgressListener.NOOP - && searchTask.getProgressListener() instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - - /** - * @param searchTask SearchTask to analyze - * @return true if AsyncSearchTask still uses its own special listener, not one of the two that synchronous SearchTask uses - */ - private boolean assertAsyncSearchTaskListener(SearchTask searchTask) { - if (searchTask.getClass().getSimpleName().contains("AsyncSearchTask")) { - SearchProgressListener progressListener = searchTask.getProgressListener(); - return progressListener != SearchProgressListener.NOOP - && progressListener instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - return true; - } - private static void validateAndResolveWaitForCheckpoint( ClusterState clusterState, IndexNameExpressionResolver resolver, diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index c0305f873327..5068ac69e462 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -545,4 +545,9 @@ public void onFailure(Exception exc) { executeCompletionListeners(); } } + + @Override + public boolean isAsync() { + return true; + } 
} From 8f526098dbfcb109b8e5b01ee436e09491169025 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 16:57:51 +0200 Subject: [PATCH 27/30] Fix SearchServiceTests not waiting for scroll clear (#111547) We were not waiting on the response here but assume the scrolls are cleared in the following lines. This worked as long as the transport action wasn't forking but is broken now that we fork to generic. Fixed by just waiting. closes #111529 --- .../test/java/org/elasticsearch/search/SearchServiceTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java index bdddea58b713..2617f82b09f0 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java @@ -1548,7 +1548,7 @@ public void testMaxOpenScrollContexts() throws Exception { ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); clearScrollRequest.setScrollIds(clearScrollIds); - client().clearScroll(clearScrollRequest); + client().clearScroll(clearScrollRequest).get(); for (int i = 0; i < clearScrollIds.size(); i++) { client().prepareSearch("index").setSize(1).setScroll(TimeValue.timeValueMinutes(1)).get().decRef(); From e966d0d9da74cb24c97a17d174f1b65324411e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 17:30:16 +0200 Subject: [PATCH 28/30] Removing the feature flag mechanism for LTR. 
(#112358) --- .../ml/DefaultMachineLearningExtension.java | 5 ----- .../elasticsearch/xpack/ml/MachineLearning.java | 14 +++++--------- .../xpack/ml/MachineLearningExtension.java | 4 ---- .../ml/LocalStateMachineLearningAdOnly.java | 2 +- .../ml/LocalStateMachineLearningDfaOnly.java | 2 +- .../ml/LocalStateMachineLearningNlpOnly.java | 2 +- .../MachineLearningInfoTransportActionTests.java | 9 +-------- .../xpack/ml/MachineLearningTests.java | 16 ++++------------ 8 files changed, 13 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java index 66f4797ef707..fa94bf96c116 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java @@ -51,11 +51,6 @@ public boolean isNlpEnabled() { return true; } - @Override - public boolean isLearningToRankEnabled() { - return true; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index c4bf92401be9..5876836185ba 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -902,7 +902,7 @@ private static void reportClashingNodeAttribute(String attrName) { @Override public List> getRescorers() { - if (enabled && machineLearningExtension.get().isLearningToRankEnabled()) { + if (enabled) { return List.of( new RescorerSpec<>( LearningToRankRescorerBuilder.NAME, @@ -1864,10 +1864,8 @@ public List getNamedXContent() { ) ); namedXContent.addAll(new 
CorrelationNamedContentProvider().getNamedXContentParsers()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); - } + namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); + return namedXContent; } @@ -1958,10 +1956,8 @@ public List getNamedWriteables() { namedWriteables.addAll(MlAutoscalingNamedWritableProvider.getNamedWriteables()); namedWriteables.addAll(new CorrelationNamedContentProvider().getNamedWriteables()); namedWriteables.addAll(new ChangePointNamedContentProvider().getNamedWriteables()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); - } + namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); + return namedWriteables; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java index 528883439ef2..f46652978753 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java @@ -25,10 +25,6 @@ default void configure(Settings settings) {} boolean isNlpEnabled(); - default boolean isLearningToRankEnabled() { - return true; - } - default boolean disableInferenceProcessCache() { return false; } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java index 175a035a70f7..3ff3a4a404f9 100644 --- 
a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningAdOnly extends LocalStateMachineLearning { public LocalStateMachineLearningAdOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java index f054e52dc29e..1a72f27865d8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningDfaOnly extends LocalStateMachineLearning { public LocalStateMachineLearningDfaOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java index a3d684011e93..0f11e8033b83 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java @@ -14,6 +14,6 @@ public class 
LocalStateMachineLearningNlpOnly extends LocalStateMachineLearning { public LocalStateMachineLearningNlpOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index afa372fb9452..e5575abfeb02 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -160,14 +160,7 @@ private MachineLearningUsageTransportAction newUsageAction( licenseState, jobManagerHolder, new MachineLearningExtensionHolder( - new MachineLearningTests.MlTestExtension( - true, - true, - isAnomalyDetectionEnabled, - isDataFrameAnalyticsEnabled, - isNlpEnabled, - true - ) + new MachineLearningTests.MlTestExtension(true, true, isAnomalyDetectionEnabled, isDataFrameAnalyticsEnabled, isNlpEnabled) ) ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java index c35b9da7b2bd..8a05537917ab 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java @@ -220,7 +220,7 @@ public void testNoAttributes_givenClash() throws IOException { public void testAnomalyDetectionOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new 
MlTestExtension(false, false, true, false, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -240,7 +240,7 @@ public void testAnomalyDetectionOnly() throws IOException { public void testDataFrameAnalyticsOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -260,7 +260,7 @@ public void testDataFrameAnalyticsOnly() throws IOException { public void testNlpOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -287,22 +287,19 @@ public static class MlTestExtension implements 
MachineLearningExtension { private final boolean isAnomalyDetectionEnabled; private final boolean isDataFrameAnalyticsEnabled; private final boolean isNlpEnabled; - private final boolean isLearningToRankEnabled; MlTestExtension( boolean useIlm, boolean includeNodeInfo, boolean isAnomalyDetectionEnabled, boolean isDataFrameAnalyticsEnabled, - boolean isNlpEnabled, - boolean isLearningToRankEnabled + boolean isNlpEnabled ) { this.useIlm = useIlm; this.includeNodeInfo = includeNodeInfo; this.isAnomalyDetectionEnabled = isAnomalyDetectionEnabled; this.isDataFrameAnalyticsEnabled = isDataFrameAnalyticsEnabled; this.isNlpEnabled = isNlpEnabled; - this.isLearningToRankEnabled = isLearningToRankEnabled; } @Override @@ -330,11 +327,6 @@ public boolean isNlpEnabled() { return isNlpEnabled; } - @Override - public boolean isLearningToRankEnabled() { - return isLearningToRankEnabled; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; From 592858281400cfa608080aca5dac8331d3ea1f5c Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:03:03 -0400 Subject: [PATCH 29/30] [ES|QL] Combine 3 commonTypes into one (#112220) Combine 3 commonTypes into one. 
--- .../esql/core/type/DataTypeConverter.java | 80 --------- .../predicate/operator/arithmetic/Add.java | 1 - .../arithmetic/ArithmeticOperation.java | 7 +- .../BinaryComparisonInversible.java | 2 +- .../predicate/operator/arithmetic/Div.java | 1 - .../arithmetic/EsqlArithmeticOperation.java | 5 +- .../predicate/operator/arithmetic/Mul.java | 1 - .../predicate/operator/arithmetic/Sub.java | 1 - .../comparison/EsqlBinaryComparison.java | 4 +- .../predicate/operator/comparison/In.java | 4 +- .../rules/SimplifyComparisonsArithmetics.java | 4 +- .../esql/type/EsqlDataTypeConverter.java | 77 +++++++-- .../xpack/esql/type/EsqlDataTypeRegistry.java | 24 --- .../esql/type/DataTypeConversionTests.java | 20 --- .../esql/type/EsqlDataTypeConverterTests.java | 158 ++++++++++++++++++ 15 files changed, 234 insertions(+), 155 deletions(-) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/ArithmeticOperation.java (80%) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java (91%) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java index 1e68d63ef7bb..78b395503e70 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java @@ -38,7 +38,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static 
org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.UNSIGNED_LONG_MAX; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.inUnsignedLongRange; @@ -51,85 +50,6 @@ public final class DataTypeConverter { private DataTypeConverter() {} - /** - * Returns the type compatible with both left and right types - *

- * If one of the types is null - returns another type - * If both types are numeric - returns type with the highest precision int < long < float < double - * If one of the types is string and another numeric - returns numeric - */ - public static DataType commonType(DataType left, DataType right) { - if (left == right) { - return left; - } - if (left == NULL) { - return right; - } - if (right == NULL) { - return left; - } - if (isString(left) && isString(right)) { - if (left == TEXT || right == TEXT) { - return TEXT; - } - if (left == KEYWORD) { - return KEYWORD; - } - return right; - } - if (left.isNumeric() && right.isNumeric()) { - int lsize = left.estimatedSize().orElseThrow(); - int rsize = right.estimatedSize().orElseThrow(); - // if one is int - if (left.isWholeNumber()) { - // promote the highest int - if (right.isWholeNumber()) { - if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { - return UNSIGNED_LONG; - } - return lsize > rsize ? left : right; - } - // promote the rational - return right; - } - // try the other side - if (right.isWholeNumber()) { - return left; - } - // promote the highest rational - return lsize > rsize ? left : right; - } - if (isString(left)) { - if (right.isNumeric()) { - return right; - } - } - if (isString(right)) { - if (left.isNumeric()) { - return left; - } - } - - if (isDateTime(left) && isDateTime(right)) { - return DATETIME; - } - - // none found - return null; - } - - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - /** * Get the conversion from one type to another. 
*/ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java index b6ec9b6fd0e2..8f8d885ee379 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java similarity index 80% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java index 8dc0f5808317..cb7e7c4643fb 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java @@ -4,16 +4,17 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; +import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class ArithmeticOperation extends BinaryOperator { @@ -36,7 +37,7 @@ public ArithmeticOperation swapLeftAndRight() { @Override public DataType dataType() { if (dataType == null) { - dataType = DataTypeConverter.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java similarity index 91% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java index 358ad59ec635..b0ab4c48d970 100644 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java index 0e4c506a90d8..f1e197cf350b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java index 647071c44cfd..400e70b64111 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java @@ -13,14 +13,12 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.util.List; @@ -31,6 +29,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlArithmeticOperation extends ArithmeticOperation implements EvaluatorMapper { public static List getNamedWriteables() { @@ -133,7 +132,7 @@ public Object fold() { public DataType dataType() { if (dataType == null) { - dataType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java index a73562ff153b..03981a821f52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java index ee2ccc3b7107..27f5579129cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java index 52d4c111b2ea..b50d70e69819 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.time.ZoneId; @@ -32,6 +31,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlBinaryComparison extends BinaryComparison implements EvaluatorMapper { public static List getNamedWriteables() { @@ -172,7 +172,7 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator( Function toEvaluator ) { // Our type is always boolean, so figure out the evaluator type from the inputs - DataType commonType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + DataType commonType = commonType(left().dataType(), right().dataType()); EvalOperator.ExpressionEvaluator.Factory lhs; EvalOperator.ExpressionEvaluator.Factory rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java index 636b31fcc691..333f32e82c57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; import java.io.IOException; import java.util.BitSet; @@ -269,7 +269,7 @@ private DataType commonType() { break; } } - commonType = EsqlDataTypeRegistry.INSTANCE.commonType(commonType, e.dataType()); + commonType = EsqlDataTypeConverter.commonType(commonType, e.dataType()); } return commonType; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java index 4ef069ea16d0..fe83aeb647bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java @@ -9,10 +9,10 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import 
org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.ArithmeticOperation; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 1572f8950e0a..b090708a64ad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -58,6 +58,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; @@ -67,9 +68,14 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static 
org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeDoubleToLong; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeToInt; @@ -107,18 +113,6 @@ public class EsqlDataTypeConverter { entry(VERSION, ToVersion::new) ); - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - public static Converter converterFor(DataType from, DataType to) { // TODO move EXPRESSION_TO_LONG here if there is no regression if (isString(from)) { @@ -230,8 +224,63 @@ public static Object convert(Object value, DataType dataType) { return converter.convert(value); } + /** + * Returns the type compatible with both left and right types + *

+ * If one of the types is null - returns another type + * If both types are numeric - returns type with the highest precision int < long < float < double + */ public static DataType commonType(DataType left, DataType right) { - return DataTypeConverter.commonType(left, right); + if (left == right) { + return left; + } + if (left == NULL) { + return right; + } + if (right == NULL) { + return left; + } + if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { + if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { + return DATETIME; + } + if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { + return TIME_DURATION; + } + if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { + return DATE_PERIOD; + } + } + if (isString(left) && isString(right)) { + if (left == TEXT || right == TEXT) { + return TEXT; + } + return right; + } + if (left.isNumeric() && right.isNumeric()) { + int lsize = left.estimatedSize().orElseThrow(); + int rsize = right.estimatedSize().orElseThrow(); + // if one is int + if (left.isWholeNumber()) { + // promote the highest int + if (right.isWholeNumber()) { + if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { + return UNSIGNED_LONG; + } + return lsize > rsize ? left : right; + } + // promote the rational + return right; + } + // try the other side + if (right.isWholeNumber()) { + return left; + } + // promote the highest rational + return lsize > rsize ? 
left : right; + } + // none found + return null; } // generally supporting abbreviations from https://en.wikipedia.org/wiki/Unit_of_time diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java index 96e206b82cf0..f8e8cd37dc8b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java @@ -10,15 +10,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.xpack.esql.core.type.DataType; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; -import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; - public class EsqlDataTypeRegistry { public static final EsqlDataTypeRegistry INSTANCE = new EsqlDataTypeRegistry(); @@ -35,19 +26,4 @@ public DataType fromEs(String typeName, TimeSeriesParams.MetricType metricType) */ return metricType == TimeSeriesParams.MetricType.COUNTER ? 
type.widenSmallNumeric().counter() : type; } - - public DataType commonType(DataType left, DataType right) { - if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { - if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { - return DATETIME; - } - if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { - return TIME_DURATION; - } - if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { - return DATE_PERIOD; - } - } - return EsqlDataTypeConverter.commonType(left, right); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java index 9f8c8f91b703..871bf632adcc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java @@ -35,7 +35,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.commonType; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.converterFor; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -522,25 +521,6 @@ public void testConversionToIdentity() { assertEquals(10, conversion.convert(10)); } - public void testCommonType() { - assertEquals(BOOLEAN, commonType(BOOLEAN, NULL)); - assertEquals(BOOLEAN, commonType(NULL, BOOLEAN)); - assertEquals(BOOLEAN, commonType(BOOLEAN, BOOLEAN)); - assertEquals(NULL, commonType(NULL, NULL)); - assertEquals(INTEGER, commonType(INTEGER, KEYWORD)); - assertEquals(LONG, commonType(TEXT, LONG)); - assertEquals(SHORT, commonType(SHORT, BYTE)); - 
assertEquals(FLOAT, commonType(BYTE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, INTEGER)); - assertEquals(UNSIGNED_LONG, commonType(UNSIGNED_LONG, LONG)); - assertEquals(DOUBLE, commonType(DOUBLE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, UNSIGNED_LONG)); - - // strings - assertEquals(TEXT, commonType(TEXT, KEYWORD)); - assertEquals(TEXT, commonType(KEYWORD, TEXT)); - } - public void testEsDataTypes() { for (DataType type : DataType.types()) { assertEquals(type, DataType.fromTypeName(type.typeName())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java index 0997c88aac2b..8ad083683f69 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java @@ -8,6 +8,44 @@ package org.elasticsearch.xpack.esql.type; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; +import static org.elasticsearch.xpack.esql.core.type.DataType.PARTIAL_AGG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public class EsqlDataTypeConverterTests extends ESTestCase { @@ -16,4 +54,124 @@ public void testNanoTimeToString() { long actual = EsqlDataTypeConverter.dateNanosToLong(EsqlDataTypeConverter.nanoTimeToString(expected)); assertEquals(expected, actual); } + + public void testCommonTypeNull() { + for (DataType dataType 
: DataType.values()) { + assertEqualsCommonType(dataType, NULL, dataType); + } + } + + public void testCommonTypeStrings() { + List STRINGS = Arrays.stream(DataType.values()).filter(DataType::isString).toList(); + for (DataType dataType1 : STRINGS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if ((isString(dataType1) && isString(dataType2))) { + if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertEqualsCommonType(dataType1, dataType2, TEXT); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeDateTimeIntervals() { + List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrTemporal).toList(); + for (DataType dataType1 : DATE_TIME_INTERVALS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if (isDateTimeOrTemporal(dataType2)) { + if (isDateTime(dataType1) || isDateTime(dataType2)) { + assertEqualsCommonType(dataType1, dataType2, DATETIME); + } else if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeNumeric() { + // whole numbers + commonNumericType(BYTE, List.of(NULL, BYTE)); + commonNumericType(SHORT, List.of(NULL, BYTE, SHORT)); + commonNumericType(INTEGER, List.of(NULL, BYTE, SHORT, INTEGER)); + commonNumericType(LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG)); + commonNumericType(UNSIGNED_LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG)); + // floats + commonNumericType(HALF_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT)); + commonNumericType(FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, 
UNSIGNED_LONG, FLOAT, HALF_FLOAT)); + commonNumericType(DOUBLE, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT)); + commonNumericType(SCALED_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, SCALED_FLOAT, DOUBLE)); + } + + /** + * The first argument and the second argument(s) have the first argument as a common type. + */ + private static void commonNumericType(DataType numericType, List lowerTypes) { + List NUMERICS = Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList(); + List DOUBLES = Arrays.stream(DataType.values()).filter(DataType::isRationalNumber).toList(); + for (DataType dataType : DataType.values()) { + if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize().equals(numericType.estimatedSize()))) { + assertEquals(numericType, commonType(dataType, numericType)); + } else if (lowerTypes.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, numericType); + } else if (NUMERICS.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, dataType); + } else { + assertNullCommonType(numericType, dataType); + } + } + } + + public void testCommonTypeMiscellaneous() { + List MISCELLANEOUS = List.of( + COUNTER_INTEGER, + COUNTER_LONG, + COUNTER_DOUBLE, + UNSUPPORTED, + OBJECT, + SOURCE, + DATE_NANOS, + DOC_DATA_TYPE, + TSID_DATA_TYPE, + PARTIAL_AGG, + IP, + VERSION, + GEO_POINT, + GEO_SHAPE, + CARTESIAN_POINT, + CARTESIAN_SHAPE, + BOOLEAN + ); + for (DataType dataType1 : MISCELLANEOUS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL || dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + private static void assertEqualsCommonType(DataType dataType1, DataType dataType2, DataType commonType) { + assertEquals(commonType, commonType(dataType1, dataType2)); + assertEquals(commonType, 
commonType(dataType2, dataType1)); + } + + private static void assertNullCommonType(DataType dataType1, DataType dataType2) { + assertNull(commonType(dataType1, dataType2)); + assertNull(commonType(dataType2, dataType1)); + } } From e3e562ffbfb981014bdd71bf663bb6f972f5e352 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 29 Aug 2024 17:18:54 +0100 Subject: [PATCH 30/30] [ML] Support sparse embedding models in the elasticsearch inference service (#112270) For a sparse embedding model created with the ml trained models APIs --- docs/changelog/112270.yaml | 5 + .../inference/service-elasticsearch.asciidoc | 3 +- .../xpack/inference/CustomElandModelIT.java | 134 +++++++++ .../xpack/inference/RerankingIT.java | 8 +- .../BaseElasticsearchInternalService.java | 6 +- .../ElasticsearchInternalService.java | 151 +++------- .../services/elser/ElserInternalService.java | 28 -- .../ElasticsearchInternalServiceTests.java | 278 +++++++++++------- 8 files changed, 363 insertions(+), 250 deletions(-) create mode 100644 docs/changelog/112270.yaml create mode 100644 x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java diff --git a/docs/changelog/112270.yaml b/docs/changelog/112270.yaml new file mode 100644 index 000000000000..1e6b9c7fc929 --- /dev/null +++ b/docs/changelog/112270.yaml @@ -0,0 +1,5 @@ +pr: 112270 +summary: Support sparse embedding models in the elasticsearch inference service +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 99fd41ee2db6..572cad591fba 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -31,6 +31,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `rerank`, +* `sparse_embedding`, * `text_embedding`. 
-- @@ -182,4 +183,4 @@ PUT _inference/text_embedding/my-e5-model } } ------------------------------------------------------------ -// TEST[skip:TBD] \ No newline at end of file +// TEST[skip:TBD] diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java new file mode 100644 index 000000000000..65b7a138e7e1 --- /dev/null +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference; + +import org.elasticsearch.client.Request; +import org.elasticsearch.core.Strings; +import org.elasticsearch.inference.TaskType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.stream.Collectors; + +public class CustomElandModelIT extends InferenceBaseRestTest { + + // The model definition is taken from org.elasticsearch.xpack.ml.integration.TextExpansionQueryIT + + static final String BASE_64_ENCODED_MODEL = "UEsDBAAACAgAAAAAAAAAAAAAAAAAA" + + "AAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VG" + + "V4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29" + + "rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxl" + + "bW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7" + + "/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77" + + 
"KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVc" + + "tJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdF" + + "ZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26Qu" + + "Zy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9" + + "UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/e" + + "UuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZ" + + "W1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNL" + + "Fgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vL" + + "qbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLkt" + + "gNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjg" + + "hg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0Y" + + "gbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSR" + + "rhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9" + + "ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRgl" + + "XIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2" + + "dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/" + + "whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAA" + + "BkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgI" + + "AAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAA" + + "AACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLA" + + 
"QIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19f" + + "dG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVt" + + "b2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAA" + + "AAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAA" + + "AAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAA" + + "AAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA="; + + static final long RAW_MODEL_SIZE; // size of the model before base64 encoding + static { + RAW_MODEL_SIZE = Base64.getDecoder().decode(BASE_64_ENCODED_MODEL).length; + } + + // Test a sparse embedding model deployed with the ml trained models APIs + public void testSparse() throws IOException { + String modelId = "custom-text-expansion-model"; + + createTextExpansionModel(modelId); + putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); + putVocabulary( + List.of("these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"), + modelId + ); + + var inferenceConfig = """ + { + "service": "elasticsearch", + "service_settings": { + "model_id": "custom-text-expansion-model", + "num_allocations": 1, + "num_threads": 1 + } + } + """; + + var inferenceId = "sparse-inf"; + putModel(inferenceId, inferenceConfig, TaskType.SPARSE_EMBEDDING); + var results = inferOnMockService(inferenceId, List.of("washing", "machine")); + deleteModel(inferenceId); + assertNotNull(results.get("sparse_embedding")); + } + + protected void createTextExpansionModel(String modelId) throws IOException { + // with_special_tokens: false for this test with limited vocab + Request request = new Request("PUT", "/_ml/trained_models/" + modelId); + request.setJsonEntity(""" + { + "description": "a text expansion model", + "model_type": "pytorch", + "inference_config": { + "text_expansion": 
{ + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + }"""); + client().performRequest(request); + } + + protected void putVocabulary(List vocabulary, String modelId) throws IOException { + List vocabularyWithPad = new ArrayList<>(); + vocabularyWithPad.add("[PAD]"); + vocabularyWithPad.add("[UNK]"); + vocabularyWithPad.addAll(vocabulary); + String quotedWords = vocabularyWithPad.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/vocabulary"); + request.setJsonEntity(Strings.format(""" + { "vocabulary": [%s] } + """, quotedWords)); + client().performRequest(request); + } + + protected void putModelDefinition(String modelId, String base64EncodedModel, long unencodedModelSize) throws IOException { + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/definition/0"); + String body = Strings.format(""" + {"total_definition_length":%s,"definition": "%s","total_parts": 1}""", unencodedModelSize, base64EncodedModel); + request.setJsonEntity(body); + client().performRequest(request); + } +} diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java index 77251ada4c48..893d3fb3e9b8 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java @@ -35,7 +35,7 @@ private String putCohereRerankEndpoint() throws IOException { "api_key": "" } } - """);// TODO remove key + """); return endpointID; } @@ -61,7 +61,7 @@ private String putCohereRerankEndpointWithDocuments() throws IOException { "return_documents": true } } - """);// TODO 
remove key + """); return endpointID; } @@ -81,13 +81,13 @@ private String putCohereRerankEndpointWithTop2() throws IOException { "service": "cohere", "service_settings": { "model_id": "rerank-english-v2.0", - "api_key": "8TNPBvpBO7oN97009HQHzQbBhNrxmREbcJrZCwkK" + "api_key": "" }, "task_settings": { "top_n": 2 } } - """);// TODO remove key + """); return endpointID; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 574ca77d4587..457416370e55 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -154,10 +154,10 @@ public void isModelDownloaded(Model model, ActionListener listener) { executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); } else { listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" + new IllegalStateException( + "Can not check the download status of the model used by [" + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." + + "] as the model_id cannot be found." 
) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index c3a011156231..cca8ae63e974 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; @@ -27,19 +25,18 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import 
org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; @@ -53,8 +50,6 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; @@ -71,15 +66,13 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); - private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); - public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { super(context); } @Override protected EnumSet supportedTaskTypes() { - return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING); + return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING); } @Override @@ -161,6 +154,12 @@ private static CustomElandModel createCustomElandModel( NAME, 
CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context) ); + case SPARSE_EMBEDDING -> new CustomElandModel( + inferenceEntityId, + taskType, + NAME, + elandServiceSettings(serviceSettings, context) + ); case RERANK -> new CustomElandRerankModel( inferenceEntityId, taskType, @@ -334,6 +333,8 @@ public void infer( inferTextEmbedding(model, input, inputType, timeout, listener); } else if (TaskType.RERANK.equals(taskType)) { inferRerank(model, query, input, inputType, timeout, taskSettings, listener); + } else if (TaskType.SPARSE_EMBEDDING.equals(taskType)) { + inferSparseEmbedding(model, input, inputType, timeout, listener); } else { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); } @@ -364,6 +365,31 @@ public void inferTextEmbedding( ); } + public void inferSparseEmbedding( + Model model, + List inputs, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + var request = buildInferenceRequest( + model.getConfigurations().getInferenceEntityId(), + TextExpansionConfigUpdate.EMPTY_UPDATE, + inputs, + inputType, + timeout, + false + ); + + client.execute( + InferModelAction.INSTANCE, + request, + listener.delegateFailureAndWrap( + (l, inferenceResult) -> l.onResponse(SparseEmbeddingResults.of(inferenceResult.getInferenceResults())) + ) + ); + } + public void inferRerank( Model model, String query, @@ -422,7 +448,7 @@ public void chunkedInfer( TimeValue timeout, ActionListener> listener ) { - if (TaskType.TEXT_EMBEDDING.isAnyOrSame(model.getTaskType()) == false) { + if ((TaskType.TEXT_EMBEDDING.equals(model.getTaskType()) || TaskType.SPARSE_EMBEDDING.equals(model.getTaskType())) == false) { listener.onFailure( new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(model.getTaskType(), NAME), RestStatus.BAD_REQUEST) ); @@ -464,6 +490,8 @@ private static List translateToChunkedResults(Li private static ChunkedInferenceServiceResults 
translateToChunkedResult(InferenceResults inferenceResult) { if (inferenceResult instanceof MlChunkedTextEmbeddingFloatResults mlChunkedResult) { return InferenceChunkedTextEmbeddingFloatResults.ofMlResults(mlChunkedResult); + } else if (inferenceResult instanceof MlChunkedTextExpansionResults mlChunkedResult) { + return InferenceChunkedSparseEmbeddingResults.ofMlResult(mlChunkedResult); } else if (inferenceResult instanceof ErrorInferenceResults error) { return new ErrorChunkedInferenceResults(error.getException()); } else { @@ -471,103 +499,6 @@ private static ChunkedInferenceServiceResults translateToChunkedResult(Inference } } - @Override - public void start(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - return; - } - - if (model.getTaskType() != TaskType.TEXT_EMBEDDING && model.getTaskType() != TaskType.RERANK) { - listener.onFailure( - new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), NAME)) - ); - return; - } - - var startRequest = ((ElasticsearchInternalModel) model).getStartTrainedModelDeploymentActionRequest(); - var responseListener = ((ElasticsearchInternalModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); - - client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); - } - - @Override - public void stop(String inferenceEntityId, ActionListener listener) { - var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); - request.setForce(true); - client.execute( - StopTrainedModelDeploymentAction.INSTANCE, - request, - listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) - ); - } - - @Override - public void putModel(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - 
listener.onFailure(notElasticsearchModelException(model)); - return; - } else if (model instanceof MultilingualE5SmallModel e5Model) { - String modelId = e5Model.getServiceSettings().modelId(); - var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used - var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); - PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); - executeAsyncWithOrigin( - client, - INFERENCE_ORIGIN, - PutTrainedModelAction.INSTANCE, - putRequest, - ActionListener.wrap(response -> listener.onResponse(Boolean.TRUE), e -> { - if (e instanceof ElasticsearchStatusException esException - && esException.getMessage().contains(PutTrainedModelAction.MODEL_ALREADY_EXISTS_ERROR_MESSAGE_FRAGMENT)) { - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure(e); - } - }) - ); - } else if (model instanceof CustomElandModel) { - logger.info("Custom eland model detected, model must have been already loaded into the cluster with eland."); - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." 
- ) - ); - return; - } - } - - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if (response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { - String modelId = internalServiceSettings.modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - } else { - listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" - + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." 
- ) - ); - } - } - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_14_0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java index 775ddca16046..948117954a63 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java @@ -28,7 +28,6 @@ import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; @@ -43,8 +42,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL; @@ -242,31 +239,6 @@ private void checkCompatibleTaskType(TaskType taskType) { } } - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if 
(response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model instanceof ElserInternalModel elserModel) { - String modelId = elserModel.getServiceSettings().modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." - ) - ); - } - } - private static ElserMlNodeTaskSettings taskSettingsFromMap(TaskType taskType, Map config) { if (taskType != TaskType.SPARSE_EMBEDDING) { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index e6fd725a5019..257616033f08 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.InferenceServiceExtension; import 
org.elasticsearch.inference.InputType; @@ -31,6 +32,7 @@ import org.elasticsearch.xpack.core.action.util.QueryPage; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; @@ -39,8 +41,10 @@ import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; +import org.elasticsearch.xpack.core.ml.inference.results.InferenceChunkedTextExpansionResultsTests; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResultsTests; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; @@ -52,12 +56,10 @@ import org.mockito.Mockito; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -76,7 +78,6 @@ public class ElasticsearchInternalServiceTests extends ESTestCase { - TaskType taskType = TaskType.TEXT_EMBEDDING; String randomInferenceEntityId = 
randomAlphaOfLength(10); private static ThreadPool threadPool; @@ -92,7 +93,25 @@ public void shutdownThreadPool() { } public void testParseRequestConfig() { + var service = createService(mock(Client.class)); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + ActionListener modelListener = ActionListener.wrap( + model -> fail("Model parsing should have failed"), + e -> assertThat(e, instanceOf(IllegalArgumentException.class)) + ); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); + service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + } + + public void testParseRequestConfig_Misconfigured() { // Null model variant { var service = createService(mock(Client.class)); @@ -109,43 +128,10 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(IllegalArgumentException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Valid model variant - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID - ) - ) - ); - - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); - - service.parseRequestConfig( - randomInferenceEntityId, - taskType, - settings, - Set.of(), - 
getModelVerificationActionListener(e5ServiceSettings) - ); - } - // Invalid config map { var service = createService(mock(Client.class)); @@ -163,10 +149,12 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } + } - // Invalid service settings + public void testParseRequestConfig_E5() { { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -179,52 +167,28 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "not_a_valid_service_setting", - randomAlphaOfLength(10) + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID ) ) ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) - ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); - } - - // Extra service settings - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "extra_setting_that_should_not_be_here", - randomAlphaOfLength(10) - ) - ) + var e5ServiceSettings = new 
MultilingualE5SmallInternalServiceSettings( + 1, + 4, + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + null ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + settings, + Set.of(), + getModelVerificationActionListener(e5ServiceSettings) ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Extra settings + // Invalid service settings { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -237,19 +201,19 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID // we can't directly test the eland case until we mock - // the threadpool within the client + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + "not_a_valid_service_setting", + randomAlphaOfLength(10) ) ) ); - settings.put("extra_setting_that_should_not_be_here", randomAlphaOfLength(10)); ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + service.parseRequestConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, Set.of(), modelListener); } } @@ -342,10 +306,53 @@ public void testParseRequestConfig_Rerank_DefaultTaskSettings() { } } + @SuppressWarnings("unchecked") + public void testParseRequestConfig_SparseEmbedding() { + var client = mock(Client.class); + doAnswer(invocation -> { + var listener = (ActionListener) invocation.getArguments()[2]; + listener.onResponse( + new GetTrainedModelsAction.Response(new 
QueryPage<>(List.of(mock(TrainedModelConfig.class)), 1, mock(ParseField.class))) + ); + return null; + }).when(client).execute(Mockito.same(GetTrainedModelsAction.INSTANCE), any(), any()); + + when(client.threadPool()).thenReturn(threadPool); + + var service = createService(client); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + "foo" + ) + ) + ); + + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(CustomElandModel.class)); + assertThat(model.getTaskSettings(), instanceOf(EmptyTaskSettings.class)); + assertThat(model.getServiceSettings(), instanceOf(CustomElandInternalServiceSettings.class)); + }, e -> { fail("Model parsing failed " + e.getMessage()); }); + + service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, settings, Set.of(), modelListener); + } + private ActionListener getModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { return ActionListener.wrap(model -> { assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), model ); }, e -> { fail("Model parsing failed " + e.getMessage()); }); @@ -371,7 +378,10 @@ public void testParsePersistedConfig() { ) ); - expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); + expectThrows( + IllegalArgumentException.class, + () -> service.parsePersistedConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings) + ); } @@ -397,12 +407,17 @@ public void testParsePersistedConfig() { 
CustomElandEmbeddingModel parsedModel = (CustomElandEmbeddingModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); var elandServiceSettings = new CustomElandInternalTextEmbeddingServiceSettings(1, 4, "invalid", null); assertEquals( - new CustomElandEmbeddingModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, elandServiceSettings), + new CustomElandEmbeddingModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + elandServiceSettings + ), parsedModel ); } @@ -436,11 +451,16 @@ public void testParsePersistedConfig() { MultilingualE5SmallModel parsedModel = (MultilingualE5SmallModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), parsedModel ); } @@ -456,6 +476,8 @@ public void testParsePersistedConfig() { ) ); settings.put("not_a_valid_config_setting", randomAlphaOfLength(10)); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } @@ -476,12 +498,13 @@ public void testParsePersistedConfig() { ) ) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } } @SuppressWarnings("unchecked") - public void testChunkInfer() { + public void testChunkInfer_e5() { var mlTrainedModelResults = new ArrayList(); 
mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); @@ -568,6 +591,63 @@ public void testChunkInfer() { assertTrue("Listener not called", gotResults.get()); } + @SuppressWarnings("unchecked") + public void testChunkInfer_Sparse() { + var mlTrainedModelResults = new ArrayList(); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(new ErrorInferenceResults(new RuntimeException("boom"))); + var response = new InferModelAction.Response(mlTrainedModelResults, "foo", true); + + ThreadPool threadpool = new TestThreadPool("test"); + Client client = mock(Client.class); + when(client.threadPool()).thenReturn(threadpool); + doAnswer(invocationOnMock -> { + var listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(response); + return null; + }).when(client).execute(same(InferModelAction.INSTANCE), any(InferModelAction.Request.class), any(ActionListener.class)); + + var model = new CustomElandModel( + "foo", + TaskType.SPARSE_EMBEDDING, + "elasticsearch", + new ElasticsearchInternalServiceSettings(1, 1, "model-id", null) + ); + var service = createService(client); + + var gotResults = new AtomicBoolean(); + var resultsListener = ActionListener.>wrap(chunkedResponse -> { + assertThat(chunkedResponse, hasSize(3)); + assertThat(chunkedResponse.get(0), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result1 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(0); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(0)).getChunks(), result1.getChunkedResults()); + assertThat(chunkedResponse.get(1), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result2 = (InferenceChunkedSparseEmbeddingResults) 
chunkedResponse.get(1); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(1)).getChunks(), result2.getChunkedResults()); + var result3 = (ErrorChunkedInferenceResults) chunkedResponse.get(2); + assertThat(result3.getException(), instanceOf(RuntimeException.class)); + assertThat(result3.getException().getMessage(), containsString("boom")); + gotResults.set(true); + }, ESTestCase::fail); + + service.chunkedInfer( + model, + null, + List.of("foo", "bar"), + Map.of(), + InputType.SEARCH, + new ChunkingOptions(null, null), + InferenceAction.Request.DEFAULT_TIMEOUT, + ActionListener.runAfter(resultsListener, () -> terminate(threadpool)) + ); + + if (gotResults.get() == false) { + terminate(threadpool); + } + assertTrue("Listener not called", gotResults.get()); + } + @SuppressWarnings("unchecked") public void testChunkInferSetsTokenization() { var expectedSpan = new AtomicInteger(); @@ -711,7 +791,7 @@ public void testParseRequestConfigEland_PreservesTaskType() { ) ); - var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING)); + var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING)); CustomElandModel expectedModel = getCustomElandModel(taskType); PlainActionFuture listener = new PlainActionFuture<>(); @@ -739,6 +819,13 @@ private CustomElandModel getCustomElandModel(TaskType taskType) { ElasticsearchInternalService.NAME, serviceSettings ); + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + expectedModel = new CustomElandModel( + randomInferenceEntityId, + taskType, + ElasticsearchInternalService.NAME, + new CustomElandInternalServiceSettings(1, 4, "custom-model", null) + ); } return expectedModel; } @@ -867,21 +954,4 @@ private ElasticsearchInternalService createService(Client client) { var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client); return new ElasticsearchInternalService(context); } - - public static Model randomModelConfig(String 
inferenceEntityId) { - List givenList = Arrays.asList("MultilingualE5SmallModel"); - Random rand = org.elasticsearch.common.Randomness.get(); - String model = givenList.get(rand.nextInt(givenList.size())); - - return switch (model) { - case "MultilingualE5SmallModel" -> new MultilingualE5SmallModel( - inferenceEntityId, - TaskType.TEXT_EMBEDDING, - ElasticsearchInternalService.NAME, - MultilingualE5SmallInternalServiceSettingsTests.createRandom() - ); - default -> throw new IllegalArgumentException("model " + model + " is not supported for testing"); - }; - } - }